/*
  Function:  hypre_CSRMatrixElimCreate

  Set up the storage of Ae for a subsequent elimination pass over A:
  allocate and fill the row-pointer array I of Ae, then allocate (but do not
  fill) J and data, and record the number of nonzeros.

  Row i of A contributes all of its entries when i is found in 'rows';
  otherwise it contributes only those entries whose column index is found in
  'cols'.  Both lookups go through hypre_BinarySearch, so 'rows' (length
  nrows) and 'cols' (length ncols) are presumably sorted ascending --
  confirm against the callers.

  If 'col_mark' is not NULL, col_mark[c] is set to 1 for every column c of a
  counted entry; all other positions of col_mark are left untouched.
*/
void hypre_CSRMatrixElimCreate(hypre_CSRMatrix *A,
                               hypre_CSRMatrix *Ae,
                               HYPRE_Int nrows, HYPRE_Int *rows,
                               HYPRE_Int ncols, HYPRE_Int *cols,
                               HYPRE_Int *col_mark)
{
   HYPRE_Int *src_ptr  = hypre_CSRMatrixI(A);
   HYPRE_Int *src_col  = hypre_CSRMatrixJ(A);
   HYPRE_Int  num_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int *dst_ptr;
   HYPRE_Int  total = 0;
   HYPRE_Int  row, pos, first, last, c;

   dst_ptr = hypre_TAlloc(HYPRE_Int, num_rows+1);
   hypre_CSRMatrixI(Ae) = dst_ptr;

   for (row = 0; row < num_rows; row++)
   {
      /* entries of this row start at the running total */
      dst_ptr[row] = total;
      first = src_ptr[row];
      last  = src_ptr[row+1];

      if (hypre_BinarySearch(rows, row, nrows) < 0)
      {
         /* ordinary row: only entries sitting in a listed column count */
         for (pos = first; pos < last; pos++)
         {
            c = src_col[pos];
            if (hypre_BinarySearch(cols, c, ncols) < 0)
            {
               continue;
            }
            total++;
            if (col_mark)
            {
               col_mark[c] = 1;
            }
         }
         continue;
      }

      /* listed row: every entry counts */
      total += last - first;
      if (!col_mark)
      {
         continue;
      }
      for (pos = first; pos < last; pos++)
      {
         col_mark[src_col[pos]] = 1;
      }
   }
   dst_ptr[num_rows] = total;

   hypre_CSRMatrixJ(Ae)           = hypre_TAlloc(HYPRE_Int, total);
   hypre_CSRMatrixData(Ae)        = hypre_TAlloc(HYPRE_Real, total);
   hypre_CSRMatrixNumNonzeros(Ae) = total;
}
/* Build a communication package for A and store it as A's CommPkgT.
 *
 * All send/receive tables are produced by hypre_MatTCommPkgCreate_core from
 * the column map, partitioning arrays and the diag/offd CSR index arrays of
 * A; this routine only gathers those inputs and wraps the outputs in a newly
 * allocated hypre_ParCSRCommPkg.  The literal 1 passed to the core routine
 * is a flag whose meaning is defined there (not visible from here).
 *
 * Any value previously held in hypre_ParCSRMatrixCommPkgT(A) is overwritten
 * without being released.
 *
 * Always returns 0. */
HYPRE_Int hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A)
{
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);

   MPI_Comm   comm           = hypre_ParCSRMatrixComm(A);
   HYPRE_Int *col_map_offd   = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int  first_col_diag = hypre_ParCSRMatrixFirstColDiag(A);

   /* outputs of the core routine */
   HYPRE_Int  num_recvs;
   HYPRE_Int *recv_procs;
   HYPRE_Int *recv_vec_starts;
   HYPRE_Int  num_sends;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *send_map_elmts;

   hypre_ParCSRCommPkg *pkg;

   hypre_MatTCommPkgCreate_core
      (
         comm,
         col_map_offd,
         first_col_diag,
         hypre_ParCSRMatrixColStarts(A),
         hypre_CSRMatrixNumRows(diag),
         hypre_CSRMatrixNumCols(diag),
         hypre_CSRMatrixNumCols(offd),
         hypre_ParCSRMatrixRowStarts(A),
         first_col_diag,
         col_map_offd,
         hypre_CSRMatrixI(diag),
         hypre_CSRMatrixJ(diag),
         hypre_CSRMatrixI(offd),
         hypre_CSRMatrixJ(offd),
         1,
         &num_recvs, &recv_procs, &recv_vec_starts,
         &num_sends, &send_procs, &send_map_starts,
         &send_map_elmts
      );

   pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(pkg)          = comm;

   hypre_ParCSRCommPkgNumRecvs(pkg)      = num_recvs;
   hypre_ParCSRCommPkgRecvProcs(pkg)     = recv_procs;
   hypre_ParCSRCommPkgRecvVecStarts(pkg) = recv_vec_starts;

   hypre_ParCSRCommPkgNumSends(pkg)      = num_sends;
   hypre_ParCSRCommPkgSendProcs(pkg)     = send_procs;
   hypre_ParCSRCommPkgSendMapStarts(pkg) = send_map_starts;
   hypre_ParCSRCommPkgSendMapElmts(pkg)  = send_map_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = pkg;

   return 0;
}
/*
  Function:  hypre_CSRMatrixEliminateRowsCols

  Eliminate rows and columns of A in place and store what was removed in Ae.
  The I, J and data arrays of Ae must already be set up with matching row
  offsets (see hypre_CSRMatrixElimCreate).

  - Row i found in 'rows': every entry of the row is moved.  A keeps 0.0 in
    each position, except that when 'diag' is nonzero the diagonal position
    keeps 1.0; Ae receives the original value minus what A keeps.
  - Any other row: only entries whose column is found in 'cols' are moved;
    A keeps 0.0 there and Ae receives the original value.

  The sparsity pattern of A is not changed, only its values.  If 'col_remap'
  is not NULL, column c is written to Ae as col_remap[c]; otherwise as c.
*/
void hypre_CSRMatrixEliminateRowsCols(hypre_CSRMatrix *A,
                                      hypre_CSRMatrix *Ae,
                                      HYPRE_Int nrows, HYPRE_Int *rows,
                                      HYPRE_Int ncols, HYPRE_Int *cols,
                                      int diag, HYPRE_Int* col_remap)
{
   HYPRE_Int  *a_ptr    = hypre_CSRMatrixI(A);
   HYPRE_Int  *a_col    = hypre_CSRMatrixJ(A);
   HYPRE_Real *a_val    = hypre_CSRMatrixData(A);
   HYPRE_Int   num_rows = hypre_CSRMatrixNumRows(A);

   HYPRE_Int  *ae_ptr   = hypre_CSRMatrixI(Ae);
   HYPRE_Int  *ae_col   = hypre_CSRMatrixJ(Ae);
   HYPRE_Real *ae_val   = hypre_CSRMatrixData(Ae);

   HYPRE_Int   row, src, src_end, dst, c;
   HYPRE_Real  kept;

   for (row = 0; row < num_rows; row++)
   {
      src     = a_ptr[row];
      src_end = a_ptr[row+1];
      dst     = ae_ptr[row];

      if (hypre_BinarySearch(rows, row, nrows) >= 0)
      {
         /* whole row goes to Ae; A keeps at most a unit diagonal */
         for ( ; src < src_end; src++, dst++)
         {
            c = a_col[src];
            kept = 0.0;
            if (diag && c == row)
            {
               kept = 1.0;
            }
            ae_col[dst] = col_remap ? col_remap[c] : c;
            ae_val[dst] = a_val[src] - kept;
            a_val[src]  = kept;
         }
         continue;
      }

      /* only entries in listed columns go to Ae */
      for ( ; src < src_end; src++)
      {
         c = a_col[src];
         if (hypre_BinarySearch(cols, c, ncols) < 0)
         {
            continue;
         }
         ae_col[dst] = col_remap ? col_remap[c] : c;
         ae_val[dst] = a_val[src];
         a_val[src]  = 0.0;
         dst++;
      }
   }
}
/* Finds the rows of A that have a stored entry at the given (global) column
   number.  On return *rowmin is the smallest and *rowmax the largest local
   row number of such rows.  If there are no such rows, the result satisfies
   rowmax<0<=rowmin (*rowmax is -1 and *rowmin is the number of rows of the
   diag block).

   Both blocks of A are scanned:
     diag block: global column = local column + hypre_ParCSRMatrixFirstColDiag(A)
     offd block: global column = hypre_ParCSRMatrixColMapOffd(A)[local column]
   The global row number of local row i would be
   i + hypre_ParCSRMatrixFirstRowIndex(A); only local numbers are returned. */
void RowsWithColumn_original
( HYPRE_Int * rowmin, HYPRE_Int * rowmax, HYPRE_Int column, hypre_ParCSRMatrix * A )
{
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int * row_ptr;
   HYPRE_Int * col_idx;
   HYPRE_Int * offd_map;
   HYPRE_Int   first_col;
   HYPRE_Int   nrows, row, pos;

   /* ---- diag block ---- */
   row_ptr   = hypre_CSRMatrixI(diag);
   col_idx   = hypre_CSRMatrixJ(diag);
   nrows     = hypre_CSRMatrixNumRows(diag);
   first_col = hypre_ParCSRMatrixFirstColDiag(A);

   /* start from an empty range */
   *rowmin = nrows;
   *rowmax = -1;

   for ( row = 0; row < nrows; ++row )
   {
      for ( pos = row_ptr[row]; pos < row_ptr[row+1]; ++pos )
      {
         if ( col_idx[pos] + first_col != column )
         {
            continue;
         }
         /* one hit is enough for this row */
         if ( row < *rowmin ) { *rowmin = row; }
         if ( row > *rowmax ) { *rowmax = row; }
         break;
      }
   }

   /* ---- offd block ---- */
   row_ptr  = hypre_CSRMatrixI(offd);
   col_idx  = hypre_CSRMatrixJ(offd);
   nrows    = hypre_CSRMatrixNumRows(offd);
   offd_map = hypre_ParCSRMatrixColMapOffd(A);

   for ( row = 0; row < nrows; ++row )
   {
      for ( pos = row_ptr[row]; pos < row_ptr[row+1]; ++pos )
      {
         if ( offd_map[ col_idx[pos] ] != column )
         {
            continue;
         }
         if ( row < *rowmin ) { *rowmin = row; }
         if ( row > *rowmax ) { *rowmax = row; }
         break;
      }
   }
}
/* Checks whether an interpolation is possible for a fine grid point with
 * strong couplings across the processor boundary.  Required after CGC
 * coarsening.
 * ========================================================================================
 * S                         : the strength matrix
 * CF_marker, CF_marker_offd : the coarse/fine markers of the local points and
 *                             of the off-processor columns of S
 * ========================================================================================
 * Every local point that is not C_PT, has at least one entry in the offd
 * block of S, and has no C_PT among its diag or offd strong connections is
 * changed to C_PT.  Rows are visited in ascending order and CF_marker is
 * updated in place, so a point promoted here is seen as C_PT by later rows.
 * Always returns 0. */
HYPRE_Int AmgCGCBoundaryFix (hypre_ParCSRMatrix *S,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd)
{
  hypre_CSRMatrix *diag_part = hypre_ParCSRMatrixDiag (S);
  hypre_CSRMatrix *offd_part = hypre_ParCSRMatrixOffd (S);
  HYPRE_Int *diag_ptr = hypre_CSRMatrixI(diag_part);
  HYPRE_Int *diag_col = hypre_CSRMatrixJ(diag_part);
  HYPRE_Int *offd_ptr = hypre_CSRMatrixI(offd_part);
  HYPRE_Int *offd_col = NULL;
  HYPRE_Int nrows = hypre_CSRMatrixNumRows (diag_part);
  HYPRE_Int mpirank, row, k, found;
  HYPRE_Int added_cpts = 0;
  MPI_Comm comm = hypre_ParCSRMatrixComm(S);

  hypre_MPI_Comm_rank (comm,&mpirank);

  /* the offd column array is only fetched when offd has columns */
  if (hypre_CSRMatrixNumCols (offd_part))
  {
    offd_col = hypre_CSRMatrixJ(offd_part);
  }

  for (row = 0; row < nrows; row++)
  {
    /* only fine points with strong connections across the boundary */
    if (offd_ptr[row] != offd_ptr[row+1] && CF_marker[row] != C_PT)
    {
      found = 0;

      /* look for a coarse neighbour on this processor first ... */
      for (k = diag_ptr[row]; !found && k < diag_ptr[row+1]; k++)
      {
        found = (CF_marker[diag_col[k]] == C_PT);
      }
      /* ... then among the off-processor neighbours */
      for (k = offd_ptr[row]; !found && k < offd_ptr[row+1]; k++)
      {
        found = (CF_marker_offd[offd_col[k]] == C_PT);
      }

      if (!found)
      {
        /* every strong neighbour is fine: promote this point to C_PT */
        CF_marker[row] = C_PT;
#if 0
        hypre_printf ("Processor %d: added point %d in AmgCGCBoundaryFix\n",mpirank,row);
#endif
        added_cpts++;
      }
    }
  }
#if 0
  if (added_cpts) hypre_printf ("Processor %d: added %d points in AmgCGCBoundaryFix\n",mpirank,added_cpts);
  fflush(stdout);
#endif
  return 0;
}
/* Build one domain per coarse point (CF_marker[i] > 0), in CSR-like form.
 *
 * Domain d (the d-th coarse point i in ascending order) is stored in
 * domain_j[domain_i[d] .. domain_i[d+1]-1] and consists of
 *   - the coarse point i itself, followed by
 *   - every column j of row i of A with CF_marker[j] < 1, in row order.
 *
 * Parameters
 *   CF_marker       coarse/fine marker per row of A (> 0 means coarse)
 *   A               matrix whose sparsity pattern defines the neighbours
 *   num_coarse      number of coarse points; sizes domain_i and coarse_dof.
 *                   NOTE(review): it is not checked against the actual count
 *                   of CF_marker[i] > 0 -- the caller must pass a value that
 *                   is at least that count.
 *   dof_func        per-row function id, copied for each coarse point
 *   coarse_dof_ptr  out: dof_func value of each coarse point (num_coarse)
 *   domain_i_ptr    out: domain offsets (num_coarse+1)
 *   domain_j_ptr    out: concatenated domain members
 *
 * The three output arrays are allocated here with hypre_CTAlloc and handed
 * to the caller.  Always returns 0.
 */
HYPRE_Int hypre_CreateDomain (HYPRE_Int *CF_marker,
                              hypre_CSRMatrix *A,
                              HYPRE_Int num_coarse,
                              HYPRE_Int *dof_func,
                              HYPRE_Int **coarse_dof_ptr,
                              HYPRE_Int **domain_i_ptr,
                              HYPRE_Int **domain_j_ptr)
{
   HYPRE_Int *A_i = hypre_CSRMatrixI(A);
   HYPRE_Int *A_j = hypre_CSRMatrixJ(A);
   HYPRE_Int num_vars = hypre_CSRMatrixNumRows(A);
   HYPRE_Int i, j, cnt, domain, j_indx;
   HYPRE_Int *domain_i;
   HYPRE_Int *domain_j;
   HYPRE_Int *coarse_dof;
   HYPRE_Int num_pts = 0;

   domain_i = hypre_CTAlloc(HYPRE_Int, num_coarse+1);
   coarse_dof = hypre_CTAlloc(HYPRE_Int, num_coarse);

   /* Pass 1: record the function id of each coarse point and bound the
      storage needed for domain_j. */
   cnt = 0;
   for (i=0; i < num_vars; i++)
   {
      if (CF_marker[i] > 0)
      {
         /* Row length bounds the number of non-coarse neighbours; the extra
            slot is for the coarse point itself, which pass 2 always stores
            even when row i has no diagonal entry.  Without it a coarse row
            made only of non-coarse neighbours would overrun domain_j. */
         num_pts += A_i[i+1]-A_i[i] + 1;
         coarse_dof[cnt++] = dof_func[i];
      }
   }

   domain_j = hypre_CTAlloc(HYPRE_Int, num_pts);

   /* Pass 2: fill the domains. */
   cnt = 0;
   domain = 0;
   domain_i[0] = 0;
   for (i=0; i < num_vars; i++)
   {
      if (CF_marker[i] > 0)
      {
         domain_j[cnt++] = i;
         for (j=A_i[i]; j < A_i[i+1]; j++)
         {
            j_indx = A_j[j];
            if (CF_marker[j_indx] < 1)
            {
               domain_j[cnt++] = j_indx;
            }
         }
         domain++;
         domain_i[domain] = cnt;
      }
   }

   *domain_i_ptr = domain_i;
   *domain_j_ptr = domain_j;
   *coarse_dof_ptr = coarse_dof;

   return 0;
}
/* Overwrite the first num_idof rows of P with a harmonic extension computed
 * from A.
 *
 * With the first num_idof rows of A split into the columns found in 'idof'
 * (dense block Aii) and the columns found in 'bdof', the routine
 *   1. assembles Aii and Pi = Aib * Pb, where the row of P belonging to the
 *      k-th bdof is taken to be row k+num_idof of P and only its columns
 *      found in 'DOF' are used;
 *   2. solves Aii * X = Pi by Gaussian elimination without pivoting followed
 *      by back substitution (a zero pivot is skipped during elimination but
 *      is still divided by during back substitution);
 *   3. writes -X into P: row i (i < num_idof) gets the columns DOF[0..num_DOF-1]
 *      at offset i*num_DOF of P's J/data arrays, i.e. each idof is assumed
 *      to depend on _all_ DOFs and P is assumed to reserve num_DOF entries
 *      for each of its first num_idof rows.
 *
 * idof, bdof and DOF are looked up with hypre_BinarySearch and are therefore
 * presumably sorted ascending -- confirm against the callers.
 * Always returns 0. */
HYPRE_Int hypre_HarmonicExtension (hypre_CSRMatrix *A,
                                   hypre_CSRMatrix *P,
                                   HYPRE_Int num_DOF, HYPRE_Int *DOF,
                                   HYPRE_Int num_idof, HYPRE_Int *idof,
                                   HYPRE_Int num_bdof, HYPRE_Int *bdof)
{
   HYPRE_Int row, col, piv, nz, pnz;
   HYPRE_Int loc, brow, dcol;
   double scale, coef, pivot;
   double *lhs_row, *rhs_row, *piv_lhs, *piv_rhs;
   HYPRE_Int *out_col;
   double *out_val;

   HYPRE_Int *a_ptr = hypre_CSRMatrixI(A);
   HYPRE_Int *a_col = hypre_CSRMatrixJ(A);
   double *a_val = hypre_CSRMatrixData(A);

   HYPRE_Int *p_ptr = hypre_CSRMatrixI(P);
   HYPRE_Int *p_col = hypre_CSRMatrixJ(P);
   double *p_val = hypre_CSRMatrixData(P);

   /* dense row-major work arrays: Aii is num_idof x num_idof,
      Pi is num_idof x num_DOF */
   double * Aii = hypre_CTAlloc(double, num_idof*num_idof);
   double * Pi = hypre_CTAlloc(double, num_idof*num_DOF);

   /* Assemble Aii and Pi from the leading rows of A */
   for (row = 0; row < num_idof; row++)
   {
      lhs_row = Aii + row*num_idof;
      rhs_row = Pi + row*num_DOF;

      for (nz = a_ptr[row]; nz < a_ptr[row+1]; nz++)
      {
         /* Global to local */
         loc = hypre_BinarySearch(idof, a_col[nz], num_idof);
         if (loc != -1)
         {
            /* column is an idof: goes into Aii */
            lhs_row[loc] = a_val[nz];
            continue;
         }

         /* column may be a bdof: add its share of Pi = Aib x Pb */
         brow = hypre_BinarySearch(bdof, a_col[nz], num_bdof);
         if (brow <= -1)
         {
            continue;
         }
         for (pnz = p_ptr[brow+num_idof]; pnz < p_ptr[brow+num_idof+1]; pnz++)
         {
            dcol = hypre_BinarySearch(DOF, p_col[pnz], num_DOF);
            if (dcol > -1)
            {
               rhs_row[dcol] += a_val[nz] * p_val[pnz];
            }
         }
      }
   }

   /* Forward elimination on [Aii, Pi] (no pivoting) */
   for (piv = 0; piv < num_idof-1; piv++)
   {
      piv_lhs = Aii + piv*num_idof;
      piv_rhs = Pi + piv*num_DOF;
      pivot = piv_lhs[piv];
      if (pivot == 0.0)
      {
         continue;
      }
      for (row = piv+1; row < num_idof; row++)
      {
         lhs_row = Aii + row*num_idof;
         rhs_row = Pi + row*num_DOF;
         if (lhs_row[piv] == 0.0)
         {
            continue;
         }
         scale = lhs_row[piv] / pivot;
         /* the sub-diagonal entry itself is left in place; it is never
            read again */
         for (col = piv+1; col < num_idof; col++)
         {
            lhs_row[col] -= scale * piv_lhs[col];
         }
         for (col = 0; col < num_DOF; col++)
         {
            rhs_row[col] -= scale * piv_rhs[col];
         }
      }
   }

   /* Back substitution */
   for (row = num_idof-1; row >= 0; row--)
   {
      lhs_row = Aii + row*num_idof;
      rhs_row = Pi + row*num_DOF;

      for (piv = row+1; piv < num_idof; piv++)
      {
         coef = lhs_row[piv];
         if (coef == 0.0)
         {
            continue;
         }
         piv_rhs = Pi + piv*num_DOF;
         for (col = 0; col < num_DOF; col++)
         {
            rhs_row[col] -= coef * piv_rhs[col];
         }
      }

      pivot = lhs_row[row];
      for (col = 0; col < num_DOF; col++)
      {
         rhs_row[col] /= pivot;
      }
   }

   /* Put -Pi back in P; each idof row takes all num_DOF columns */
   for (row = 0; row < num_idof; row++)
   {
      out_col = p_col + row*num_DOF;
      out_val = p_val + row*num_DOF;
      rhs_row = Pi + row*num_DOF;
      for (col = 0; col < num_DOF; col++)
      {
         out_col[col] = DOF[col];
         out_val[col] = -rhs_row[col];
      }
   }

   hypre_TFree(Aii);
   hypre_TFree(Pi);

   return 0;
}
HYPRE_Int hypre_InexactPartitionOfUnityInterpolation (hypre_CSRMatrix **P_pointer, HYPRE_Int *i_dof_dof, HYPRE_Int *j_dof_dof, HYPRE_Real *a_dof_dof, HYPRE_Real *unit_vector, HYPRE_Int *i_domain_dof, HYPRE_Int *j_domain_dof, HYPRE_Int num_domains, /* == num-coarsedofs */ HYPRE_Int num_dofs) { HYPRE_Int ierr = 0; HYPRE_Int i,j,k; HYPRE_Int ind = 1; HYPRE_Int nu, nu_max = 1; HYPRE_Real eps = 1.e-24; HYPRE_Int max_iter = 1000; HYPRE_Int iter; HYPRE_Real delta0, delta_old, delta, alpha, tau, beta; HYPRE_Real aux, diag; HYPRE_Real *P_t_coeff; hypre_CSRMatrix *P_t, *P; HYPRE_Real *x,*r,*d,*g,*h; HYPRE_Real *row_sum; HYPRE_Int *i_global_to_local; HYPRE_Int local_dof_counter; HYPRE_Real *diag_dof_dof; /* ------------------------------------------------------------------ domain_dof relation should satisfy the following property: num_domains == num_coarsedofs; each domain contains only one coarse dof; ------------------------------------------------------------------ */ i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs); for (i=0; i < num_dofs; i++) i_global_to_local[i] = -1; local_dof_counter = 0; for (i=0; i < num_domains; i++) if (local_dof_counter < i_domain_dof[i+1]-i_domain_dof[i]) local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i]; /* solve T x = unit_vector; --------------------------------------- */ /* cg loop: ------------------------------------------------------- */ hypre_printf("\n---------------------- num_domains: %d, nnz: %d;\n", num_domains, i_domain_dof[num_domains]); x = hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); g = hypre_CTAlloc(HYPRE_Real, num_dofs); r = hypre_CTAlloc(HYPRE_Real, num_dofs); h = hypre_CTAlloc(HYPRE_Real, local_dof_counter); diag_dof_dof = hypre_CTAlloc(HYPRE_Real, i_dof_dof[num_dofs]); for (i=0; i<num_dofs; i++) for (j=i_dof_dof[i]; j<i_dof_dof[i+1]; j++) if (i!=j_dof_dof[j]) diag_dof_dof[j] = 0.e0; else diag_dof_dof[j] = a_dof_dof[j]; delta0 = 0.e0; for (i=0; i < num_dofs; i++) { x[i] = 0.e0; 
r[i] = unit_vector[i]; delta0+=r[i]*r[i]; } /* compute initial iterate: ierr = compute_sum_A_i_action(x, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); ------------------------------------- */ /* matrix vector product: g < -- T x; ------------------------------ */ ierr= compute_sym_GS_T_action(g, x, h, i_domain_dof, j_domain_dof, nu_max, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); delta = 0; for (i=0; i < num_dofs; i++) { r[i] -= g[i]; delta+=r[i]*r[i]; } if (delta < eps * delta0) goto end_cg; ierr= compute_sym_GS_T_action(g, unit_vector, h, i_domain_dof, j_domain_dof, 1, i_dof_dof, j_dof_dof, diag_dof_dof, i_global_to_local, num_domains, num_dofs); /* ierr = compute_sum_A_i_action(d, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); */ for (i=0; i < num_dofs; i++) d[i]=r[i]/g[i]; /* d contains precondtitioned residual: ------------------------ */ delta = 0.e0; for (i=0; i < num_dofs; i++) delta+=d[i]*r[i]; delta0 = delta; eps = 1.e-12; iter = 0; loop: /* matrix vector product: -------------------------------------- */ ierr= compute_sym_GS_T_action(g, d, h, i_domain_dof, j_domain_dof, nu_max, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); tau = 0.e0; for (i=0; i < num_dofs; i++) tau += d[i]*g[i]; alpha = delta/tau; for (i=0; i < num_dofs; i++) { x[i] += alpha * d[i]; r[i] -= alpha * g[i]; } iter++; delta_old = delta; /* ierr = compute_sum_A_i_action(g, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); */ ierr= compute_sym_GS_T_action(g, unit_vector, h, i_domain_dof, j_domain_dof, 1, i_dof_dof, j_dof_dof, diag_dof_dof, i_global_to_local, num_domains, num_dofs); for (i=0; i < num_dofs; i++) g[i] = r[i]/g[i]; delta = 0.e0; for (i=0; i < num_dofs; i++) delta += g[i] * r[i]; hypre_printf("\n---------------------- iter: %d, delta: %le\n", 
iter, delta); if (delta < eps * delta0 || iter > max_iter) goto end_cg; beta = delta/delta_old; for (i=0; i < num_dofs; i++) d[i] = g[i] + beta * d[i]; goto loop; end_cg: hypre_printf("\n END CG in partition of unity interpolation; num_iters: %d\n", iter); hypre_TFree(r); hypre_TFree(g); hypre_TFree(d); /* ith column of P is T_i x; ----------------------------------- */ P_t_coeff = hypre_CTAlloc(HYPRE_Real, i_domain_dof[num_domains]); for (i=0; i < num_domains; i++) { for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { i_global_to_local[j_domain_dof[j]] = j-i_domain_dof[i]; h[j-i_domain_dof[i]] = 0.e0; } nu = 0; loop_nu: for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux = x[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) if (i_global_to_local[j_dof_dof[k]] > -1) { /* this is a_{i_loc, j_loc} --------------------------------- */ if (j_dof_dof[k] != j_domain_dof[j]) { aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]]; } else { diag = a_dof_dof[k]; } } h[i_global_to_local[j_domain_dof[j]]] = aux/diag; } for (j=i_domain_dof[i+1]-1; j >= i_domain_dof[i]; j--) { aux = x[j_domain_dof[j]]; for (k =i_dof_dof[j_domain_dof[j]+1]-1; k>=i_dof_dof[j_domain_dof[j]]; k--) if (i_global_to_local[j_dof_dof[k]] > -1) { /* this is a_{i_loc, j_loc} --------------------------------- */ if (j_dof_dof[k] != j_domain_dof[j]) { aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]]; } else { diag = a_dof_dof[k]; } } h[i_global_to_local[j_domain_dof[j]]] = aux/diag; } nu++; if (nu < nu_max) goto loop_nu; for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { P_t_coeff[j]= h[i_global_to_local[j_domain_dof[j]]]; i_global_to_local[j_domain_dof[j]] = -1; } } hypre_TFree(diag_dof_dof); hypre_TFree(x); hypre_TFree(h); hypre_TFree(i_global_to_local); P_t = hypre_CSRMatrixCreate(num_domains, num_dofs, i_domain_dof[num_domains]); hypre_CSRMatrixData(P_t) = P_t_coeff; hypre_CSRMatrixI(P_t) = i_domain_dof; hypre_CSRMatrixJ(P_t) = j_domain_dof; 
row_sum = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) row_sum[i] = 0.e0; for (i=0; i < num_domains; i++) for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) row_sum[j_domain_dof[j]]+=P_t_coeff[j]; delta = 0.e0; for (i=0; i < num_dofs; i++) delta+= (row_sum[i] - 1.e0)*(row_sum[i] - 1.e0); hypre_printf("\n unit row_sum deviation in seq_PU_interpolation: %le\n", sqrt(delta/num_dofs)); hypre_TFree(row_sum); ind = 1; ierr = hypre_CSRMatrixTranspose(P_t, &P, ind); *P_pointer = P; hypre_CSRMatrixI(P_t) = NULL; hypre_CSRMatrixJ(P_t) = NULL; hypre_CSRMatrixDestroy(P_t); return ierr; }
/* chooses one grid for every processor
 * ============================================================
 * G           : the connectivity graph (one vertex per candidate coarse grid)
 * vertexrange : the parallel layout; the vertices of processor p are
 *               vertexrange[p] .. vertexrange[p+1]-1
 * mpisize     : number of procs
 * coarse      : out, newly allocated array of length mpisize holding the
 *               chosen coarse grid of each processor, numbered from 1
 * ============================================================
 * Outline:
 *   1. H keeps, for every vertex and every run of consecutive neighbours
 *      belonging to one processor, only the neighbour with the largest
 *      edge weight (the "heavy" edge).
 *   2. HT is the transpose of H.
 *   3. Each vertex is weighted by its number of heavy edges in H and HT and
 *      put on the measure lists.
 *   4. Repeatedly the list head is chosen for its processor, all vertices
 *      of that processor are retired, and the heavy neighbours on still
 *      undecided processors are raised to one more than the chosen weight.
 *   5. Processors left without a choice get their last vertex.
 * Always returns 0. */
HYPRE_Int AmgCGCChoose (hypre_CSRMatrix *G,HYPRE_Int *vertexrange,HYPRE_Int mpisize,HYPRE_Int **coarse)
{
  HYPRE_Real *G_data = hypre_CSRMatrixData (G);
  HYPRE_Int *G_i = hypre_CSRMatrixI(G);
  HYPRE_Int *G_j = hypre_CSRMatrixJ(G);
  HYPRE_Int num_vertices = hypre_CSRMatrixNumRows (G);

  hypre_CSRMatrix *H,*HT;
  HYPRE_Int *H_i,*H_j,*HT_i,*HT_j;
  HYPRE_Int *adj_ptr,*adj_idx;
  HYPRE_Int nnz_G,nnz_H;

  HYPRE_Int *processor,*measure_array,*lists,*where;
  hypre_LinkList LoL_head = NULL;
  hypre_LinkList LoL_tail = NULL;

  HYPRE_Int v,e,p,pass;
  HYPRE_Int best,pick,owner,nbr,slot;
  HYPRE_Int row_end,row_count,edge_out;
  HYPRE_Int boosted;
  HYPRE_Real best_weight;

  processor = hypre_CTAlloc (HYPRE_Int,num_vertices);
  *coarse = hypre_CTAlloc (HYPRE_Int,mpisize);
  memset (*coarse,0,sizeof(HYPRE_Int)*mpisize);

  measure_array = hypre_CTAlloc (HYPRE_Int,num_vertices);
  lists = hypre_CTAlloc (HYPRE_Int,num_vertices);
  where = hypre_CTAlloc (HYPRE_Int,num_vertices);

  /******************************************************************
   * determine heavy edges
   ******************************************************************/

  nnz_G = G_i[num_vertices];
  H = hypre_CSRMatrixCreate (num_vertices,num_vertices,nnz_G);
  H_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
  H_j = hypre_CTAlloc (HYPRE_Int,nnz_G);
  hypre_CSRMatrixI(H) = H_i;
  hypre_CSRMatrixJ(H) = H_j;

  /* owning processor of every vertex */
  p = 0;
  for (v = 0; v < num_vertices; v++)
  {
    while (vertexrange[p+1] <= v)
    {
      p++;
    }
    processor[v] = p;
  }

  edge_out = 0;
  H_i[0] = 0;
  for (v = 0; v < num_vertices; v++)
  {
    row_end = G_i[v+1];
    row_count = 0;
    best = -1;
    best_weight = 0;

    for (e = G_i[v]; e < row_end; e++)
    {
      /* G_data is always negative, so this test is sufficient */
      if (best == -1 || G_data[e] > best_weight)
      {
        best = G_j[e];
        best_weight = G_data[e];
      }
      if (e == row_end-1 || processor[G_j[e+1]] > processor[best])
      {
        /* we are done for this processor boundary */
        H_j[edge_out++] = best;
        row_count++;
        best = -1;
        best_weight = 0;
      }
    }
    H_i[v+1] = H_i[v] + row_count;
  }

  /******************************************************************
   * compute H^T, the transpose of H
   ******************************************************************/

  nnz_H = H_i[num_vertices];
  HT = hypre_CSRMatrixCreate (num_vertices,num_vertices,nnz_H);
  HT_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
  HT_j = hypre_CTAlloc (HYPRE_Int,nnz_H);
  hypre_CSRMatrixI(HT) = HT_i;
  hypre_CSRMatrixJ(HT) = HT_j;

  /* count the entries of every column of H ... */
  for (v = 0; v <= num_vertices; v++)
  {
    HT_i[v] = 0;
  }
  for (e = 0; e < nnz_H; e++)
  {
    HT_i[H_j[e]+1]++;
  }
  /* ... turn the counts into row starts of HT ... */
  for (v = 0; v < num_vertices; v++)
  {
    HT_i[v+1] += HT_i[v];
  }
  /* ... scatter, using HT_i[] as the insertion cursor of each row ... */
  for (v = 0; v < num_vertices; v++)
  {
    for (e = H_i[v]; e < H_i[v+1]; e++)
    {
      slot = H_j[e];
      HT_j[HT_i[slot]] = v;
      HT_i[slot]++;
    }
  }
  /* ... and shift the cursors back into row starts */
  v = num_vertices;
  while (v > 0)
  {
    HT_i[v] = HT_i[v-1];
    v--;
  }
  HT_i[0] = 0;

  /*****************************************************************
   * set initial vertex weights
   *****************************************************************/

  for (v = 0; v < num_vertices; v++)
  {
    measure_array[v] = H_i[v+1] - H_i[v] + HT_i[v+1] - HT_i[v];
    enter_on_lists (&LoL_head,&LoL_tail,measure_array[v],v,lists,where);
  }

  /******************************************************************
   * apply CGC iteration
   ******************************************************************/

  while (LoL_head && measure_array[LoL_head->head])
  {
    pick = LoL_head->head;
    owner = processor[pick];

    /* new maximal weight */
    boosted = measure_array[pick] + 1;

    /* add one because coarsegrid indexing starts with 1, not 0 */
    (*coarse)[owner] = pick+1;

    /* set weights for all remaining vertices on this processor to zero */
    for (v = vertexrange[owner]; v < vertexrange[owner+1]; v++)
    {
      remove_point (&LoL_head,&LoL_tail,measure_array[v],v,lists,where);
      measure_array[v] = 0;
    }

    /* heavy neighbours, first along H and then along HT: if no vertex is
       chosen on the neighbour's proc yet, raise its weight to the new
       maximum */
    for (pass = 0; pass < 2; pass++)
    {
      adj_ptr = pass ? HT_i : H_i;
      adj_idx = pass ? HT_j : H_j;

      for (e = adj_ptr[pick]; e < adj_ptr[pick+1]; e++)
      {
        nbr = adj_idx[e];
        if ((*coarse)[processor[nbr]])
        {
          continue;
        }
        remove_point (&LoL_head,&LoL_tail,measure_array[nbr],nbr,lists,where);
        enter_on_lists (&LoL_head,&LoL_tail,boosted,nbr,lists,where);
        measure_array[nbr] = boosted;
      }
    }
  }

  /* remove remaining list elements, if they exist. They all should have
     measure 0 */
  while (LoL_head)
  {
    v = LoL_head->head;
    remove_point (&LoL_head,&LoL_tail,measure_array[v],v,lists,where);
  }

  /* if the algorithm has not determined a coarse vertex for a proc, simply
     take the last one. Do not take the first one, it might be empty! */
  for (p = 0; p < mpisize; p++)
  {
    if ((*coarse)[p] == 0)
    {
      (*coarse)[p] = vertexrange[p+1];
    }
  }

  /********************************************
   * clean up
   ********************************************/

  hypre_CSRMatrixDestroy (H);
  hypre_CSRMatrixDestroy (HT);

  hypre_TFree (processor);
  hypre_TFree (measure_array);
  hypre_TFree (lists);
  hypre_TFree (where);

  return 0;
}
/************************************************************** * * CGC Coarsening routine * **************************************************************/ HYPRE_Int hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int measure_type, HYPRE_Int coarsen_type, HYPRE_Int cgc_its, HYPRE_Int debug_flag, HYPRE_Int **CF_marker_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(S); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S); HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows(S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(S_offd); hypre_CSRMatrix *S_ext; HYPRE_Int *S_ext_i; HYPRE_Int *S_ext_j; hypre_CSRMatrix *ST; HYPRE_Int *ST_i; HYPRE_Int *ST_j; HYPRE_Int *CF_marker; HYPRE_Int *CF_marker_offd=NULL; HYPRE_Int ci_tilde = -1; HYPRE_Int ci_tilde_mark = -1; HYPRE_Int *measure_array; HYPRE_Int *measure_array_master; HYPRE_Int *graph_array; HYPRE_Int *int_buf_data=NULL; /*HYPRE_Int *ci_array=NULL;*/ HYPRE_Int i, j, k, l, jS; HYPRE_Int ji, jj, index; HYPRE_Int set_empty = 1; HYPRE_Int C_i_nonempty = 0; HYPRE_Int num_nonzeros; HYPRE_Int num_procs, my_id; HYPRE_Int num_sends = 0; HYPRE_Int first_col, start; HYPRE_Int col_0, col_n; hypre_LinkList LoL_head; hypre_LinkList LoL_tail; HYPRE_Int *lists, *where; HYPRE_Int measure, new_meas; HYPRE_Int num_left; HYPRE_Int nabor, nabor_two; HYPRE_Int ierr = 0; HYPRE_Int use_commpkg_A = 0; HYPRE_Real wall_time; HYPRE_Int measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */ if (coarsen_type < 0) coarsen_type = -coarsen_type; /*------------------------------------------------------- * Initialize the C/F marker, LoL_head, LoL_tail arrays 
*-------------------------------------------------------*/ LoL_head = NULL; LoL_tail = NULL; lists = hypre_CTAlloc(HYPRE_Int, num_variables); where = hypre_CTAlloc(HYPRE_Int, num_variables); #if 0 /* debugging */ char filename[256]; FILE *fp; HYPRE_Int iter = 0; #endif /*-------------------------------------------------------------- * Compute a CSR strength matrix, S. * * For now, the "strength" of dependence/influence is defined in * the following way: i depends on j if * aij > hypre_max (k != i) aik, aii < 0 * or * aij < hypre_min (k != i) aik, aii >= 0 * Then S_ij = 1, else S_ij = 0. * * NOTE: the entries are negative initially, corresponding * to "unaccounted-for" dependence. *----------------------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); if (!comm_pkg) { use_commpkg_A = 1; comm_pkg = hypre_ParCSRMatrixCommPkg(A); } if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd); jS = S_i[num_variables]; ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS); ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1); ST_j = hypre_CTAlloc(HYPRE_Int,jS); hypre_CSRMatrixI(ST) = ST_i; hypre_CSRMatrixJ(ST) = ST_j; /*---------------------------------------------------------- * generate transpose of S, ST *----------------------------------------------------------*/ for (i=0; i <= num_variables; i++) ST_i[i] = 0; for (i=0; i < jS; i++) { ST_i[S_j[i]+1]++; } for (i=0; i < num_variables; i++) { ST_i[i+1] += ST_i[i]; } for (i=0; i < num_variables; i++) { for (j=S_i[i]; j < S_i[i+1]; j++) { index = S_j[j]; ST_j[ST_i[index]] = i; ST_i[index]++; } } for (i = num_variables; i > 0; i--) { ST_i[i] = ST_i[i-1]; } ST_i[0] = 0; /*---------------------------------------------------------- * Compute the measures * * The 
measures are given by the row sums of ST. * Hence, measure_array[i] is the number of influences * of variable i. * correct actual measures through adding influences from * neighbor processors *----------------------------------------------------------*/ measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables); measure_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { measure_array_master[i] = ST_i[i+1]-ST_i[i]; } if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) { if (use_commpkg_A) S_ext = hypre_ParCSRMatrixExtractBExt(S,A,0); else S_ext = hypre_ParCSRMatrixExtractBExt(S,S,0); S_ext_i = hypre_CSRMatrixI(S_ext); S_ext_j = hypre_CSRMatrixJ(S_ext); num_nonzeros = S_ext_i[num_cols_offd]; first_col = hypre_ParCSRMatrixFirstColDiag(S); col_0 = first_col-1; col_n = col_0+num_variables; if (measure_type) { for (i=0; i < num_nonzeros; i++) { index = S_ext_j[i] - first_col; if (index > -1 && index < num_variables) measure_array_master[index]++; } } } /*--------------------------------------------------- * Loop until all points are either fine or coarse. 
*---------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); /* first coarsening phase */ /************************************************************* * * Initialize the lists * *************************************************************/ CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); num_left = 0; for (j = 0; j < num_variables; j++) { if ((S_i[j+1]-S_i[j])== 0 && (S_offd_i[j+1]-S_offd_i[j]) == 0) { CF_marker[j] = SF_PT; measure_array_master[j] = 0; } else { CF_marker[j] = UNDECIDED; /* num_left++; */ /* BM May 19, 2006: see below*/ } } if (coarsen_type==22) { /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: (a) the point has no strong connections at all, OR (b) the point has a strong connection across a boundary */ for (j=0;j<num_variables;j++) if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;} } for (l = 1; l <= cgc_its; l++) { LoL_head = NULL; LoL_tail = NULL; num_left = 0; /* compute num_left before each RS coarsening loop */ memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int)); memset (lists,0,sizeof(HYPRE_Int)*num_variables); memset (where,0,sizeof(HYPRE_Int)*num_variables); for (j = 0; j < num_variables; j++) { measure = measure_array[j]; if (CF_marker[j] != SF_PT) { if (measure > 0) { enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where); num_left++; /* compute num_left before each RS coarsening loop */ } else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only if j is not conained in a previously constructed coarse grid. Reason: these neighbors should start with the same initial weight in each CGC iteration. 
BM Aug 30, 2006 */ { if (measure < 0) hypre_printf("negative measure!\n"); /* CF_marker[j] = f_pnt; */ for (k = S_i[j]; k < S_i[j+1]; k++) { nabor = S_j[k]; /* if (CF_marker[nabor] != SF_PT) */ if (CF_marker[nabor] == 0) /* BM Aug 30, 2006: don't alter weights of points contained in other candidate coarse grids */ { if (nabor < j) { new_meas = measure_array[nabor]; if (new_meas > 0) remove_point(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); else num_left++; /* BM Aug 29, 2006 */ new_meas = ++(measure_array[nabor]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); } else { new_meas = ++(measure_array[nabor]); } } } /* --num_left; */ /* BM May 19, 2006 */ } } } /* BM Aug 30, 2006: first iteration: determine maximal weight */ if (num_left && l==1) measure_max = measure_array[LoL_head->head]; /* BM Aug 30, 2006: break CGC iteration if no suitable starting point is available any more */ if (!num_left || measure_array[LoL_head->head]<measure_max) { while (LoL_head) { hypre_LinkList list_ptr = LoL_head; LoL_head = LoL_head->next_elt; dispose_elt (list_ptr); } break; } /**************************************************************** * * Main loop of Ruge-Stueben first coloring pass. 
* * WHILE there are still points to classify DO: * 1) find first point, i, on list with max_measure * make i a C-point, remove it from the lists * 2) For each point, j, in S_i^T, * a) Set j to be an F-point * b) For each point, k, in S_j * move k to the list in LoL with measure one * greater than it occupies (creating new LoL * entry if necessary) * 3) For each point, j, in S_i, * move j to the list in LoL with measure one * smaller than it occupies (creating new LoL * entry if necessary) * ****************************************************************/ while (num_left > 0) { index = LoL_head -> head; /* index = LoL_head -> tail; */ /* CF_marker[index] = C_PT; */ CF_marker[index] = l; /* BM Aug 18, 2006 */ measure = measure_array[index]; measure_array[index] = 0; measure_array_master[index] = 0; /* BM May 19: for CGC */ --num_left; remove_point(&LoL_head, &LoL_tail, measure, index, lists, where); for (j = ST_i[index]; j < ST_i[index+1]; j++) { nabor = ST_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ measure = measure_array[nabor]; measure_array[nabor]=0; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) /* undecided point */ { measure = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, measure, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } for (j = S_i[index]; j < S_i[index+1]; j++) { nabor = S_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { measure = measure_array[nabor]; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); measure_array[nabor] = --measure; if (measure > 0) enter_on_lists(&LoL_head, &LoL_tail, 
measure, nabor, lists, where); else { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) { new_meas = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } } } if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head); } l--; /* BM Aug 15, 2006 */ hypre_TFree(measure_array); hypre_TFree(measure_array_master); hypre_CSRMatrixDestroy(ST); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 1st pass = %f\n", my_id, wall_time); } hypre_TFree(lists); hypre_TFree(where); if (num_procs>1) { if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen CGC = %f\n", my_id, wall_time); } } else { /* the first candiate coarse grid is the coarse grid */ for (j=0;j<num_variables;j++) { if (CF_marker[j]==1) CF_marker[j]=C_PT; else CF_marker[j]=F_PT; } } /* BM May 19, 2006: Set all undecided points to be fine grid points. 
*/ for (j=0;j<num_variables;j++) if (!CF_marker[j]) CF_marker[j]=F_PT; /*--------------------------------------------------- * Initialize the graph array *---------------------------------------------------*/ graph_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { graph_array[i] = -1; } if (debug_flag == 3) wall_time = time_getWallclockSeconds(); for (i=0; i < num_variables; i++) { if (ci_tilde_mark != i) ci_tilde = -1; if (CF_marker[i] == -1) { for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] > 0) graph_array[j] = i; } for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] == -1) { set_empty = 1; for (jj = S_i[j]; jj < S_i[j+1]; jj++) { index = S_j[jj]; if (graph_array[index] == i) { set_empty = 0; break; } } if (set_empty) { if (C_i_nonempty) { CF_marker[i] = 1; if (ci_tilde > -1) { CF_marker[ci_tilde] = -1; ci_tilde = -1; } C_i_nonempty = 0; break; } else { ci_tilde = j; ci_tilde_mark = i; CF_marker[j] = 1; C_i_nonempty = 1; i--; break; } } } } } } if (debug_flag == 3 && coarsen_type != 2) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 2nd pass = %f\n", my_id, wall_time); } /* third pass, check boundary fine points for coarse neighbors */ /*------------------------------------------------ * Exchange boundary data for CF_marker *------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (num_procs > 1) { comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, CF_marker_offd); 
hypre_ParCSRCommHandleDestroy(comm_handle); } AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d CGC boundary fix = %f\n", my_id, wall_time); } /*--------------------------------------------------- * Clean up and return *---------------------------------------------------*/ /*if (coarsen_type != 1) { */ if (CF_marker_offd) hypre_TFree(CF_marker_offd); /* BM Aug 21, 2006 */ if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */ /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */ /*} */ hypre_TFree(graph_array); if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) hypre_CSRMatrixDestroy(S_ext); *CF_marker_ptr = CF_marker; return (ierr); }
/*--------------------------------------------------------------------------
 * hypre_ParMatScaleDiagInv_F
 *
 * For every local row i with CF_marker[i] < 0 ("fine" rows), divide all
 * stored entries of row i of C -- in both the diag and the offd block -- by
 * (a_ii * weight), where a_ii is the entry of A's diag block stored in row i
 * with column index i.  Rows with CF_marker[i] >= 0 are left untouched.
 *
 * Only locally stored data is read or written.  No check is made for a zero
 * divisor.  If row i of A's diag block stores no entry with column index i,
 * row i of C is left unchanged.
 *
 *   C          matrix whose fine rows are scaled in place
 *   A          matrix supplying the diagonal entries
 *   weight     extra scalar factor folded into the divisor
 *   CF_marker  per-row marker; negative selects the row for scaling
 *--------------------------------------------------------------------------*/
void
hypre_ParMatScaleDiagInv_F( hypre_ParCSRMatrix * C, hypre_ParCSRMatrix * A,
                            double weight, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);

   double    *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int *A_diag_i    = hypre_CSRMatrixI(A_diag);
   HYPRE_Int *A_diag_j    = hypre_CSRMatrixJ(A_diag);

   double    *C_diag_data = hypre_CSRMatrixData(C_diag);
   double    *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int *C_diag_i    = hypre_CSRMatrixI(C_diag);
   HYPRE_Int *C_offd_i    = hypre_CSRMatrixI(C_offd);

   HYPRE_Int  num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int  num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);

   HYPRE_Int  row, pos_A, pos_C;
   double     divisor;

   for (row = 0; row < num_rows_diag_C; row++)
   {
      /* coarse rows (marker >= 0) are not scaled */
      if ( CF_marker[row] >= 0 )
      {
         continue;
      }

      /* scan row of A_diag for its diagonal entry */
      for (pos_A = A_diag_i[row]; pos_A < A_diag_i[row+1]; pos_A++)
      {
         if ( A_diag_j[pos_A] != row )
         {
            continue;
         }

         divisor = A_diag_data[pos_A] * weight;

         /* scale the diag part of this row of C */
         for (pos_C = C_diag_i[row]; pos_C < C_diag_i[row+1]; pos_C++)
         {
            C_diag_data[pos_C] = C_diag_data[pos_C] / divisor;
         }

         /* scale the offd part, if C has any off-diagonal columns */
         if ( num_cols_offd_C )
         {
            for (pos_C = C_offd_i[row]; pos_C < C_offd_i[row+1]; pos_C++)
            {
               C_offd_data[pos_C] = C_offd_data[pos_C] / divisor;
            }
         }
      }
   }
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixDropEntries
 *
 * Delete any matrix entry C(i,j) for which the corresponding entry P(i,j)
 * does not exist -- but only in "fine" rows, i.e. rows with CF_marker[i] < 0.
 * Rows with CF_marker[i] >= 0 are always kept whole.
 *
 * This is a purely local computation: C and P must have the same data
 * distribution among processors.
 *
 * The j and data arrays of C are repacked in place and the row pointers are
 * updated; the arrays are not re-allocated, so there will generally be unused
 * space at their ends.  After a row has been repacked, its surviving entries
 * (diag and offd) are multiplied by old_sum/new_sum so that the row sum is
 * unchanged; when the surviving sum is zero no scaling is applied.
 *
 * The local nonzero counts of C_diag and C_offd are updated.  The global
 * counts of C are reset to 0 rather than recomputed.
 *
 * NOTE(review): only the diag block is filtered against P.  In the offd
 * block the retention test is (v >= 0 || v <= 0), so nothing is dropped
 * there except NaN values in fine rows.  That behavior is preserved as it
 * was; filtering offd against P would require C and P to share the same
 * off-diagonal column numbering, which cannot be confirmed from here.
 *
 *   C          matrix to be filtered (modified in place)
 *   P          matrix supplying the sparsity pattern to keep
 *   CF_marker  per-row marker; negative selects the row for filtering
 *--------------------------------------------------------------------------*/
void
hypre_ParCSRMatrixDropEntries( hypre_ParCSRMatrix * C,
                               hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int       *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int       *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int       *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int       *C_offd_j = hypre_CSRMatrixJ(C_offd);
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Int       *new_C_diag_i;
   HYPRE_Int       *new_C_offd_i;
   HYPRE_Int        num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int        num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(C_diag);
   HYPRE_Int        num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(C_offd);
   double           vmax = 0.0;
   double           vmin = 0.0;
   double           v, old_sum, new_sum, scale;
   HYPRE_Int        i1, m, m1d, m1o, jC, mP, keep;

   /* Both blocks have one row pointer per local row (plus one), so both
      scratch arrays are sized by the row count.  The offd array used to be
      sized and guarded by the offd column count, which left the offd part of
      rows beyond that count unscaled. */
   new_C_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   new_C_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   m1d = C_diag_i[0];
   m1o = C_offd_i[0];
   new_C_diag_i[0] = m1d;
   new_C_offd_i[0] = m1o;

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      old_sum = 0;
      new_sum = 0;

      /* ---- diag block: keep C(i1,jC) only if P(i1,jC) is stored ---- */
      for ( m=C_diag_i[i1]; m<C_diag_i[i1+1]; ++m )
      {
         v = C_diag_data[m];
         jC = C_diag_j[m];
         old_sum += v;

         /* Coarse rows are kept unconditionally, so the search through the
            row of P is only needed for fine rows.  Nothing is assumed about
            the ordering of P_diag_j, hence the linear scan. */
         keep = ( CF_marker[i1]>=0 );
         if ( !keep )
         {
            for ( mP=P_diag_i[i1]; mP<P_diag_i[i1+1]; ++mP )
            {
               /* index P with its own running index mP (not C's index m) */
               if ( jC==P_diag_j[mP] )
               {
                  keep=1;
                  break;
               }
            }
         }

         if ( keep )
         {
            /* keep v in C */
            new_sum += v;
            C_diag_j[m1d] = C_diag_j[m];
            C_diag_data[m1d] = C_diag_data[m];
            ++m1d;
         }
         else
         {
            /* discard v */
            --num_nonzeros_diag;
         }
      }

      /* ---- offd block: retention rule unchanged (see NOTE above) ---- */
      for ( m=C_offd_i[i1]; m<C_offd_i[i1+1]; ++m )
      {
         v = C_offd_data[m];
         old_sum += v;

         if ( CF_marker[i1]>=0 || v>=vmax || v<=vmin )
         {
            /* keep v in C */
            new_sum += v;
            C_offd_j[m1o] = C_offd_j[m];
            C_offd_data[m1o] = C_offd_data[m];
            ++m1o;
         }
         else
         {
            /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_C_diag_i[i1+1] = m1d;
      new_C_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if ( new_sum!=0 )
      {
         scale = old_sum/new_sum;
      }
      else
      {
         scale = 1.0;
      }
      for ( m=new_C_diag_i[i1]; m<new_C_diag_i[i1+1]; ++m )
      {
         C_diag_data[m] *= scale;
      }
      for ( m=new_C_offd_i[i1]; m<new_C_offd_i[i1+1]; ++m )
      {
         C_offd_data[m] *= scale;
      }
   }

   /* install the repacked row pointers (entry 0 is unchanged) */
   for ( i1 = 1; i1 <= num_rows_diag_C; i1++ )
   {
      C_diag_i[i1] = new_C_diag_i[i1];
      C_offd_i[i1] = new_C_offd_i[i1];
   }

   hypre_TFree( new_C_diag_i );
   hypre_TFree( new_C_offd_i );

   hypre_CSRMatrixNumNonzeros(C_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(C_offd) = num_nonzeros_offd;

   /* SetNumNonzeros, SetDNumNonzeros are global and need an
      hypre_MPI_Allreduce, so the global counts are reset here instead of
      being recomputed. */
   hypre_ParCSRMatrixNumNonzeros( C ) = 0;
   hypre_ParCSRMatrixDNumNonzeros( C ) = 0.0;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec
 *
 * Performs y <- alpha * A * x + beta * y for a CSR matrix A.
 *
 * The update is carried out as y <- (beta/alpha)*y, y += A*x, y <- alpha*y,
 * except when alpha == 0.0, in which case y is simply scaled by beta.
 * x and y may hold several vectors (num_vectors > 1); entries are then
 * addressed through the index/vector strides of each vector object.
 *
 * When the matrix records fewer than 70% of its rows as nonzero rows, only
 * the rows listed in the rownnz array are visited.
 *
 * Return value (informational only, processing is never aborted):
 *   0  sizes are compatible
 *   1  length of x differs from the number of columns of A
 *   2  length of y differs from the number of rows of A
 *   3  both of the above
 *--------------------------------------------------------------------------*/
int
hypre_CSRMatrixMatvec( double alpha, hypre_CSRMatrix *A, hypre_Vector *x,
                       double beta, hypre_Vector *y )
{
   double *A_data      = hypre_CSRMatrixData(A);
   int    *A_i         = hypre_CSRMatrixI(A);
   int    *A_j         = hypre_CSRMatrixJ(A);
   int     num_rows    = hypre_CSRMatrixNumRows(A);
   int     num_cols    = hypre_CSRMatrixNumCols(A);

   int    *A_rownnz    = hypre_CSRMatrixRownnz(A);
   int     num_rownnz  = hypre_CSRMatrixNumRownnz(A);

   double *x_data      = hypre_VectorData(x);
   double *y_data      = hypre_VectorData(y);
   int     x_size      = hypre_VectorSize(x);
   int     y_size      = hypre_VectorSize(y);
   int     num_vectors = hypre_VectorNumVectors(x);
   int     idxstride_y = hypre_VectorIndexStride(y);
   int     vecstride_y = hypre_VectorVectorStride(y);
   int     idxstride_x = hypre_VectorIndexStride(x);
   int     vecstride_x = hypre_VectorVectorStride(x);

   double  temp, tempx;
   int     i, j, jj;
   int     m;

   /* fraction of nonzero rows below which the rownnz list is used */
   double  xpar = 0.7;

   int     ierr = 0;

   /*---------------------------------------------------------------------
    * Check for size compatibility.  Because temporary vectors are often
    * used in Matvec, a mismatch does not terminate processing.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_cols != x_size)
      ierr = 1;

   if (num_rows != y_size)
      ierr = 2;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 3;

   /*-----------------------------------------------------------------------
    * alpha == 0.0: y = beta*y
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= beta;

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
         for (i = 0; i < num_rows*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A*x
    *-----------------------------------------------------------------*/

   if (num_rownnz < xpar*(num_rows))
   {
      /* visit only the rows listed in A_rownnz */
      for (i = 0; i < num_rownnz; i++)
      {
         m = A_rownnz[i];

         if ( num_vectors==1 )
         {
            tempx = y_data[m];
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               tempx += A_data[jj] * x_data[A_j[jj]];
            y_data[m] = tempx;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               tempx = y_data[ j*vecstride_y + m*idxstride_y ];
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               y_data[ j*vecstride_y + m*idxstride_y ] = tempx;
            }
         }
      }
   }
   else
   {
      /* j is the inner vector counter of the multi-vector branch and must be
         private as well; leaving it shared lets threads race on it. */
#pragma omp parallel for private(i,j,jj,temp) schedule(static)
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            temp = y_data[i];
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               temp += A_data[jj] * x_data[A_j[jj]];
            y_data[i] = temp;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               temp = y_data[ j*vecstride_y + i*idxstride_y ];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] = temp;
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec_FF
 *
 * Restricted matrix-vector product y <- alpha * A * x + beta * y.
 *
 * Only rows i with CF_marker_x[i] == fpt are updated, and within such a row
 * only entries whose column index c satisfies CF_marker_y[c] == fpt
 * contribute.  All other entries of y are left untouched.
 *
 * The update is performed as y <- (beta/alpha)*y, y += A*x, y <- alpha*y on
 * the selected rows; for alpha == 0.0 the selected rows are scaled by beta.
 *
 * Return value (informational only, processing is never aborted):
 *   0  sizes are compatible
 *   1  length of x differs from the number of columns of A
 *   2  length of y differs from the number of rows of A
 *   3  both of the above
 *--------------------------------------------------------------------------*/
int
hypre_CSRMatrixMatvec_FF( double alpha, hypre_CSRMatrix *A, hypre_Vector *x,
                          double beta, hypre_Vector *y,
                          int *CF_marker_x, int *CF_marker_y, int fpt )
{
   double *A_data   = hypre_CSRMatrixData(A);
   int    *A_i      = hypre_CSRMatrixI(A);
   int    *A_j      = hypre_CSRMatrixJ(A);
   int     num_rows = hypre_CSRMatrixNumRows(A);
   int     num_cols = hypre_CSRMatrixNumCols(A);

   double *x_data   = hypre_VectorData(x);
   double *y_data   = hypre_VectorData(y);
   int     x_size   = hypre_VectorSize(x);
   int     y_size   = hypre_VectorSize(y);

   double  ratio, acc;
   int     row, nz, col;
   int     status = 0;

   /* size check: bit 0 flags an x/column mismatch, bit 1 a y/row mismatch */
   if (num_cols != x_size)
   {
      status += 1;
   }
   if (num_rows != y_size)
   {
      status += 2;
   }

   /* alpha == 0.0: nothing to multiply, just scale the selected rows */
   if (alpha == 0.0)
   {
      for (row = 0; row < num_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            y_data[row] *= beta;
         }
      }
      return status;
   }

   /* y = (beta/alpha)*y on the selected rows */
   ratio = beta / alpha;
   if (ratio != 1.0)
   {
      for (row = 0; row < num_rows; row++)
      {
         if (CF_marker_x[row] != fpt)
         {
            continue;
         }
         if (ratio == 0.0)
         {
            y_data[row] = 0.0;
         }
         else
         {
            y_data[row] *= ratio;
         }
      }
   }

   /* y += A*x, restricted to selected rows and selected columns */
   for (row = 0; row < num_rows; row++)
   {
      if (CF_marker_x[row] != fpt)
      {
         continue;
      }

      acc = y_data[row];
      for (nz = A_i[row]; nz < A_i[row+1]; nz++)
      {
         col = A_j[nz];
         if (CF_marker_y[col] == fpt)
         {
            acc += A_data[nz] * x_data[col];
         }
      }
      y_data[row] = acc;
   }

   /* y = alpha*y on the selected rows */
   if (alpha != 1.0)
   {
      for (row = 0; row < num_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            y_data[row] *= alpha;
         }
      }
   }

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvecT
 *
 * Performs y <- alpha * A^T * x + beta * y for a CSR matrix A.
 *
 * The update is carried out as y <- (beta/alpha)*y, y += A^T*x,
 * y <- alpha*y, except when alpha == 0.0, in which case y is scaled by beta.
 * x and y may hold several vectors; entries are then addressed through the
 * index/vector strides of each vector object.
 *
 * When hypre_NumThreads() reports more than one thread, the columns of A are
 * split into one contiguous range per thread and the matrix is traversed
 * once per range, accumulating only into the y entries of that range.
 *
 * Return value (informational only, processing is never aborted):
 *   0  sizes are compatible
 *   1  length of x differs from the number of rows of A
 *   2  length of y differs from the number of columns of A
 *   3  both of the above
 *--------------------------------------------------------------------------*/
int
hypre_CSRMatrixMatvecT( double alpha, hypre_CSRMatrix *A, hypre_Vector *x,
                        double beta, hypre_Vector *y )
{
   double *A_data      = hypre_CSRMatrixData(A);
   int    *A_i         = hypre_CSRMatrixI(A);
   int    *A_j         = hypre_CSRMatrixJ(A);
   int     num_rows    = hypre_CSRMatrixNumRows(A);
   int     num_cols    = hypre_CSRMatrixNumCols(A);

   double *x_data      = hypre_VectorData(x);
   double *y_data      = hypre_VectorData(y);
   int     x_size      = hypre_VectorSize(x);
   int     y_size      = hypre_VectorSize(y);
   int     num_vectors = hypre_VectorNumVectors(x);
   int     idxstride_y = hypre_VectorIndexStride(y);
   int     vecstride_y = hypre_VectorVectorStride(y);
   int     idxstride_x = hypre_VectorIndexStride(x);
   int     vecstride_x = hypre_VectorVectorStride(x);

   double  ratio;
   int     k, row, col, vec, nz;
   int     part, num_threads;
   int     chunk, extra;          /* columns per range, and leftover columns */
   int     first_col, end_col;    /* current range is [first_col, end_col)   */
   int     status = 0;

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   /* size check: bit 0 flags an x/row mismatch, bit 1 a y/column mismatch */
   if (num_rows != x_size)
   {
      status += 1;
   }
   if (num_cols != y_size)
   {
      status += 2;
   }

   /* alpha == 0.0: y = beta*y */
   if (alpha == 0.0)
   {
      for (k = 0; k < num_cols*num_vectors; k++)
      {
         y_data[k] *= beta;
      }
      return status;
   }

   /* y = (beta/alpha)*y */
   ratio = beta / alpha;
   if (ratio != 1.0)
   {
      if (ratio == 0.0)
      {
         for (k = 0; k < num_cols*num_vectors; k++)
         {
            y_data[k] = 0.0;
         }
      }
      else
      {
         for (k = 0; k < num_cols*num_vectors; k++)
         {
            y_data[k] *= ratio;
         }
      }
   }

   /* y += A^T*x */
   num_threads = hypre_NumThreads();

   if (num_threads > 1)
   {
      chunk = num_cols / num_threads;
      extra = num_cols - chunk*num_threads;

      for (part = 0; part < num_threads; part++)
      {
         /* the first 'extra' ranges hold one additional column each */
         if (part < extra)
         {
            first_col = part*chunk + part;
            end_col   = (part+1)*chunk + part + 1;
         }
         else
         {
            first_col = part*chunk + extra;
            end_col   = (part+1)*chunk + extra;
         }

         if ( num_vectors==1 )
         {
            for (row = 0; row < num_rows; row++)
            {
               for (nz = A_i[row]; nz < A_i[row+1]; nz++)
               {
                  col = A_j[nz];
                  if (col >= first_col && col < end_col)
                  {
                     y_data[col] += A_data[nz] * x_data[row];
                  }
               }
            }
         }
         else
         {
            for (row = 0; row < num_rows; row++)
            {
               for ( vec=0; vec<num_vectors; ++vec )
               {
                  for (nz = A_i[row]; nz < A_i[row+1]; nz++)
                  {
                     col = A_j[nz];
                     if (col >= first_col && col < end_col)
                     {
                        y_data[ col*idxstride_y + vec*vecstride_y ] +=
                           A_data[nz] * x_data[ row*idxstride_x + vec*vecstride_x ];
                     }
                  }
               }
            }
         }
      }
   }
   else if ( num_vectors==1 )
   {
      for (row = 0; row < num_rows; row++)
      {
         for (nz = A_i[row]; nz < A_i[row+1]; nz++)
         {
            col = A_j[nz];
            y_data[col] += A_data[nz] * x_data[row];
         }
      }
   }
   else
   {
      for (row = 0; row < num_rows; row++)
      {
         for ( vec=0; vec<num_vectors; ++vec )
         {
            for (nz = A_i[row]; nz < A_i[row+1]; nz++)
            {
               col = A_j[nz];
               y_data[ col*idxstride_y + vec*vecstride_y ] +=
                  A_data[nz] * x_data[ row*idxstride_x + vec*vecstride_x ];
            }
         }
      }
   }

   /* y = alpha*y */
   if (alpha != 1.0)
   {
      for (k = 0; k < num_cols*num_vectors; k++)
      {
         y_data[k] *= alpha;
      }
   }

   return status;
}
/***************************************************************************** * * Routine for constructing graph domain_dof with minimal overlap * and computing the respective matrix inverses to be * used in an overlapping Schwarz procedure (like smoother * in AMG); * *****************************************************************************/ HYPRE_Int hypre_AMGCreateDomainDof(hypre_CSRMatrix *A, HYPRE_Int **i_domain_dof_pointer, HYPRE_Int **j_domain_dof_pointer, HYPRE_Real **domain_matrixinverse_pointer, HYPRE_Int *num_domains_pointer) { HYPRE_Int *i_domain_dof, *j_domain_dof; HYPRE_Real *domain_matrixinverse; HYPRE_Int num_domains; HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A); HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A); HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A); HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A); /* HYPRE_Int *i_dof_to_accept_weight; */ HYPRE_Int *i_dof_to_prefer_weight, *w_dof_dof, *i_dof_weight; HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof; HYPRE_Int *i_dof_index; HYPRE_Int ierr = 0; HYPRE_Int i,j,k, l_loc, i_loc, j_loc; HYPRE_Int i_dof; HYPRE_Int *i_local_to_global; HYPRE_Int *i_global_to_local; HYPRE_Int local_dof_counter, max_local_dof_counter=0; HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0; HYPRE_Real *AE, *XE; /* PCG arrays: --------------------------------------------------- */ /* HYPRE_Real *x, *rhs, *v, *w, *d, *aux; HYPRE_Int max_iter; */ /* --------------------------------------------------------------------- */ /*=======================================================================*/ /* create artificial domains by agglomeration; */ /*=======================================================================*/ hypre_printf("----------- create artificials domain by agglomeration; ======\n"); i_dof_to_prefer_weight = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); w_dof_dof = (HYPRE_Int *) malloc(i_dof_dof[num_dofs] * sizeof(HYPRE_Int)); for (i=0; i < num_dofs; i++) 
i_dof_to_prefer_weight[i] = 0; for (i=0; i<num_dofs; i++) for (j=i_dof_dof[i]; j< i_dof_dof[i+1]; j++) { if (j_dof_dof[j] == i) w_dof_dof[j]=0; else w_dof_dof[j]=1; } hypre_printf("end computing weights for agglomeration procedure: --------\n"); i_dof_weight = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); i_aggregate_dof = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); j_aggregate_dof= (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); ierr = hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof, i_dof_dof, j_dof_dof, w_dof_dof, i_dof_dof, j_dof_dof, i_dof_dof, j_dof_dof, i_dof_to_prefer_weight, i_dof_weight, num_dofs, num_dofs, &num_domains); hypre_printf("num_dofs: %d, num_domains: %d\n", num_dofs, num_domains); i_dof_to_aggregate = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); for (i=0; i < num_domains; i++) for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) i_dof_to_aggregate[j_aggregate_dof[j]] = i; /* hypre_printf("========================================================\n"); hypre_printf("== artificial non--overlapping domains (aggregates): ===\n"); hypre_printf("========================================================\n"); for (i=0; i < num_domains; i++) { hypre_printf("\n aggregate %d:\n", i); for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) hypre_printf("%d, ", j_aggregate_dof[j]); hypre_printf("\n"); } */ free(i_dof_to_prefer_weight); free(i_dof_weight); free(w_dof_dof); /* make domains from aggregates: *********************************/ i_domain_dof = (HYPRE_Int *) malloc((num_domains+1) * sizeof(HYPRE_Int)); i_dof_index = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); for (i=0; i < num_dofs; i++) i_dof_index[i] = -1; domain_dof_counter=0; for (i=0; i < num_domains; i++) { i_domain_dof[i] = domain_dof_counter; for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) if (i_dof_to_aggregate[j_dof_dof[k]] >= i && 
i_dof_index[j_dof_dof[k]]==-1) { i_dof_index[j_dof_dof[k]]++; domain_dof_counter++; } for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) i_dof_index[j_dof_dof[k]]=-1; } i_domain_dof[num_domains] = domain_dof_counter; j_domain_dof = (HYPRE_Int *) malloc(domain_dof_counter * sizeof(HYPRE_Int)); domain_dof_counter=0; for (i=0; i < num_domains; i++) { for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) if (i_dof_to_aggregate[j_dof_dof[k]] >= i && i_dof_index[j_dof_dof[k]]==-1) { i_dof_index[j_dof_dof[k]]++; j_domain_dof[domain_dof_counter] = j_dof_dof[k]; domain_dof_counter++; } for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) i_dof_index[j_dof_dof[k]]=-1; } free(i_aggregate_dof); free(j_aggregate_dof); free(i_dof_to_aggregate); /* i_domain_dof = i_aggregate_dof; j_domain_dof = j_aggregate_dof; */ hypre_printf("END domain_dof computations: =================================\n"); domain_matrixinverse_counter = 0; local_dof_counter = 0; for (i=0; i < num_domains; i++) { local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i]; domain_matrixinverse_counter+= local_dof_counter * local_dof_counter; if (local_dof_counter > max_local_dof_counter) max_local_dof_counter = local_dof_counter; } domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter); i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); AE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); XE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); /* i_dof_index = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); */ i_global_to_local = i_dof_index; for (i=0; i < num_dofs; i++) i_global_to_local[i] = -1; domain_matrixinverse_counter = 0; for (i=0; i < num_domains; i++) { 
local_dof_counter = 0; for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { i_global_to_local[j_domain_dof[j]] = local_dof_counter; i_local_to_global[local_dof_counter] = j_domain_dof[j]; local_dof_counter++; } /* get local matrix in AE: ======================================== */ for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) AE[i_loc + j_loc * local_dof_counter] = 0.e0; for (i_loc=0; i_loc < local_dof_counter; i_loc++) { i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j]; } } /* get block for Schwarz smoother: ============================= */ ierr = matinv(XE, AE, local_dof_counter); /* hypre_printf("ierr_AE_inv: %d\n", ierr); */ for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter + i_loc + j_loc * local_dof_counter] = XE[i_loc + j_loc * local_dof_counter]; domain_matrixinverse_counter+=local_dof_counter*local_dof_counter; for (l_loc=0; l_loc < local_dof_counter; l_loc++) i_global_to_local[i_local_to_global[l_loc]] = -1; } hypre_TFree(i_local_to_global); hypre_TFree(AE); hypre_TFree(XE); hypre_TFree(i_dof_index); *i_domain_dof_pointer = i_domain_dof; *j_domain_dof_pointer = j_domain_dof; *num_domains_pointer = num_domains; *domain_matrixinverse_pointer = domain_matrixinverse; /* x = hypre_CTAlloc(HYPRE_Real, num_dofs); rhs = hypre_CTAlloc(HYPRE_Real, num_dofs); v = hypre_CTAlloc(HYPRE_Real, num_dofs); w = hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); aux = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) x[i] = 0.e0; for (i=0; i < num_dofs; i++) rhs[i] = rand(); max_iter = 1000; hypre_printf("\nenter SchwarzPCG: =======================================\n"); ierr = hypre_Schwarzpcg(x, rhs, a_dof_dof, i_dof_dof, j_dof_dof, 
i_domain_dof, j_domain_dof, domain_matrixinverse, num_domains, v, w, d, aux, max_iter, num_dofs); hypre_printf("\n\n=======================================================\n"); hypre_printf(" END test PCG solve: \n"); hypre_printf("===========================================================\n"); hypre_TFree(x); hypre_TFree(rhs); hypre_TFree(aux); hypre_TFree(v); hypre_TFree(w); hypre_TFree(d); hypre_TFree(i_domain_dof); hypre_TFree(j_domain_dof); hypre_TFree(domain_matrixinverse); */ return ierr; }
HYPRE_Int hypre_SchwarzSolve(hypre_CSRMatrix *A, hypre_Vector *rhs_vector, HYPRE_Int num_domains, HYPRE_Int *i_domain_dof, HYPRE_Int *j_domain_dof, HYPRE_Real *domain_matrixinverse, hypre_Vector *x_vector, hypre_Vector *aux_vector) { HYPRE_Int ierr = 0; /* HYPRE_Int num_dofs; */ HYPRE_Int *i_dof_dof; HYPRE_Int *j_dof_dof; HYPRE_Real *a_dof_dof; HYPRE_Real *x; HYPRE_Real *rhs; HYPRE_Real *aux; HYPRE_Int i,j,k, j_loc, k_loc; HYPRE_Int matrix_size, matrix_size_counter = 0; /* initiate: ----------------------------------------------- */ /* num_dofs = hypre_CSRMatrixNumRows(A); */ i_dof_dof = hypre_CSRMatrixI(A); j_dof_dof = hypre_CSRMatrixJ(A); a_dof_dof = hypre_CSRMatrixData(A); x = hypre_VectorData(x_vector); rhs = hypre_VectorData(rhs_vector); aux = hypre_VectorData(aux_vector); /* for (i=0; i < num_dofs; i++) x[i] = 0.e0; */ /* forward solve: ----------------------------------------------- */ matrix_size_counter = 0; for (i=0; i < num_domains; i++) { matrix_size = i_domain_dof[i+1] - i_domain_dof[i]; /* compute residual: ---------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux[j_domain_dof[j]] = rhs[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) aux[j_domain_dof[j]] -= a_dof_dof[k] * x[j_dof_dof[k]]; } /* solve for correction: ------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { j_loc = j-i_domain_dof[i]; for (k=i_domain_dof[i]; k < i_domain_dof[i+1]; k++) { k_loc = k-i_domain_dof[i]; x[j_domain_dof[j]]+= domain_matrixinverse[matrix_size_counter + j_loc + k_loc * matrix_size] * aux[j_domain_dof[k]]; } } matrix_size_counter += matrix_size * matrix_size; } /* backward solve: ------------------------------------------------ */ for (i=num_domains-1; i > -1; i--) { matrix_size = i_domain_dof[i+1] - i_domain_dof[i]; matrix_size_counter -= matrix_size * matrix_size; /* compute residual: ---------------------------------------- */ for 
(j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux[j_domain_dof[j]] = rhs[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) aux[j_domain_dof[j]] -= a_dof_dof[k] * x[j_dof_dof[k]]; } /* solve for correction: ------------------------------------- */ for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { j_loc = j-i_domain_dof[i]; for (k=i_domain_dof[i]; k < i_domain_dof[i+1]; k++) { k_loc = k-i_domain_dof[i]; x[j_domain_dof[j]]+= domain_matrixinverse[matrix_size_counter + j_loc + k_loc * matrix_size] * aux[j_domain_dof[k]]; } } } return ierr; }
HYPRE_Int hypre_AMGNodalSchwarzSmoother( hypre_CSRMatrix *A, HYPRE_Int *dof_func, HYPRE_Int num_functions, HYPRE_Int option, HYPRE_Int **i_domain_dof_pointer, HYPRE_Int **j_domain_dof_pointer, HYPRE_Real **domain_matrixinverse_pointer, HYPRE_Int *num_domains_pointer) { /* option = 0: nodal symGS; 1: next to nodal symGS (overlapping Schwarz) */ HYPRE_Int *i_domain_dof, *j_domain_dof; HYPRE_Real *domain_matrixinverse; HYPRE_Int num_domains; HYPRE_Int *i_dof_node, *j_dof_node; HYPRE_Int *i_node_dof, *j_node_dof; HYPRE_Int *i_node_dof_dof, *j_node_dof_dof; HYPRE_Int *i_node_node, *j_node_node; HYPRE_Int num_nodes; HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A); HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A); HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A); HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A); HYPRE_Int ierr = 0; HYPRE_Int i,j,k, l_loc, i_loc, j_loc; HYPRE_Int i_dof, j_dof; HYPRE_Int *i_local_to_global; HYPRE_Int *i_global_to_local; HYPRE_Int *i_int; HYPRE_Int *i_int_to_local; HYPRE_Int int_dof_counter, local_dof_counter, max_local_dof_counter=0; HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0; HYPRE_Real *AE, *XE; /* PCG arrays: --------------------------------------------------- HYPRE_Real *x, *rhs, *v, *w, *d, *aux; HYPRE_Int max_iter; ------------------------------------------------------------------ */ /* build dof_node graph: ----------------------------------------- */ num_nodes = num_dofs / num_functions; hypre_printf("\nnum_nodes: %d, num_dofs: %d = %d x %d\n", num_nodes, num_dofs, num_nodes, num_functions); i_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs+1); j_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs); for (i=0; i < num_dofs+1; i++) i_dof_node[i] = i; for (j = 0; j < num_nodes; j++) for (k = 0; k < num_functions; k++) j_dof_node[j*num_functions+k] = j; /* build node_dof graph: ----------------------------------------- */ ierr = transpose_matrix_create(&i_node_dof, &j_node_dof, i_dof_node, j_dof_node, num_dofs, num_nodes); /* build 
node_node graph: ----------------------------------------- */ ierr = matrix_matrix_product(&i_node_dof_dof, &j_node_dof_dof, i_node_dof, j_node_dof, i_dof_dof, j_dof_dof, num_nodes, num_dofs, num_dofs); ierr = matrix_matrix_product(&i_node_node, &j_node_node, i_node_dof_dof, j_node_dof_dof, i_dof_node, j_dof_node, num_nodes, num_dofs, num_nodes); hypre_TFree(i_node_dof_dof); hypre_TFree(j_node_dof_dof); /* compute for each node the local information: -------------------- */ i_global_to_local = i_dof_node; for (i_dof =0; i_dof < num_dofs; i_dof++) i_global_to_local[i_dof] = -1; domain_matrixinverse_counter = 0; domain_dof_counter = 0; for (i=0; i < num_nodes; i++) { local_dof_counter = 0; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; if (i_global_to_local[j_dof] < 0) { i_global_to_local[j_dof] = local_dof_counter; local_dof_counter++; } } domain_matrixinverse_counter += local_dof_counter*local_dof_counter; domain_dof_counter += local_dof_counter; if (local_dof_counter > max_local_dof_counter) max_local_dof_counter = local_dof_counter; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; i_global_to_local[j_dof] = -1; } } num_domains = num_nodes; i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1); if (option == 1) j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter); else j_domain_dof = hypre_CTAlloc(HYPRE_Int, num_dofs); if (option == 1) domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter); else domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, num_dofs * num_functions); i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); AE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); XE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); i_int_to_local = hypre_CTAlloc(HYPRE_Int, 
max_local_dof_counter); i_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); for (l_loc=0; l_loc < max_local_dof_counter; l_loc++) i_int[l_loc] = -1; domain_dof_counter = 0; domain_matrixinverse_counter = 0; for (i=0; i < num_nodes; i++) { i_domain_dof[i] = domain_dof_counter; local_dof_counter = 0; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; if (i_global_to_local[j_dof] < 0) { i_global_to_local[j_dof] = local_dof_counter; i_local_to_global[local_dof_counter] = j_dof; local_dof_counter++; } } for (j=i_node_dof[i]; j < i_node_dof[i+1]; j++) for (k=i_dof_dof[j_node_dof[j]]; k < i_dof_dof[j_node_dof[j]+1]; k++) if (i_global_to_local[j_dof_dof[k]] < 0) hypre_printf("WRONG local indexing: ====================== \n"); int_dof_counter = 0; for (k=i_node_dof[i]; k < i_node_dof[i+1]; k++) { i_dof = j_node_dof[k]; i_loc = i_global_to_local[i_dof]; i_int[i_loc] = int_dof_counter; i_int_to_local[int_dof_counter] = i_loc; int_dof_counter++; } /* get local matrix AE: ======================================== */ if (option == 1) { for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) AE[i_loc + j_loc * local_dof_counter] = 0.e0; for (i_loc=0; i_loc < local_dof_counter; i_loc++) { i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j]; } } /* get block for Schwarz smoother: ============================= */ ierr = matinv(XE, AE, local_dof_counter); /* hypre_printf("ierr_AE_inv: %d\n", ierr); */ } if (option == 1) for (i_loc=0; i_loc < local_dof_counter; i_loc++) j_domain_dof[domain_dof_counter+i_loc] = i_local_to_global[i_loc]; if (option == 1) for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter 
+ i_loc + j_loc * local_dof_counter] = XE[i_loc + j_loc * local_dof_counter]; if (option == 0) { for (i_loc=0; i_loc < int_dof_counter; i_loc++) for (j_loc=0; j_loc < int_dof_counter; j_loc++) AE[i_loc + j_loc * int_dof_counter] = 0.e0; for (l_loc=0; l_loc < int_dof_counter; l_loc++) { i_loc = i_int_to_local[l_loc]; i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) if (i_int[j_loc] >=0) AE[i_loc + i_int[j_loc] * int_dof_counter] = a_dof_dof[j]; } } ierr = matinv(XE, AE, int_dof_counter); for (i_loc=0; i_loc < int_dof_counter; i_loc++) { j_domain_dof[domain_dof_counter + i_loc] = i_local_to_global[i_int_to_local[i_loc]]; for (j_loc=0; j_loc < int_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter + i_loc + j_loc * int_dof_counter] = XE[i_loc + j_loc * int_dof_counter]; } domain_dof_counter+=int_dof_counter; domain_matrixinverse_counter+=int_dof_counter*int_dof_counter; } else { domain_dof_counter+=local_dof_counter; domain_matrixinverse_counter+=local_dof_counter*local_dof_counter; } for (l_loc=0; l_loc < local_dof_counter; l_loc++) { i_int[l_loc] = -1; i_global_to_local[i_local_to_global[l_loc]] = -1; } } i_domain_dof[num_nodes] = domain_dof_counter; hypre_TFree(i_dof_node); hypre_TFree(j_dof_node); hypre_TFree(i_node_dof); hypre_TFree(j_node_dof); hypre_TFree(i_node_node); hypre_TFree(j_node_node); hypre_TFree(i_int); hypre_TFree(i_int_to_local); hypre_TFree(i_local_to_global); hypre_TFree(AE); hypre_TFree(XE); *i_domain_dof_pointer = i_domain_dof; *j_domain_dof_pointer = j_domain_dof; *num_domains_pointer = num_domains; *domain_matrixinverse_pointer = domain_matrixinverse; /* hypre_printf("exit *Schwarz*: ===============================\n\n"); */ /* ----------------------------------------------------------------- x = hypre_CTAlloc(HYPRE_Real, num_dofs); rhs = hypre_CTAlloc(HYPRE_Real, num_dofs); v = hypre_CTAlloc(HYPRE_Real, num_dofs); w = 
hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); aux = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) x[i] = 0.e0; for (i=0; i < num_dofs; i++) rhs[i] = rand(); max_iter = 1000; hypre_printf("\nenter SchwarzPCG: =======================================\n"); ierr = hypre_Schwarzpcg(x, rhs, a_dof_dof, i_dof_dof, j_dof_dof, i_domain_dof, j_domain_dof, domain_matrixinverse, num_domains, v, w, d, aux, max_iter, num_dofs); hypre_printf("\n\n=======================================================\n"); hypre_printf(" END test PCG solve: \n"); hypre_printf("===========================================================\n"); hypre_TFree(x); hypre_TFree(rhs); hypre_TFree(aux); hypre_TFree(v); hypre_TFree(w); hypre_TFree(d); ----------------------------------------------------------------------- */ return ierr; }
HYPRE_Int AmgCGCGraphAssemble (hypre_ParCSRMatrix *S,HYPRE_Int *vertexrange,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd,HYPRE_Int coarsen_type, HYPRE_IJMatrix *ijG) /* assemble a graph representing the connections between the grids * ================================================================================================ * S : the strength matrix * vertexrange : the parallel layout of the candidate coarse grid vertices * CF_marker, CF_marker_offd : the coarse/fine markers * coarsen_type : the coarsening type * ijG : the created graph * ================================================================================================*/ { HYPRE_Int ierr=0; HYPRE_Int i,/* ii,*/ip,j,jj,m,n,p; HYPRE_Int mpisize,mpirank; HYPRE_Real weight; MPI_Comm comm = hypre_ParCSRMatrixComm(S); /* hypre_MPI_Status status; */ HYPRE_IJMatrix ijmatrix; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S); /* HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); */ /* HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); */ HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j = NULL; HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); HYPRE_Int pointrange_start,pointrange_end; HYPRE_Int *pointrange,*pointrange_nonlocal,*pointrange_strong=NULL; HYPRE_Int vertexrange_start,vertexrange_end; HYPRE_Int *vertexrange_strong= NULL; HYPRE_Int *vertexrange_nonlocal; HYPRE_Int num_recvs,num_recvs_strong; HYPRE_Int *recv_procs,*recv_procs_strong=NULL; HYPRE_Int /* *zeros,*rownz,*/*rownz_diag,*rownz_offd; HYPRE_Int nz; HYPRE_Int nlocal; HYPRE_Int one=1; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S); hypre_MPI_Comm_size (comm,&mpisize); hypre_MPI_Comm_rank (comm,&mpirank); /* determine neighbor processors */ num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs 
(comm_pkg); pointrange = hypre_ParCSRMatrixRowStarts (S); pointrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); vertexrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); #ifdef HYPRE_NO_GLOBAL_PARTITION { HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg); HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs (comm_pkg); HYPRE_Int *int_buf_data = hypre_CTAlloc (HYPRE_Int,4*num_sends); HYPRE_Int *int_buf_data2 = int_buf_data + 2*num_sends; hypre_MPI_Request *sendrequest,*recvrequest; nlocal = vertexrange[1] - vertexrange[0]; pointrange_start = pointrange[0]; pointrange_end = pointrange[1]; vertexrange_start = vertexrange[0]; vertexrange_end = vertexrange[1]; sendrequest = hypre_CTAlloc (hypre_MPI_Request,2*(num_sends+num_recvs)); recvrequest = sendrequest+2*num_sends; for (i=0;i<num_recvs;i++) { hypre_MPI_Irecv (pointrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_pointrange,comm,&recvrequest[2*i]); hypre_MPI_Irecv (vertexrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_vertexrange,comm,&recvrequest[2*i+1]); } for (i=0;i<num_sends;i++) { int_buf_data[2*i] = pointrange_start; int_buf_data[2*i+1] = pointrange_end; int_buf_data2[2*i] = vertexrange_start; int_buf_data2[2*i+1] = vertexrange_end; hypre_MPI_Isend (int_buf_data+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_pointrange,comm,&sendrequest[2*i]); hypre_MPI_Isend (int_buf_data2+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_vertexrange,comm,&sendrequest[2*i+1]); } hypre_MPI_Waitall (2*(num_sends+num_recvs),sendrequest,hypre_MPI_STATUSES_IGNORE); hypre_TFree (int_buf_data); hypre_TFree (sendrequest); } #else nlocal = vertexrange[mpirank+1] - vertexrange[mpirank]; pointrange_start = pointrange[mpirank]; pointrange_end = pointrange[mpirank+1]; vertexrange_start = vertexrange[mpirank]; vertexrange_end = vertexrange[mpirank+1]; for (i=0;i<num_recvs;i++) { pointrange_nonlocal[2*i] = pointrange[recv_procs[i]]; pointrange_nonlocal[2*i+1] = pointrange[recv_procs[i]+1]; vertexrange_nonlocal[2*i] = 
vertexrange[recv_procs[i]]; vertexrange_nonlocal[2*i+1] = vertexrange[recv_procs[i]+1]; } #endif /* now we have the array recv_procs. However, it may contain too many entries as it is inherited from A. We now have to determine the subset which contains only the strongly connected neighbors */ if (num_cols_offd) { S_offd_j = hypre_CSRMatrixJ(S_offd); recv_procs_strong = hypre_CTAlloc (HYPRE_Int,num_recvs); memset (recv_procs_strong,0,num_recvs*sizeof(HYPRE_Int)); /* don't forget to shorten the pointrange and vertexrange arrays accordingly */ pointrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (pointrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); vertexrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (vertexrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); for (i=0;i<num_variables;i++) for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = col_map_offd[S_offd_j[j]]; for (p=0;p<num_recvs;p++) /* S_offd_j is NOT sorted! */ if (jj >= pointrange_nonlocal[2*p] && jj < pointrange_nonlocal[2*p+1]) break; #if 0 hypre_printf ("Processor %d, remote point %d on processor %d\n",mpirank,jj,recv_procs[p]); #endif recv_procs_strong [p]=1; } for (p=0,num_recvs_strong=0;p<num_recvs;p++) { if (recv_procs_strong[p]) { recv_procs_strong[num_recvs_strong]=recv_procs[p]; pointrange_strong[2*num_recvs_strong] = pointrange_nonlocal[2*p]; pointrange_strong[2*num_recvs_strong+1] = pointrange_nonlocal[2*p+1]; vertexrange_strong[2*num_recvs_strong] = vertexrange_nonlocal[2*p]; vertexrange_strong[2*num_recvs_strong+1] = vertexrange_nonlocal[2*p+1]; num_recvs_strong++; } } } else num_recvs_strong=0; hypre_TFree (pointrange_nonlocal); hypre_TFree (vertexrange_nonlocal); rownz_diag = hypre_CTAlloc (HYPRE_Int,2*nlocal); rownz_offd = rownz_diag + nlocal; for (p=0,nz=0;p<num_recvs_strong;p++) { nz += vertexrange_strong[2*p+1]-vertexrange_strong[2*p]; } for (m=0;m<nlocal;m++) { rownz_diag[m]=nlocal-1; rownz_offd[m]=nz; } HYPRE_IJMatrixCreate(comm, vertexrange_start, vertexrange_end-1, 
vertexrange_start, vertexrange_end-1, &ijmatrix); HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR); HYPRE_IJMatrixSetDiagOffdSizes (ijmatrix, rownz_diag, rownz_offd); HYPRE_IJMatrixInitialize(ijmatrix); hypre_TFree (rownz_diag); /* initialize graph */ weight = -1; for (m=vertexrange_start;m<vertexrange_end;m++) { for (p=0;p<num_recvs_strong;p++) { for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while initializing graphs at (%d, %d)\n",mpirank,ierr,m,n); #endif } } } /* weight graph */ for (i=0;i<num_variables;i++) { for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = S_offd_j[j]; /* jj is not a global index!!! */ /* determine processor */ for (p=0;p<num_recvs_strong;p++) if (col_map_offd[jj] >= pointrange_strong[2*p] && col_map_offd[jj] < pointrange_strong[2*p+1]) break; ip=recv_procs_strong[p]; /* loop over all coarse grids constructed on this processor domain */ for (m=vertexrange_start;m<vertexrange_end;m++) { /* loop over all coarse grids constructed on neighbor processor domain */ for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { /* coarse grid counting inside gridpartition->local/gridpartition->nonlocal starts with one while counting inside range starts with zero */ if (CF_marker[i]-1==m && CF_marker_offd[jj]-1==n) /* C-C-coupling */ weight = -1; else if ( (CF_marker[i]-1==m && (CF_marker_offd[jj]==0 || CF_marker_offd[jj]-1!=n) ) || ( (CF_marker[i]==0 || CF_marker[i]-1!=m) && CF_marker_offd[jj]-1==n ) ) /* C-F-coupling */ weight = 0; else weight = -8; /* F-F-coupling */ ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while adding %lf to entry (%d, %d)\n",mpirank,ierr,weight,m,n); #endif } } } } /* assemble */ HYPRE_IJMatrixAssemble (ijmatrix); /*if (num_recvs_strong) {*/ hypre_TFree (recv_procs_strong); hypre_TFree (pointrange_strong); 
hypre_TFree (vertexrange_strong); /*} */ *ijG = ijmatrix; return (ierr); }
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) 
= hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, 
&col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, 
I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); 
hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
HYPRE_Int hypre_BoomerAMGRelaxT( hypre_ParCSRMatrix *A, hypre_ParVector *f, HYPRE_Int *cf_marker, HYPRE_Int relax_type, HYPRE_Int relax_points, HYPRE_Real relax_weight, hypre_ParVector *u, hypre_ParVector *Vtemp ) { hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int n_global= hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int n = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int first_index = hypre_ParVectorFirstIndex(u); hypre_Vector *u_local = hypre_ParVectorLocalVector(u); HYPRE_Real *u_data = hypre_VectorData(u_local); hypre_Vector *Vtemp_local = hypre_ParVectorLocalVector(Vtemp); HYPRE_Real *Vtemp_data = hypre_VectorData(Vtemp_local); hypre_CSRMatrix *A_CSR; HYPRE_Int *A_CSR_i; HYPRE_Int *A_CSR_j; HYPRE_Real *A_CSR_data; hypre_Vector *f_vector; HYPRE_Real *f_vector_data; HYPRE_Int i; HYPRE_Int jj; HYPRE_Int column; HYPRE_Int relax_error = 0; HYPRE_Real *A_mat; HYPRE_Real *b_vec; HYPRE_Real zero = 0.0; /*----------------------------------------------------------------------- * Switch statement to direct control based on relax_type: * relax_type = 7 -> Jacobi (uses ParMatvec) * relax_type = 9 -> Direct Solve *-----------------------------------------------------------------------*/ switch (relax_type) { case 7: /* Jacobi (uses ParMatvec) */ { /*----------------------------------------------------------------- * Copy f into temporary vector. *-----------------------------------------------------------------*/ hypre_ParVectorCopy(f,Vtemp); /*----------------------------------------------------------------- * Perform MatvecT Vtemp=f-A^Tu *-----------------------------------------------------------------*/ hypre_ParCSRMatrixMatvecT(-1.0,A, u, 1.0, Vtemp); for (i = 0; i < n; i++) { /*----------------------------------------------------------- * If diagonal is nonzero, relax point i; otherwise, skip it. 
*-----------------------------------------------------------*/ if (A_diag_data[A_diag_i[i]] != zero) { u_data[i] += relax_weight * Vtemp_data[i] / A_diag_data[A_diag_i[i]]; } } } break; case 9: /* Direct solve: use gaussian elimination */ { /*----------------------------------------------------------------- * Generate CSR matrix from ParCSRMatrix A *-----------------------------------------------------------------*/ if (n) { A_CSR = hypre_ParCSRMatrixToCSRMatrixAll(A); f_vector = hypre_ParVectorToVectorAll(f); A_CSR_i = hypre_CSRMatrixI(A_CSR); A_CSR_j = hypre_CSRMatrixJ(A_CSR); A_CSR_data = hypre_CSRMatrixData(A_CSR); f_vector_data = hypre_VectorData(f_vector); A_mat = hypre_CTAlloc(HYPRE_Real, n_global*n_global); b_vec = hypre_CTAlloc(HYPRE_Real, n_global); /*--------------------------------------------------------------- * Load transpose of CSR matrix into A_mat. *---------------------------------------------------------------*/ for (i = 0; i < n_global; i++) { for (jj = A_CSR_i[i]; jj < A_CSR_i[i+1]; jj++) { column = A_CSR_j[jj]; A_mat[column*n_global+i] = A_CSR_data[jj]; } b_vec[i] = f_vector_data[i]; } relax_error = gselim(A_mat,b_vec,n_global); for (i = 0; i < n; i++) { u_data[i] = b_vec[first_index+i]; } hypre_TFree(A_mat); hypre_TFree(b_vec); hypre_CSRMatrixDestroy(A_CSR); A_CSR = NULL; hypre_SeqVectorDestroy(f_vector); f_vector = NULL; } } break; } return(relax_error); }
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data, HYPRE_Int p_level, HYPRE_Int coarse_threshold) { /* Par Data Structure variables */ hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data); MPI_Comm comm = hypre_ParCSRMatrixComm(Par_A_array[0]); MPI_Comm new_comm, seq_comm; hypre_ParCSRMatrix *A_seq = NULL; hypre_CSRMatrix *A_seq_diag; hypre_CSRMatrix *A_seq_offd; hypre_ParVector *F_seq = NULL; hypre_ParVector *U_seq = NULL; hypre_ParCSRMatrix *A; HYPRE_Int **dof_func_array; HYPRE_Int num_procs, my_id; HYPRE_Int not_finished_coarsening; HYPRE_Int level; HYPRE_Solver coarse_solver; /* misc */ dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data); /*MPI Stuff */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); /*initial */ level = p_level; not_finished_coarsening = 1; /* convert A at this level to sequential */ A = Par_A_array[level]; { double *A_seq_data = NULL; HYPRE_Int *A_seq_i = NULL; HYPRE_Int *A_seq_offd_i = NULL; HYPRE_Int *A_seq_j = NULL; double *A_tmp_data = NULL; HYPRE_Int *A_tmp_i = NULL; HYPRE_Int *A_tmp_j = NULL; HYPRE_Int *info, *displs, *displs2; HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); double *A_diag_data = hypre_CSRMatrixData(A_diag); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A); hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); ranks = hypre_CTAlloc(HYPRE_Int, 
num_procs); new_num_procs = 0; for (i=0; i < num_procs; i++) if (info[i]) { ranks[new_num_procs] = i; info[new_num_procs++] = info[i]; } MPI_Comm_group(comm, &orig_group); hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group); MPI_Comm_create(comm, new_group, &new_comm); hypre_MPI_Group_free(&new_group); hypre_MPI_Group_free(&orig_group); if (num_rows) { /* alloc space in seq data structure only for participating procs*/ HYPRE_BoomerAMGCreate(&coarse_solver); HYPRE_BoomerAMGSetMaxRowSum(coarse_solver, hypre_ParAMGDataMaxRowSum(amg_data)); HYPRE_BoomerAMGSetStrongThreshold(coarse_solver, hypre_ParAMGDataStrongThreshold(amg_data)); HYPRE_BoomerAMGSetCoarsenType(coarse_solver, hypre_ParAMGDataCoarsenType(amg_data)); HYPRE_BoomerAMGSetInterpType(coarse_solver, hypre_ParAMGDataInterpType(amg_data)); HYPRE_BoomerAMGSetTruncFactor(coarse_solver, hypre_ParAMGDataTruncFactor(amg_data)); HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, hypre_ParAMGDataPMaxElmts(amg_data)); if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) HYPRE_BoomerAMGSetRelaxType(coarse_solver, hypre_ParAMGDataUserRelaxType(amg_data)); HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, hypre_ParAMGDataRelaxOrder(amg_data)); HYPRE_BoomerAMGSetRelaxWt(coarse_solver, hypre_ParAMGDataUserRelaxWeight(amg_data)); if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) HYPRE_BoomerAMGSetNumSweeps(coarse_solver, hypre_ParAMGDataUserNumSweeps(amg_data)); HYPRE_BoomerAMGSetNumFunctions(coarse_solver, hypre_ParAMGDataNumFunctions(amg_data)); HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); HYPRE_BoomerAMGSetTol(coarse_solver, 0); /* Create CSR Matrix, will be Diag part of new matrix */ A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1); A_tmp_i[0] = 0; for (i=1; i < num_rows+1; i++) A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1]; num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows]; A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros); A_tmp_data = hypre_CTAlloc(double, num_nonzeros); cnt = 0; for (i=0; i < 
num_rows; i++) { for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++) { A_tmp_j[cnt] = A_diag_j[j]+first_row_index; A_tmp_data[cnt++] = A_diag_data[j]; } for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++) { A_tmp_j[cnt] = col_map_offd[A_offd_j[j]]; A_tmp_data[cnt++] = A_offd_data[j]; } } displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); displs[0] = 0; for (i=1; i < new_num_procs+1; i++) displs[i] = displs[i-1]+info[i-1]; size = displs[new_num_procs]; A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1); A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1); hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, displs, HYPRE_MPI_INT, new_comm ); displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); A_seq_i[0] = 0; displs2[0] = 0; for (j=1; j < displs[1]; j++) A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; for (i=1; i < new_num_procs; i++) { for (j=displs[i]; j < displs[i+1]; j++) { A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; } } A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1]; displs2[new_num_procs] = A_seq_i[size]; for (i=1; i < new_num_procs+1; i++) { displs2[i] = A_seq_i[displs[i]]; info[i-1] = displs2[i] - displs2[i-1]; } total_nnz = displs2[new_num_procs]; A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz); A_seq_data = hypre_CTAlloc(double, total_nnz); hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, HYPRE_MPI_INT, new_comm ); hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE, A_seq_data, info, displs2, hypre_MPI_DOUBLE, new_comm ); hypre_TFree(displs); hypre_TFree(displs2); hypre_TFree(A_tmp_i); hypre_TFree(A_tmp_j); hypre_TFree(A_tmp_data); row_starts = hypre_CTAlloc(HYPRE_Int,2); row_starts[0] = 0; row_starts[1] = size; /* Create 1 proc communicator */ seq_comm = hypre_MPI_COMM_SELF; A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size, row_starts, row_starts, 0,total_nnz,0); A_seq_diag = hypre_ParCSRMatrixDiag(A_seq); A_seq_offd = hypre_ParCSRMatrixOffd(A_seq); hypre_CSRMatrixData(A_seq_diag) = A_seq_data; 
hypre_CSRMatrixI(A_seq_diag) = A_seq_i; hypre_CSRMatrixJ(A_seq_diag) = A_seq_j; hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i; F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); hypre_ParVectorOwnsPartitioning(F_seq) = 0; hypre_ParVectorOwnsPartitioning(U_seq) = 0; hypre_ParVectorInitialize(F_seq); hypre_ParVectorInitialize(U_seq); hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq); hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver; hypre_ParAMGDataACoarse(amg_data) = A_seq; hypre_ParAMGDataFCoarse(amg_data) = F_seq; hypre_ParAMGDataUCoarse(amg_data) = U_seq; hypre_ParAMGDataNewComm(amg_data) = new_comm; } hypre_TFree(info); hypre_TFree(ranks); } return 0; }
/*--------------------------------------------------------------------------
 * hypre_ParMatmul_FC
 *
 * Creates and returns a matrix C that holds the "Fine"-designated rows of
 * the matrix product A*P.  A row i1 is "Fine" when CF_marker[i1] < 0; every
 * other ("Coarse") row of C is a copy of the same row of P, which is useful
 * if C is meant to become a replacement for P.
 *
 * A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC where nC is the
 * number of coarse rows/columns, nF the number of fine rows/columns.
 * The size of C=A*P is (nC+nF)*nC, even though not all rows of C are
 * actually computed.  If we were to construct a matrix consisting only of
 * the computed rows of C, its size would be nF*nC.
 * "Fine" is defined solely by the marker array, and for example could be
 * a proper subset of the fine points of a multigrid hierarchy.
 *
 * To compute a submatrix of C containing only the computed data, i.e. only
 * "Fine" rows, we would have to do a lot of computational work, with a lot
 * of communication.  The communication is because such a matrix would need
 * global information that depends on which rows are "Fine".
 *
 * Parameters:
 *   A, P           the factors of the product
 *   CF_marker      one entry per local row of A; < 0 marks a "Fine" row
 *   dof_func       if non-NULL, an entry A(i1,i2) only contributes when
 *                  dof_func[i1] equals dof_func[i2] (diag block of A) or
 *                  dof_func_offd[i2] (offd block of A)
 *   dof_func_offd  see dof_func
 *
 * Returns the new matrix, or NULL (after printing a message) when the
 * dimensions of A and P are incompatible.  C uses the row partitioning of A
 * and the column partitioning of P but does not own either of them.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *
hypre_ParMatmul_FC( hypre_ParCSRMatrix *A,
                    hypre_ParCSRMatrix *P,
                    HYPRE_Int          *CF_marker,
                    HYPRE_Int          *dof_func,
                    HYPRE_Int          *dof_func_offd )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int       *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int        first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int        last_col_diag_P;
   HYPRE_Int       *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int       *col_map_offd_C = NULL;
   HYPRE_Int       *map_P_to_C = NULL;

   hypre_CSRMatrix *C_diag;
   double          *C_diag_data;
   HYPRE_Int       *C_diag_i = NULL;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;
   double          *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i = NULL;
   HYPRE_Int       *C_offd_j = NULL;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        num_cols_offd_C = 0;

   /* Rows of P owned by other processes that A's offd block refers to.
      All pointers start out NULL so that none of them is ever read or
      handed to another routine with an indeterminate value. */
   hypre_CSRMatrix *Ps_ext = NULL;
   double          *Ps_ext_data = NULL;
   HYPRE_Int       *Ps_ext_i = NULL;
   HYPRE_Int       *Ps_ext_j = NULL;

   double          *P_ext_diag_data = NULL;
   HYPRE_Int       *P_ext_diag_i;
   HYPRE_Int       *P_ext_diag_j = NULL;
   HYPRE_Int        P_ext_diag_size;

   double          *P_ext_offd_data = NULL;
   HYPRE_Int       *P_ext_offd_i;
   HYPRE_Int       *P_ext_offd_j = NULL;
   HYPRE_Int        P_ext_offd_size;

   HYPRE_Int       *P_marker;
   HYPRE_Int       *temp = NULL;

   HYPRE_Int        i, j;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        n_rows_A_global, n_cols_A_global;
   HYPRE_Int        n_rows_P_global, n_cols_P_global;
   HYPRE_Int        allsquare = 0;
   HYPRE_Int        cnt, cnt_offd, cnt_diag;
   HYPRE_Int        num_procs;
   HYPRE_Int        value;

   double           a_entry;
   double           a_b_product;

   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P, A, 1);
      Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
      Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
      Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }

   /* Split the external rows into a part whose (global) columns fall in
      this process' diag column range of P and a part whose columns do not.
      First count the entries of each part ... */
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;

   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      }
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j    = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j    = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   /* ... then copy them.  Diag columns become local indices, offd columns
      stay global for now. */
   cnt_offd = 0;
   cnt_diag = 0;
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
      }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   /* Build col_map_offd_C: the sorted, duplicate-free union of the global
      off-diagonal columns of P_ext and of P itself. */
   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i = 0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i = 0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      num_cols_offd_C = 1;
      value = temp[0];
      for (i = 1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C);

   for (i = 0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   /* Replace the global columns of P_ext_offd by indices into
      col_map_offd_C. */
   for (i = 0; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);

   /* map_P_to_C[k] is the position in col_map_offd_C of P's k-th
      off-diagonal column. */
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P);

      cnt = 0;
      for (i = 0; i < num_cols_offd_C; i++)
      {
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );

   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;
   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(double, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /*-----------------------------------------------------------------------
    *  Loop over the local rows of A.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first
            pass and near the end where hypre_ParMatmul_FC is different from
            the regular hypre_ParMatmul */
      {
         /*--------------------------------------------------------------------
          *  Remember where row i1 starts in C_diag and C_offd.  A marker
          *  value below these positions belongs to an earlier row.
          *--------------------------------------------------------------------*/

         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               /* Interpolate only like "functions".  Where i1 and i2
                  correspond to different "functions" the interpolation
                  matrix should be 0; as no entry has been created for
                  C(i1,i2), nothing needs to be done in that case. */
               if ( dof_func == NULL || dof_func[i1] == dof_func_offd[i2] )
               {
                  a_entry = A_offd_data[jj2];

                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P + P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            /* Interpolate only like "functions"; see the offd loop. */
            if ( dof_func == NULL || dof_func[i1] == dof_func[i2] )
            {
               a_entry = A_diag_data[jj2];

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];

                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
               {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P + map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
         if (num_cols_offd_P)
         {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               /* Read P at jj2, the position inside P's row; the write
                  position jj_count_offd is a different counter.  P's local
                  off-diagonal column index is translated to C's numbering
                  because C is given col_map_offd_C, not col_map_offd_P. */
               C_offd_j[jj_count_offd]    = map_P_to_C[P_offd_j[jj2]];
               C_offd_data[jj_count_offd] = P_offd_data[jj2];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag]    = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C, 0);
   hypre_ParCSRMatrixSetColStartsOwner(C, 0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag) = C_diag_i;
   hypre_CSRMatrixJ(C_diag) = C_diag_j;

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i;
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixJ(C_offd) = C_offd_j;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P)
      hypre_TFree(map_P_to_C);

   return C;
}
HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, HYPRE_Real alpha, HYPRE_Real eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Real *value; HYPRE_Real ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; value = hypre_CTAlloc(HYPRE_Real,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && 
q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iy > ny_part[q]) { if (ix > nx_part[p]) { diag_j[cnt] = row_index-nx_local-1 ; diag_data[cnt++] = value[3]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { if (ix > nx_part[p]) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q, 
nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; if (ix < nx_part[p+1]-1) { diag_j[cnt] = row_index+nx_local+1 ; diag_data[cnt++] = value[3]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; if (ix < nx_part[p+1]-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix < nx-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } row_index++; } } if (num_procs > 1) { work = hypre_CTAlloc(HYPRE_Int,o_cnt); for (i=0; i < o_cnt; i++) work[i] = offd_j[i]; qsort0(work, 0, o_cnt-1); col_map_offd[0] = work[0]; cnt = 0; for (i=0; i < o_cnt; i++) { if (work[i] > col_map_offd[cnt]) { cnt++; col_map_offd[cnt] = work[i]; } } for (i=0; i < o_cnt; i++) { for (j=0; j < num_cols_offd; j++) { if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } } hypre_TFree(work); } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, 
diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(value); return (HYPRE_ParCSRMatrix) A; }
/*--------------------------------------------------------------------------
 * hypre_ParMatMinus_F
 *
 * hypre_ParMatMinus_F subtracts selected rows of its second argument
 * from selected rows of its first argument.  The marker array
 * determines which rows are affected - those for which CF_marker<0.
 * The result is returned as a new matrix.
 *
 * If P=(Pik),C=(Cik), i in Fine+Coarse, k in Coarse, we want
 *    new Pik = Pik - Cik, for Fine i only, all k.
 * This computation is purely local.
 *
 * The rows with CF_marker>=0 of the result are filled in by
 * hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ).
 *
 * This is _not_ a general-purpose matrix subtraction function.
 * This is written for an interpolation problem where it is known that C(i,k)
 * exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal
 * elements).
 *
 * Parameters:
 *   P          first operand
 *   C          second operand
 *   CF_marker  one entry per local row; < 0 selects a row for P - C
 *
 * Returns the new matrix, created by hypre_ParCSRMatrixUnion( C, P ).
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *
hypre_ParMatMinus_F( hypre_ParCSRMatrix *P,
                     hypre_ParCSRMatrix *C,
                     HYPRE_Int          *CF_marker )
{
   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int       *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int       *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int       *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int       *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int       *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int       *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int       *Pnew_diag_i;
   HYPRE_Int       *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int       *Pnew_offd_i;
   HYPRE_Int       *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int       *Pnew_j2m;
   HYPRE_Int       *Pnew_col_map_offd;

   HYPRE_Int        num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   /* HYPRE_Int     num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */
   HYPRE_Int        num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int        num_cols_offd_Pnew, num_rows_offd_Pnew;

   HYPRE_Int        i1, jmin, jmax, jrange;
   HYPRE_Int        row_begin, row_end;
   HYPRE_Int        diag_has_entries, offd_has_entries;
   HYPRE_Int        j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double           dc, dp;

   /*  Pnew = hypre_ParCSRMatrixCompleteClone( C );*/

   Pnew = hypre_ParCSRMatrixUnion( C, P );
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */

   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);

   /* Neither block gains or loses entries below, so these tests are the
      same for every row. */
   diag_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_diag) > 0 );
   offd_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_offd) > 0 );

   /* Find the j-ranges, needed to allocate a "reverse lookup" array. */
   /* This is the max j - min j over P and Pnew (which here is a copy of C).
      Only the diag block is considered. */
   /* For scalability reasons (jrange can get big) this won't work for the offd block.
      Also, indexing is more complicated in the offd block (c.f. col_map_offd).
      It's not clear, though whether the "quadratic" algorithm I'm using for the offd
      block is really any slower than the more complicated "linear" algorithm here. */
   jrange = 0;
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1] >= 0 || !diag_has_entries )
         continue;   /* only Fine rows matter */

      row_begin = Pnew_diag_i[i1];
      row_end   = Pnew_diag_i[i1+1];
      if ( row_begin == row_end )
         continue;   /* a row without entries has no first column to read */

      /* The columns of a row are not assumed to be in increasing order,
         so every entry is inspected. */
      jmin = Pnew_diag_j[row_begin];
      jmax = Pnew_diag_j[row_begin];
      for ( m = row_begin+1; m < row_end; ++m )
      {
         j = Pnew_diag_j[m];
         jmin = hypre_min( jmin, j );
         jmax = hypre_max( jmax, j );
      }
      for ( m = P_diag_i[i1]; m < P_diag_i[i1+1]; ++m )
      {
         j = P_diag_j[m];
         jmin = hypre_min( jmin, j );
         jmax = hypre_max( jmax, j );
      }
      jrange = hypre_max( jrange, jmax-jmin+1 );
   }

   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j] is the array index for j, i.e.
    *  Pnew_diag_j[ Pnew_j2m[j] ] = j
    *  (the array is indexed relative to the smallest column of the row)
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      if ( CF_marker[i1] >= 0 || !diag_has_entries )
         continue;   /* Fine data only */

      row_begin = Pnew_diag_i[i1];
      row_end   = Pnew_diag_i[i1+1];
      if ( row_begin == row_end )
      {
         /* Nothing can be stored in this row, so P and C must not
            have anything to put there either. */
         hypre_assert( C_diag_i[i1] == C_diag_i[i1+1] );
         hypre_assert( P_diag_i[i1] == P_diag_i[i1+1] );
         continue;
      }

      /* just needed for an assertion below... */
      for ( j = 0; j < jrange; ++j ) Pnew_j2m[j] = -1;

      /* Smallest column of this row over Pnew and P; columns are not
         assumed to be in increasing order. */
      jmin = Pnew_diag_j[row_begin];
      for ( m = row_begin+1; m < row_end; ++m )
      {
         j = Pnew_diag_j[m];
         jmin = hypre_min( jmin, j );
      }
      for ( m = P_diag_i[i1]; m < P_diag_i[i1+1]; ++m )
      {
         j = P_diag_j[m];
         jmin = hypre_min( jmin, j );
      }

      for ( m = row_begin; m < row_end; ++m )
      {
         j = Pnew_diag_j[m];
         hypre_assert( j-jmin>=0 );
         hypre_assert( j-jmin<jrange );
         Pnew_j2m[ j-jmin ] = m;
      }

      /*-----------------------------------------------------------------------
       *  Loop over C_diag data for the current row.
       *  Subtract each C data entry from the corresponding Pnew entry.
       *-----------------------------------------------------------------------*/

      for ( mc = C_diag_i[i1]; mc < C_diag_i[i1+1]; ++mc )
      {
         jc = C_diag_j[mc];
         dc = C_diag_data[mc];
         m = Pnew_j2m[jc-jmin];
         hypre_assert( m>=0 );
         Pnew_diag_data[m] -= dc;
      }

      /*-----------------------------------------------------------------------
       *  Loop over P_diag data for the current row.
       *  Add each P data entry to the corresponding Pnew entry.
       *-----------------------------------------------------------------------*/

      for ( mp = P_diag_i[i1]; mp < P_diag_i[i1+1]; ++mp )
      {
         jp = P_diag_j[mp];
         dp = P_diag_data[mp];
         m = Pnew_j2m[jp-jmin];
         hypre_assert( m>=0 );
         Pnew_diag_data[m] += dp;
      }
   }

   /*-----------------------------------------------------------------------
    * Repeat for the offd block.
    *-----------------------------------------------------------------------*/

   for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
   {
      if ( CF_marker[i1] >= 0 || !offd_has_entries )
         continue;   /* Fine data only */
      if ( !num_cols_offd_Pnew )
         continue;

      /* This is a simple quadratic algorithm.  If necessary I may try
         to implement the ideas used on the diag block later.
         Entries are matched through their global column numbers because
         Pnew, C and P each have their own offd column map. */
      for ( m = Pnew_offd_i[i1]; m < Pnew_offd_i[i1+1]; ++m )
      {
         j = Pnew_offd_j[m];
         jg = Pnew_col_map_offd[j];
         Pnew_offd_data[m] = 0;
         if ( num_cols_offd_C )
         {
            for ( mc = C_offd_i[i1]; mc < C_offd_i[i1+1]; ++mc )
            {
               jC = C_offd_j[mc];
               jCg = C_col_map_offd[jC];
               if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
            }
         }
         if ( num_cols_offd_P )
         {
            for ( mp = P_offd_i[i1]; mp < P_offd_i[i1+1]; ++mp )
            {
               jP = P_offd_j[mp];
               jPg = P_col_map_offd[jP];
               if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
            }
         }
      }
   }

   hypre_TFree(Pnew_j2m);

   return Pnew;
}
HYPRE_Int hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); double *A_diag_data = hypre_CSRMatrixData(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j; double *AN_diag_data; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j; double *AN_offd_data; HYPRE_Int *col_map_offd_AN; HYPRE_Int *new_col_map_offd; HYPRE_Int *row_starts_AN; HYPRE_Int AN_num_nonzeros_diag = 0; HYPRE_Int AN_num_nonzeros_offd = 0; HYPRE_Int num_cols_offd_AN; HYPRE_Int new_num_cols_offd; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *new_send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN; HYPRE_Int *send_procs_AN; HYPRE_Int *send_map_starts_AN; HYPRE_Int *send_map_elmts_AN; HYPRE_Int *recv_procs_AN; HYPRE_Int *recv_vec_starts_AN; HYPRE_Int i, j, k, k_map; HYPRE_Int ierr = 0; HYPRE_Int index, row; HYPRE_Int start_index; HYPRE_Int num_procs; HYPRE_Int node, cnt; HYPRE_Int mode; HYPRE_Int new_send_elmts_size; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int num_fun2; HYPRE_Int *map_to_node; 
HYPRE_Int *map_to_map; HYPRE_Int *counter; double sum; double *data; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } mode = fabs(option); comm_pkg_AN = NULL; col_map_offd_AN = NULL; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions; #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = row_starts_AN[num_procs]; #endif num_nodes = num_variables/num_functions; num_fun2 = num_functions*num_functions; map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables); AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); counter = hypre_CTAlloc(HYPRE_Int, num_nodes); for (i=0; i < num_variables; i++) map_to_node[i] = i/num_functions; for (i=0; i < num_nodes; i++) counter[i] = -1; AN_num_nonzeros_diag = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_diag_i[i] = AN_num_nonzeros_diag; for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_diag++; } } row++; } } AN_diag_i[num_nodes] = AN_num_nonzeros_diag; AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i < num_nodes; 
i++) counter[i] = -1; index = 0; start_index = 0; row = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]*A_diag_data[k]; index++; } else { AN_diag_data[counter[k_map]] += A_diag_data[k]*A_diag_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] = sqrt(AN_diag_data[i]); } break; case 2: /* sum of abs. value of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = fabs(A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] /= num_fun2; } break; case 3: /* largest element of each block (sets true value - not abs. value) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]; index++; } else { if (fabs(A_diag_data[k]) > fabs(AN_diag_data[counter[k_map]])) AN_diag_data[counter[k_map]] = A_diag_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. 
norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; data[index*num_functions + j] = fabs(A_diag_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) { AN_diag_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_diag_data[i] = hypre_max( AN_diag_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = (A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += (A_diag_data[k]); } } row++; } start_index = index; } } break; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = - AN_diag_data[index]; } } num_nonzeros_offd = A_offd_i[num_variables]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); num_cols_offd_AN = 0; if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); 
hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_AN = NULL; send_map_elmts_AN = NULL; if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_AN = NULL; if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs); for (i=0; i < num_sends; i++) send_procs_AN[i] = send_procs[i]; for (i=0; i < num_recvs; i++) recv_procs_AN[i] = recv_procs[i]; send_map_starts_AN[0] = 0; cnt = 0; for (i=0; i < num_sends; i++) { k_map = send_map_starts[i]; if (send_map_starts[i+1]-k_map) send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions; for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++) { node = send_map_elmts[j]/num_functions; if (node > send_map_elmts_AN[cnt-1]) send_map_elmts_AN[cnt++] = node; } send_map_starts_AN[i+1] = cnt; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } num_cols_offd = hypre_CSRMatrixNumCols(A_offd); if (num_cols_offd) { if (num_cols_offd > num_variables) { hypre_TFree(map_to_node); map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } num_cols_offd_AN = 1; map_to_node[0] = col_map_offd[0]/num_functions; for (i=1; i < num_cols_offd; i++) { 
map_to_node[i] = col_map_offd[i]/num_functions; if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++; } if (num_cols_offd_AN > num_nodes) { hypre_TFree(counter); counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); } map_to_map = NULL; col_map_offd_AN = NULL; map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); col_map_offd_AN[0] = map_to_node[0]; recv_vec_starts_AN[0] = 0; cnt = 1; for (i=0; i < num_recvs; i++) { for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { node = map_to_node[j]; if (node > col_map_offd_AN[cnt-1]) { col_map_offd_AN[cnt++] = node; } map_to_map[j] = cnt-1; } recv_vec_starts_AN[i+1] = cnt; } for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; AN_num_nonzeros_offd = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_offd_i[i] = AN_num_nonzeros_offd; for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_offd++; } } row++; } } AN_offd_i[num_nodes] = AN_num_nonzeros_offd; } AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN, AN_num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; if (AN_num_nonzeros_offd) { AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd); hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; index = 0; row = 0; AN_offd_i[0] = 0; start_index = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]*A_offd_data[k]; index++; } else { AN_offd_data[counter[k_map]] += A_offd_data[k]*A_offd_data[k]; } } row++; } 
start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] = sqrt(AN_offd_data[i]); } break; case 2: /* sum of abs. value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = fabs(A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] /= num_fun2; } break; case 3: /* largest element in each block (not abs. value ) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]; index++; } else { if (fabs(A_offd_data[k]) > fabs(AN_offd_data[counter[k_map]])) AN_offd_data[counter[k_map]] = A_offd_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. 
norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; data[index*num_functions + j] = fabs(A_offd_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) { AN_offd_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_offd_data[i] = hypre_max( AN_offd_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = (A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += (A_offd_data[k]); } } row++; } start_index = index; } } break; } hypre_TFree(map_to_map); } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd_AN, AN_num_nonzeros_diag, AN_num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); 
hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; new_num_cols_offd = num_functions*num_cols_offd_AN; if (new_num_cols_offd > num_cols_offd) { new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd); cnt = 0; for (i=0; i < num_cols_offd_AN; i++) { for (j=0; j < num_functions; j++) { new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j; } } cnt = 0; for (i=0; i < num_cols_offd; i++) { while (col_map_offd[i] > new_col_map_offd[cnt]) cnt++; col_map_offd[i] = cnt++; } for (i=0; i < num_recvs+1; i++) { recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i]; } for (i=0; i < num_nonzeros_offd; i++) { j = A_offd_j[i]; A_offd_j[i] = col_map_offd[j]; } hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd; hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd; hypre_TFree(col_map_offd); } hypre_TFree(map_to_node); new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions; if (new_send_elmts_size > send_map_starts[num_sends]) { new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size); cnt = 0; send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions; for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++) { for (k=0; k < num_functions; k++) new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k; } } hypre_TFree(send_map_elmts); hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts; } *AN_ptr = AN; hypre_TFree(counter); return (ierr); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
/*--------------------------------------------------------------------------
 * HYPRE_ParCSR_SuperLUSetup
 *
 * Factorizes the diagonal (local) block of A_csr with SuperLU and stores the
 * L/U factors and the row/column permutations in the solver object.
 *
 * solver : handle that is cast to HYPRE_SuperLU *; must not be NULL.
 * A_csr  : matrix whose row partitioning must start at row 0 on this
 *          process (otherwise an error is printed and -1 is returned).
 * b, x   : not used by the setup.
 *
 * Returns 0 on success, -1 on any failure (SuperLU not compiled in, start
 * row != 0, out of memory, or dgstrf reporting info != 0).  On failure the
 * solver is left marked as not factorized.
 *--------------------------------------------------------------------------*/
int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr,
                              HYPRE_ParVector b, HYPRE_ParVector x )
{
#ifdef HAVE_SUPERLU
   int    startRow, endRow, nrows, *partition, *AdiagI, *AdiagJ, nnz;
   int    irow, colNum, index, *cscI, *cscJ, jcol, *colNext;
   int    *etree, permcSpec, lwork, panelSize, relax, info;
   double *AdiagA, *cscA, diagPivotThresh, dropTol;
   hypre_CSRMatrix   *Adiag;
   HYPRE_SuperLU     *sluPtr;
   SuperMatrix       sluAmat, auxAmat;
   superlu_options_t slu_options;
   SuperLUStat_t     slu_stat;

   (void) b;
   (void) x;

   /* ---------------------------------------------------------------- */
   /* get matrix information                                           */
   /* ---------------------------------------------------------------- */

   sluPtr = (HYPRE_SuperLU *) solver;
   assert ( sluPtr != NULL );
   HYPRE_ParCSRMatrixGetRowPartitioning( A_csr, &partition );
   startRow = partition[0];
   endRow   = partition[1] - 1;
   nrows    = endRow - startRow + 1;
   free( partition );
   if ( startRow != 0 )
   {
      printf("HYPRE_ParCSR_SuperLUSetup ERROR - start row != 0.\n");
      return -1;
   }

   /* ---------------------------------------------------------------- */
   /* get hypre matrix                                                 */
   /* ---------------------------------------------------------------- */

   Adiag  = hypre_ParCSRMatrixDiag((hypre_ParCSRMatrix *) A_csr);
   AdiagI = hypre_CSRMatrixI(Adiag);
   AdiagJ = hypre_CSRMatrixJ(Adiag);
   AdiagA = hypre_CSRMatrixData(Adiag);
   nnz    = AdiagI[nrows];

   /* ---------------------------------------------------------------- */
   /* convert the csr matrix into csc matrix                           */
   /* ---------------------------------------------------------------- */

   /* colNext first holds the per-column entry counts (hence calloc) and
      is then turned into the next free slot of every column */
   colNext = (int *)    calloc( (size_t) nrows, sizeof(int) );
   cscJ    = (int *)    malloc( ((size_t) nrows + 1) * sizeof(int) );
   cscI    = (int *)    malloc( (size_t) nnz * sizeof(int) );
   cscA    = (double *) malloc( (size_t) nnz * sizeof(double) );
   /* a zero-sized request may legitimately return NULL */
   if ( cscJ == NULL || (nrows > 0 && colNext == NULL) ||
        (nnz > 0 && (cscI == NULL || cscA == NULL)) )
   {
      printf("HYPRE_ParCSR_SuperLUSetup ERROR - out of memory.\n");
      free(colNext);
      free(cscJ);
      free(cscI);
      free(cscA);
      return -1;
   }

   for ( irow = 0; irow < nrows; irow++ )
      for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ )
         colNext[AdiagJ[jcol]]++;

   /* column pointers = prefix sum of the column counts */
   cscJ[0] = 0;
   for ( jcol = 0; jcol < nrows; jcol++ )
      cscJ[jcol+1] = cscJ[jcol] + colNext[jcol];
   for ( jcol = 0; jcol < nrows; jcol++ )
      colNext[jcol] = cscJ[jcol];

   /* scatter the entries; rows are visited in increasing order, so the
      row indices inside each column come out sorted */
   for ( irow = 0; irow < nrows; irow++ )
   {
      for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ )
      {
         colNum = AdiagJ[jcol];
         index  = colNext[colNum]++;
         cscI[index] = irow;
         cscA[index] = AdiagA[jcol];
      }
   }
   free(colNext);

   /* ---------------------------------------------------------------- */
   /* create SuperMatrix (takes over cscA, cscI and cscJ)              */
   /* ---------------------------------------------------------------- */

   dCreate_CompCol_Matrix(&sluAmat,nrows,nrows,cscJ[nrows],cscA,cscI,
                          cscJ, SLU_NC, SLU_D, SLU_GE);
   etree = (int *) malloc( (size_t) nrows * sizeof(int) );
   sluPtr->permC_ = (int *) malloc( (size_t) nrows * sizeof(int) );
   sluPtr->permR_ = (int *) malloc( (size_t) nrows * sizeof(int) );
   if ( nrows > 0 && (etree == NULL || sluPtr->permC_ == NULL ||
                      sluPtr->permR_ == NULL) )
   {
      printf("HYPRE_ParCSR_SuperLUSetup ERROR - out of memory.\n");
      free(etree);
      free(sluPtr->permC_);
      free(sluPtr->permR_);
      sluPtr->permC_ = NULL;
      sluPtr->permR_ = NULL;
      /* releases the csc arrays handed over above */
      Destroy_CompCol_Matrix(&sluAmat);
      return -1;
   }

   permcSpec = 0;
   get_perm_c(permcSpec, &sluAmat, sluPtr->permC_);
   /* start from SuperLU's defaults so that no option field is read
      uninitialized, then apply the settings this routine relies on */
   set_default_options(&slu_options);
   slu_options.Fact = DOFACT;
   slu_options.SymmetricMode = NO;
   sp_preorder(&slu_options, &sluAmat, sluPtr->permC_, etree, &auxAmat);
   diagPivotThresh = 1.0;
   dropTol = 0.0;
   panelSize = sp_ienv(1);
   relax = sp_ienv(2);
   StatInit(&slu_stat);
   lwork = 0;
   info  = 0;
   slu_options.ColPerm = MY_PERMC;
   slu_options.DiagPivotThresh = diagPivotThresh;

   dgstrf(&slu_options, &auxAmat, dropTol, relax, panelSize,
          etree, NULL, lwork, sluPtr->permC_, sluPtr->permR_,
          &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), &slu_stat, &info);

   Destroy_CompCol_Permuted(&auxAmat);
   Destroy_CompCol_Matrix(&sluAmat);
   free(etree);
   StatFree(&slu_stat);

   if ( info != 0 )
   {
      printf("HYPRE_ParCSR_SuperLUSetup ERROR - dgstrf returned info = %d.\n",
             info);
      /* 0 < info <= n : factorization completed but U is exactly singular,
         so the factors exist and have to be released here.
         NOTE(review): relies on the documented dgstrf info convention. */
      if ( info > 0 && info <= nrows )
      {
         Destroy_SuperNode_Matrix(&(sluPtr->SLU_Lmat));
         Destroy_CompCol_Matrix(&(sluPtr->SLU_Umat));
      }
      free(sluPtr->permC_);
      free(sluPtr->permR_);
      sluPtr->permC_ = NULL;
      sluPtr->permR_ = NULL;
      sluPtr->factorized_ = 0;
      return -1;
   }

   sluPtr->factorized_ = 1;
   return 0;
#else
   /* the handle is passed by value, so there is nothing to reset here */
   (void) solver;
   (void) A_csr;
   (void) b;
   (void) x;
   printf("HYPRE_ParCSR_SuperLUSetup ERROR - SuperLU not enabled.\n");
   return -1;
#endif
}
/*--------------------------------------------------------------------------
 * Expand one nodal CSR block into its scalar counterpart.
 *
 * Every nodal row becomes num_functions consecutive scalar rows.  Scalar row
 * number f of a node holds, for each nodal entry with column c, one entry
 * with column c*num_functions + f.  Values are copied only if copy_data is
 * nonzero.  S_i[0] is not written (the caller's array is zero-initialized).
 *--------------------------------------------------------------------------*/
static void
hypre_ScalarCFSExpandCSR(HYPRE_Int  num_nodes,
                         HYPRE_Int  num_functions,
                         HYPRE_Int  copy_data,
                         HYPRE_Int *N_i,
                         HYPRE_Int *N_j,
                         double    *N_data,
                         HYPRE_Int *S_i,
                         HYPRE_Int *S_j,
                         double    *S_data)
{
   HYPRE_Int node, func, pos;
   HYPRE_Int fill = 0;   /* next free slot in S_j / S_data */
   HYPRE_Int row  = 0;   /* last scalar row that has been closed */

   for (node = 0; node < num_nodes; node++)
   {
      HYPRE_Int first = fill;
      HYPRE_Int last;

      /* row for function 0: nodal columns scaled to scalar numbering */
      for (pos = N_i[node]; pos < N_i[node+1]; pos++)
      {
         if (copy_data)
         {
            S_data[fill] = N_data[pos];
         }
         S_j[fill] = N_j[pos] * num_functions;
         fill++;
      }
      last = fill;
      S_i[++row] = fill;

      /* rows for the remaining functions: same pattern shifted by func */
      for (func = 1; func < num_functions; func++)
      {
         for (pos = first; pos < last; pos++)
         {
            if (copy_data)
            {
               S_data[fill] = S_data[pos];
            }
            S_j[fill] = S_j[pos] + func;
            fill++;
         }
         S_i[++row] = fill;
      }
   }
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateScalarCFS
 *
 * Builds the scalar (per-unknown) matrix S from the nodal matrix SN, where
 * every node carries num_functions unknowns.
 *
 * Outputs:
 *   *CF_marker_ptr       - CFN_marker replicated num_functions times per node.
 *   *dof_func_ptr        - written only when nodal < 0: for each node with
 *                          CFN_marker == 1, the sequence 0..num_functions-1.
 *   *col_offd_S_to_A_ptr - written only when col_offd_SN_to_AN is non-NULL.
 *   *S_ptr               - the new matrix; it gets a communication package
 *                          only when SN has one.
 *
 * Matrix values are copied from SN only when data is nonzero.
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix  *SN,
                               HYPRE_Int           *CFN_marker,
                               HYPRE_Int           *col_offd_SN_to_AN,
                               HYPRE_Int            num_functions,
                               HYPRE_Int            nodal,
                               HYPRE_Int            data,
                               HYPRE_Int          **dof_func_ptr,
                               HYPRE_Int          **CF_marker_ptr,
                               HYPRE_Int          **col_offd_S_to_A_ptr,
                               hypre_ParCSRMatrix **S_ptr)
{
   MPI_Comm             comm     = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN);

   /* nodal input */
   hypre_CSRMatrix *SN_diag         = hypre_ParCSRMatrixDiag(SN);
   hypre_CSRMatrix *SN_offd         = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int       *SN_diag_i       = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int       *SN_diag_j       = hypre_CSRMatrixJ(SN_diag);
   HYPRE_Int       *SN_offd_i       = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int       *SN_offd_j       = hypre_CSRMatrixJ(SN_offd);
   double          *SN_diag_data    = NULL;
   double          *SN_offd_data    = NULL;
   HYPRE_Int       *row_starts_SN   = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int       *col_starts_SN   = hypre_ParCSRMatrixColStarts(SN);
   HYPRE_Int       *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int        num_nodes        = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int        num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);

   /* scalar output */
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int          *S_diag_i;
   HYPRE_Int          *S_diag_j;
   double             *S_diag_data = NULL;
   HYPRE_Int          *S_offd_i;
   HYPRE_Int          *S_offd_j    = NULL;
   double             *S_offd_data = NULL;
   HYPRE_Int          *row_starts_S;
   HYPRE_Int          *col_starts_S;
   HYPRE_Int          *CF_marker;

   HYPRE_Int  num_variables;
   HYPRE_Int  num_cols_offd_S;
   HYPRE_Int  nnz_diag_S, nnz_offd_S;
   HYPRE_Int  global_rows_S, global_cols_S;
   HYPRE_Int  num_procs, num_starts;
   HYPRE_Int  node, func, p, n;
   HYPRE_Int  ierr = 0;

   hypre_MPI_Comm_size(comm, &num_procs);

   num_variables = num_functions * num_nodes;

   /* every unknown of a node inherits the node's C/F flag */
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);
   for (node = 0; node < num_nodes; node++)
   {
      for (func = 0; func < num_functions; func++)
      {
         CF_marker[node * num_functions + func] = CFN_marker[node];
      }
   }

   if (nodal < 0)
   {
      /* function numbers of the unknowns that sit on coarse nodes */
      HYPRE_Int *dof_func;
      HYPRE_Int  num_coarse_nodes = 0;

      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] == 1)
         {
            num_coarse_nodes++;
         }
      }

      dof_func = hypre_CTAlloc(HYPRE_Int, num_coarse_nodes*num_functions);
      n = 0;
      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] != 1)
         {
            continue;
         }
         for (func = 0; func < num_functions; func++)
         {
            dof_func[n++] = func;
         }
      }
      *dof_func_ptr = dof_func;
   }

   *CF_marker_ptr = CF_marker;

   /* partitionings: nodal offsets scaled by the number of functions */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   num_starts = 2;
#else
   num_starts = num_procs + 1;
#endif

   row_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
   for (p = 0; p < num_starts; p++)
   {
      row_starts_S[p] = num_functions * row_starts_SN[p];
   }

   if (row_starts_SN == col_starts_SN)
   {
      /* SN shares one array for rows and columns, so S does too */
      col_starts_S = row_starts_S;
   }
   else
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
      for (p = 0; p < num_starts; p++)
      {
         col_starts_S[p] = num_functions * col_starts_SN[p];
      }
   }

   /* sizes of the scalar matrix */
   nnz_diag_S      = num_functions * SN_diag_i[num_nodes];
   nnz_offd_S      = num_functions * SN_offd_i[num_nodes];
   num_cols_offd_S = num_functions * num_cols_offd_SN;
   global_rows_S   = hypre_ParCSRMatrixGlobalNumRows(SN) * num_functions;
   global_cols_S   = hypre_ParCSRMatrixGlobalNumCols(SN) * num_functions;

   S = hypre_ParCSRMatrixCreate(comm, global_rows_S, global_cols_S,
                                row_starts_S, col_starts_S,
                                num_cols_offd_S, nnz_diag_S, nnz_offd_S);
   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);

   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag_S);
   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;

   if (data)
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data  = hypre_CTAlloc(double, nnz_diag_S);
      hypre_CSRMatrixData(S_diag) = S_diag_data;

      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data  = hypre_CTAlloc(double, nnz_offd_S);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }
   }
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   /* communication package: same neighbours, num_functions times the data */
   if (comm_pkg)
   {
      hypre_ParCSRCommPkg *comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

      HYPRE_Int  num_sends       = hypre_ParCSRCommPkgNumSends(comm_pkg);
      HYPRE_Int  num_recvs       = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      HYPRE_Int *send_procs      = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      HYPRE_Int *send_map_elmts  = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      HYPRE_Int *recv_procs      = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);

      HYPRE_Int *send_procs_S     = NULL;
      HYPRE_Int *send_map_elmts_S = NULL;
      HYPRE_Int *send_map_starts_S;
      HYPRE_Int *recv_procs_S     = NULL;
      HYPRE_Int *recv_vec_starts_S;
      HYPRE_Int  num_send_elmts;

      hypre_ParCSRCommPkgComm(comm_pkg_S)     = comm;
      hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends;
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs;

      if (num_sends)
      {
         send_procs_S     = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
                                          num_functions*send_map_starts[num_sends]);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      if (num_recvs)
      {
         recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs);
      }

      send_map_starts_S[0] = 0;
      for (p = 0; p < num_sends; p++)
      {
         send_procs_S[p]        = send_procs[p];
         send_map_starts_S[p+1] = num_functions * send_map_starts[p+1];
      }

      recv_vec_starts_S[0] = 0;
      for (p = 0; p < num_recvs; p++)
      {
         recv_procs_S[p]        = recv_procs[p];
         recv_vec_starts_S[p+1] = num_functions * recv_vec_starts[p+1];
      }

      /* each nodal send index expands to its num_functions unknowns */
      num_send_elmts = send_map_starts[num_sends];
      for (n = 0; n < num_send_elmts; n++)
      {
         for (func = 0; func < num_functions; func++)
         {
            send_map_elmts_S[n * num_functions + func] =
               num_functions * send_map_elmts[n] + func;
         }
      }

      hypre_ParCSRCommPkgSendProcs(comm_pkg_S)     = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S)  = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S)     = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;
      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   /* off-diagonal column indices and the scalar column map */
   if (num_cols_offd_S)
   {
      HYPRE_Int *col_map_offd_S;

      S_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd_S);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);
      for (n = 0; n < num_cols_offd_SN; n++)
      {
         for (func = 0; func < num_functions; func++)
         {
            col_map_offd_S[n * num_functions + func] =
               col_map_offd_SN[n] * num_functions + func;
         }
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }

   /* optional map from off-diagonal columns of S to those of A */
   if (col_offd_SN_to_AN)
   {
      HYPRE_Int *col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);

      for (n = 0; n < num_cols_offd_SN; n++)
      {
         for (func = 0; func < num_functions; func++)
         {
            col_offd_S_to_A[n * num_functions + func] =
               col_offd_SN_to_AN[n] * num_functions + func;
         }
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }

   /* fill the row pointers, column indices and (optionally) values */
   hypre_ScalarCFSExpandCSR(num_nodes, num_functions, data,
                            SN_diag_i, SN_diag_j, SN_diag_data,
                            S_diag_i, S_diag_j, S_diag_data);
   hypre_ScalarCFSExpandCSR(num_nodes, num_functions, data,
                            SN_offd_i, SN_offd_j, SN_offd_data,
                            S_offd_i, S_offd_j, S_offd_data);

   *S_ptr = S;

   return (ierr);
}
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix ** P, hypre_ParCSRMatrix * S, HYPRE_Int * CF_marker, HYPRE_Int level, HYPRE_Real truncation_threshold, HYPRE_Real truncation_threshold_minus, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd, HYPRE_Real weight_AF) /* One step of Jacobi interpolation: A is the linear system. P is an interpolation matrix, input and output CF_marker identifies coarse and fine points If we imagine P and A as split into coarse and fine submatrices, [ AFF AFC ] [ AF ] [ IFC ] A = [ ] = [ ] , P = [ ] [ ACF ACC ] [ AC ] [ ICC ] (note that ICC is an identity matrix, applied to coarse points only) then this function computes IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC ) = IFCold - DFF(-1) * AF * Pold) where DFF is the diagonal of AFF, (-1) represents the inverse, and where "old" denotes a value on entry to this function, "new" a returned value. */ { hypre_ParCSRMatrix * Pnew; hypre_ParCSRMatrix * C; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); hypre_CSRMatrix *C_diag; hypre_CSRMatrix *C_offd; hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int i; HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros; HYPRE_Int CF_coarse=0; HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P ); HYPRE_Int nc, ncmax, ncmin, nc1; HYPRE_Int num_procs, my_id; MPI_Comm comm = hypre_ParCSRMatrixComm( A ); #ifdef HYPRE_JACINT_PRINT_ROW_SUMS HYPRE_Int m, nmav, npav; HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh; HYPRE_Real eps = 1.0e-17; #endif #ifdef HYPRE_JACINT_PRINT_MATRICES char filename[80]; HYPRE_Int i_dummy, j_dummy; 
HYPRE_Int *base_i_ptr = &i_dummy; HYPRE_Int *base_j_ptr = &j_dummy; #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS HYPRE_Int sample_rows[50], n_sample_rows=0, isamp; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); for ( i=0; i<num_rows_diag_P; ++i ) { J_marker[i] = CF_marker[i]; if (CF_marker[i]>=0) ++CF_coarse; } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd), hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd), hypre_ParCSRMatrixLocalSumElts(*P) ); #endif /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) { ++nc1; } ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #if 0 /* a very agressive reduction in how much the Jacobi step does: */ for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc>ncmin+1) /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/ { J_marker[i] = 1; ++Jnochanges; } } #endif Jchanges = num_rows_diag_P - Jnochanges - CF_coarse; #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some rows to be changed: "); 
randthresh = 15/(HYPRE_Real)Jchanges; for ( i=0; i<num_rows_diag_P; ++i ) { if ( J_marker[i]<0 ) { if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh ) { hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); sample_rows[n_sample_rows] = i; ++n_sample_rows; } } } hypre_printf("\n"); #endif #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n", my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); #endif #ifdef HYPRE_JACINT_PRINT_MATRICES if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) { hypre_sprintf( filename, "Ain%i", level ); hypre_ParCSRMatrixPrintIJ( A,0,0,filename); hypre_sprintf( filename, "Sin%i", level ); hypre_ParCSRMatrixPrintIJ( S,0,0,filename); hypre_sprintf( filename, "Pin%i", level ); hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); } #endif C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd ); /* hypre_parMatmul_FC creates and returns C, a variation of the matrix product A*P in which only the "Fine"-designated rows have been computed. (all columns are Coarse because all columns of P are). "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. As a matrix, C is the size of A*P. But only the marked rows have been computed. 
*/ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "C%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif C_diag = hypre_ParCSRMatrixDiag(C); C_offd = hypre_ParCSRMatrixOffd(C); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(C_diag), hypre_CSRMatrixNumNonzeros(C_offd), hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd), hypre_ParCSRMatrixLocalSumElts(C) ); #endif hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker ); /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with a submatrix of the inverse of the diagonal of its second argument. The marker array determines which diagonal elements are used. The marker array should select exactly the right number of diagonal elements (the number of rows of AP_FC). */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Cout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif Pnew = hypre_ParMatMinus_F( *P, C, J_marker ); /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows of its first argument. The marker array determines which rows of the first argument are affected, and they should exactly correspond to all the rows of the second argument. */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros, hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Transfer ownership of col_starts from P to Pnew ... 
*/ if ( hypre_ParCSRMatrixColStarts(*P) && hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) ) { if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) ) { hypre_ParCSRMatrixSetColStartsOwner(*P,0); hypre_ParCSRMatrixSetColStartsOwner(Pnew,1); } } hypre_ParCSRMatrixDestroy( C ); hypre_ParCSRMatrixDestroy( *P ); /* Note that I'm truncating all the fine rows, not just the J-marked ones. */ #if 0 if ( Pnew_num_nonzeros < 10000 ) /* a fixed number like this makes it no.procs.-depdendent */ { /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/ truncation_threshold = 1.0e-6 * truncation_threshold; truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus; } #endif hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold, truncation_threshold_minus, CF_marker ); hypre_MatvecCommPkgCreate ( Pnew ); *P = Pnew; P_diag = hypre_ParCSRMatrixDiag(*P); P_offd = hypre_ParCSRMatrixOffd(*P); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_diag_j = hypre_CSRMatrixJ(P_diag); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. 
sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some changed rows: "); for ( isamp=0; isamp<n_sample_rows; ++isamp ) { i = sample_rows[isamp]; hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); } hypre_printf("\n"); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) ++nc1; ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n", my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, truncation_threshold, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Programming Notes: 1. Judging by around line 299 of par_interp.c, they typical use of CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. 
*/ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Pout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); #endif hypre_TFree( J_marker ); }