/*--------------------------------------------------------------------------
 * hypre_MatTCommPkgCreate
 *
 * Builds a communication package for A by handing A's partitioning and
 * CSR index arrays to hypre_MatTCommPkgCreate_core, then stores the
 * resulting send/receive description in a freshly allocated
 * hypre_ParCSRCommPkg that is attached to A as its "CommPkgT".
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MatTCommPkgCreate( hypre_ParCSRMatrix *A )
{
   /* Local blocks of A */
   hypre_CSRMatrix *diag_block = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd_block = hypre_ParCSRMatrixOffd(A);

   MPI_Comm  comm = hypre_ParCSRMatrixComm(A);

   /* Receive side, filled in by the core routine */
   HYPRE_Int   recv_count;
   HYPRE_Int  *recv_ranks;
   HYPRE_Int  *recv_starts;

   /* Send side, filled in by the core routine */
   HYPRE_Int   send_count;
   HYPRE_Int  *send_ranks;
   HYPRE_Int  *send_starts;
   HYPRE_Int  *send_elmts;

   hypre_ParCSRCommPkg *pkg;

   hypre_MatTCommPkgCreate_core(
      comm,
      hypre_ParCSRMatrixColMapOffd(A),
      hypre_ParCSRMatrixFirstColDiag(A),
      hypre_ParCSRMatrixColStarts(A),
      hypre_CSRMatrixNumRows(diag_block),
      hypre_CSRMatrixNumCols(diag_block),
      hypre_CSRMatrixNumCols(offd_block),
      hypre_ParCSRMatrixRowStarts(A),
      hypre_ParCSRMatrixFirstColDiag(A),
      hypre_ParCSRMatrixColMapOffd(A),
      hypre_CSRMatrixI(diag_block),
      hypre_CSRMatrixJ(diag_block),
      hypre_CSRMatrixI(offd_block),
      hypre_CSRMatrixJ(offd_block),
      1,
      &recv_count, &recv_ranks, &recv_starts,
      &send_count, &send_ranks, &send_starts, &send_elmts );

   /* Package the results and hang them on A */
   pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(pkg)          = comm;

   hypre_ParCSRCommPkgNumRecvs(pkg)      = recv_count;
   hypre_ParCSRCommPkgRecvProcs(pkg)     = recv_ranks;
   hypre_ParCSRCommPkgRecvVecStarts(pkg) = recv_starts;

   hypre_ParCSRCommPkgNumSends(pkg)      = send_count;
   hypre_ParCSRCommPkgSendProcs(pkg)     = send_ranks;
   hypre_ParCSRCommPkgSendMapStarts(pkg) = send_starts;
   hypre_ParCSRCommPkgSendMapElmts(pkg)  = send_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = pkg;

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_ParMatmul_FC
 *
 * Creates and returns the "Fine"-designated rows of the matrix product A*P.
 *
 * A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC, where nC is the
 * number of coarse rows/columns and nF the number of fine rows/columns.
 * The size of C=A*P is (nC+nF)*nC, even though not all rows of C are
 * actually computed.  A matrix holding only the computed rows would be
 * nF*nC, but building it would need global information that depends on
 * which rows are "Fine" (and therefore extra communication), so the full
 * row range is kept.
 *
 * "Fine" is defined solely by the marker array, and could for example be a
 * proper subset of the fine points of a multigrid hierarchy.
 *
 * Parameters:
 *   A, P           factors of the product.
 *   CF_marker      per local row of A: a negative value marks a fine row
 *                  (computed as row i1 of A*P); any other value marks a
 *                  coarse row, for which row i1 of P is copied into C.
 *   dof_func       if non-NULL, an entry A(i1,i2) only contributes when
 *                  dof_func[i1] equals dof_func[i2] (local columns) or
 *                  dof_func_offd[i2] (off-processor columns).
 *   dof_func_offd  function labels for the off-processor columns of A;
 *                  only read when dof_func is non-NULL.
 *
 * Returns the new matrix C, or NULL (after printing a message) when the
 * dimensions of A and P are incompatible.  C does not own its row/column
 * partitionings; they are shared with A and P respectively.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *
hypre_ParMatmul_FC( hypre_ParCSRMatrix *A,
                    hypre_ParCSRMatrix *P,
                    HYPRE_Int          *CF_marker,
                    HYPRE_Int          *dof_func,
                    HYPRE_Int          *dof_func_offd )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int       *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int        first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int        last_col_diag_P;
   HYPRE_Int       *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int       *col_map_offd_C = NULL;
   HYPRE_Int       *map_P_to_C = NULL;

   hypre_CSRMatrix *C_diag;
   double          *C_diag_data;
   HYPRE_Int       *C_diag_i;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;
   double          *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i = NULL;
   HYPRE_Int       *C_offd_j = NULL;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        num_cols_offd_C = 0;

   /* Rows of P owned by other processes (only fetched when num_procs > 1) */
   hypre_CSRMatrix *Ps_ext = NULL;
   double          *Ps_ext_data = NULL;
   HYPRE_Int       *Ps_ext_i = NULL;
   HYPRE_Int       *Ps_ext_j = NULL;

   /* Ps_ext split into columns inside / outside the local column range of P */
   double          *P_ext_diag_data = NULL;
   HYPRE_Int       *P_ext_diag_i;
   HYPRE_Int       *P_ext_diag_j = NULL;
   HYPRE_Int        P_ext_diag_size;

   double          *P_ext_offd_data = NULL;
   HYPRE_Int       *P_ext_offd_i;
   HYPRE_Int       *P_ext_offd_j = NULL;
   HYPRE_Int        P_ext_offd_size;

   HYPRE_Int       *P_marker;
   HYPRE_Int       *temp = NULL;

   HYPRE_Int        i, j;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        n_rows_A_global, n_cols_A_global;
   HYPRE_Int        n_rows_P_global, n_cols_P_global;
   HYPRE_Int        allsquare = 0;
   HYPRE_Int        cnt, cnt_offd, cnt_diag;
   HYPRE_Int        num_procs;
   HYPRE_Int        value;

   double           a_entry;
   double           a_b_product;

   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext      = hypre_ParCSRMatrixExtractBExt(P, A, 1);
      Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
      Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
      Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }

   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;

   /* Count, per external row, how many entries fall inside / outside the
      local column range of P. */
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_size++;
         }
         else
         {
            P_ext_diag_size++;
         }
      }
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j    = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j    = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   /* Distribute the external entries; diag columns become local indices,
      offd columns stay global until col_map_offd_C is known. */
   cnt_offd = 0;
   cnt_diag = 0;
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd]      = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag]      = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
      }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   /*-----------------------------------------------------------------------
    *  Build col_map_offd_C: the sorted, duplicate-free union of the global
    *  off-processor columns of P_ext and of P itself.
    *-----------------------------------------------------------------------*/

   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i = 0; i < P_ext_offd_size; i++)
      {
         temp[i] = P_ext_offd_j[i];
      }
      cnt = P_ext_offd_size;
      for (i = 0; i < num_cols_offd_P; i++)
      {
         temp[cnt++] = col_map_offd_P[i];
      }
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      /* compact the sorted list in place, dropping duplicates */
      num_cols_offd_C = 1;
      value = temp[0];
      for (i = 1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
   {
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C);
   }

   for (i = 0; i < num_cols_offd_C; i++)
   {
      col_map_offd_C[i] = temp[i];
   }

   if (P_ext_offd_size || num_cols_offd_P)
   {
      hypre_TFree(temp);
   }

   /* Replace global column numbers of P_ext_offd by indices into
      col_map_offd_C. */
   for (i = 0; i < P_ext_offd_size; i++)
   {
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   }

   /* map_P_to_C[k] is the position in col_map_offd_C of P's k-th
      off-processor column (both lists are walked in ascending order). */
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_P);

      cnt = 0;
      for (i = 0; i < num_cols_offd_C; i++)
      {
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P)
            {
               break;
            }
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate and initialize the marker array.  Slots [0, num_cols_diag_P)
    *  refer to diag columns of C, the remaining slots to offd columns.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /* no changes for the marked version above this point */

   /*-----------------------------------------------------------------------
    *  First pass: compute the scalars C_diag_size, C_offd_size and the
    *  arrays C_diag_i, C_offd_i.  (P_marker is also updated, but only used
    *  internally by the callee.)
    *-----------------------------------------------------------------------*/

   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd );

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(double, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second pass: fill in C_diag_data / C_diag_j and
    *  C_offd_data / C_offd_j.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      if (CF_marker[i1] < 0)
      {
         /* i1 is a fine row.  This branch and the coarse-row branch are
            the only parts of the second pass where hypre_ParMatmul_FC
            differs from the regular hypre_ParMatmul. */

         /* A marker value below these offsets means "column not yet seen
            in the current row". */
         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];

               /* Interpolate only like "functions": when i1 and i2 belong
                  to different functions no entry is created. */
               if (dof_func == NULL || dof_func[i1] == dof_func_offd[i2])
               {
                  a_entry = A_offd_data[jj2];

                  /*--------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext (offd part).
                   *--------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P + P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];

                     /*-----------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not
                      *  already been accounted for.  If it has not, create
                      *  a new entry.  If it has, add new contribution.
                      *-----------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }

                  /*--------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext (diag part).
                   *--------------------------------------------------------*/

                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];

            /* Interpolate only like "functions": when i1 and i2 belong to
               different functions no entry is created. */
            if (dof_func == NULL || dof_func[i1] == dof_func[i2])
            {
               a_entry = A_diag_data[jj2];

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];

                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_offd.
                *-----------------------------------------------------------*/

               if (num_cols_offd_P)
               {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P + map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3 - num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }
      }
      else
      {
         /* i1 is a coarse row.  Copy P's coarse-row values to C.  This is
            useful if C is meant to become a replacement for P. */

         if (num_cols_offd_P)
         {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               /* Read P at its own row cursor jj2 (not at C's output
                  cursor), and translate the column from P's offd numbering
                  to C's, since C is indexed against col_map_offd_C. */
               C_offd_j[jj_count_offd]    = map_P_to_C[P_offd_j[jj2]];
               C_offd_data[jj_count_offd] = P_offd_data[jj2];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag]    = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Assemble the result matrix.
    *-----------------------------------------------------------------------*/

   C = hypre_ParCSRMatrixCreate(comm, n_rows_A_global, n_cols_P_global,
                                row_starts_A, col_starts_P,
                                num_cols_offd_C, C_diag_size, C_offd_size);

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C, 0);
   hypre_ParCSRMatrixSetColStartsOwner(C, 0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag)    = C_diag_i;
   hypre_CSRMatrixJ(C_diag)    = C_diag_j;

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixJ(C_offd)    = C_offd_j;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P)
   {
      hypre_TFree(map_P_to_C);
   }

   return C;
}
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) 
= hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, 
&col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, 
I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); 
hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
int hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata; /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/ /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_CSRMatrix *A_diag; double *A_diag_data; int *A_diag_i; hypre_CSRMatrix *A_offd; double *A_offd_data; int *A_offd_i; hypre_CSRMatrix *P_diag; double *P_diag_data; int *P_diag_i; hypre_CSRMatrix *P_offd; double *P_offd_data; int *P_offd_i; int numrows; HYPRE_BigInt *row_starts; int num_levels; int coarsen_type; int interp_type; int measure_type; double global_nonzeros; double *send_buff; double *gather_buff; /* Local variables */ int level; int j; HYPRE_BigInt fine_size; int min_entries; int max_entries; int num_procs,my_id, num_threads; double min_rowsum; double max_rowsum; double sparse; int i; HYPRE_BigInt coarse_size; int entries; double avg_entries; double rowsum; double min_weight; double max_weight; int global_min_e; int global_max_e; double global_min_rsum; double global_max_rsum; double global_min_wt; double global_max_wt; double *num_coeffs; double *num_variables; double total_variables; double operat_cmplxty; double grid_cmplxty; /* amg solve params */ int max_iter; int cycle_type; int *num_grid_sweeps; int *grid_relax_type; int relax_order; int **grid_relax_points; double *relax_weight; double *omega; double tol; int one = 1; int minus_one = -1; int zero = 0; int smooth_type; int smooth_num_levels; int agg_num_levels; /*int seq_cg = 0;*/ /*if (seq_data) seq_cg = 1;*/ MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm,&my_id); num_threads = hypre_NumThreads(); if (my_id == 0) printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); coarsen_type 
= hypre_ParAMGDataCoarsenType(amg_data); interp_type = hypre_ParAMGDataInterpType(amg_data); measure_type = hypre_ParAMGDataMeasureType(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data); /*---------------------------------------------------------- * Get the amg_data data *----------------------------------------------------------*/ num_levels = hypre_ParAMGDataNumLevels(amg_data); max_iter = hypre_ParAMGDataMaxIter(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); omega = hypre_ParAMGDataOmega(amg_data); tol = hypre_ParAMGDataTol(amg_data); /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/ send_buff = hypre_CTAlloc(double, 6); #ifdef HYPRE_NO_GLOBAL_PARTITION gather_buff = hypre_CTAlloc(double,6); #else gather_buff = hypre_CTAlloc(double,6*num_procs); #endif if (my_id==0) { printf("\nBoomerAMG SETUP PARAMETERS:\n\n"); printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data)); printf(" Num levels = %d\n\n",num_levels); printf(" Strength Threshold = %f\n", hypre_ParAMGDataStrongThreshold(amg_data)); printf(" Interpolation Truncation Factor = %f\n", hypre_ParAMGDataTruncFactor(amg_data)); printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", hypre_ParAMGDataMaxRowSum(amg_data)); if (coarsen_type == 0) { printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n"); } else if (abs(coarsen_type) == 1) { printf(" Coarsening Type = Ruge\n"); } else if (abs(coarsen_type) == 2) { printf(" Coarsening Type = Ruge2B\n"); } else if (abs(coarsen_type) == 3) { printf(" Coarsening Type = Ruge3\n"); } else if (abs(coarsen_type) == 4) { 
printf(" Coarsening Type = Ruge 3c \n"); } else if (abs(coarsen_type) == 5) { printf(" Coarsening Type = Ruge relax special points \n"); } else if (abs(coarsen_type) == 6) { printf(" Coarsening Type = Falgout-CLJP \n"); } else if (abs(coarsen_type) == 8) { printf(" Coarsening Type = PMIS \n"); } else if (abs(coarsen_type) == 10) { printf(" Coarsening Type = HMIS \n"); } else if (abs(coarsen_type) == 11) { printf(" Coarsening Type = Ruge 1st pass only \n"); } else if (abs(coarsen_type) == 9) { printf(" Coarsening Type = PMIS fixed random \n"); } else if (abs(coarsen_type) == 7) { printf(" Coarsening Type = CLJP, fixed random \n"); } if (coarsen_type > 0) { printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n"); } if (coarsen_type) printf(" measures are determined %s\n\n", (measure_type ? "globally" : "locally")); if (agg_num_levels) printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels); #ifdef HYPRE_NO_GLOBAL_PARTITION printf( "\n No global partition option chosen.\n\n"); #endif if (interp_type == 0) { printf(" Interpolation = modified classical interpolation\n"); } else if (interp_type == 1) { printf(" Interpolation = LS interpolation \n"); } else if (interp_type == 2) { printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n"); } else if (interp_type == 3) { printf(" Interpolation = direct interpolation with separation of weights\n"); } else if (interp_type == 4) { printf(" Interpolation = multipass interpolation\n"); } else if (interp_type == 5) { printf(" Interpolation = multipass interpolation with separation of weights\n"); } else if (interp_type == 6) { printf(" Interpolation = extended+i interpolation\n"); } else if (interp_type == 7) { printf(" Interpolation = extended+i interpolation (only when needed)\n"); } else if (interp_type == 8) { printf(" Interpolation = standard interpolation\n"); } else if (interp_type == 9) { printf(" Interpolation = standard interpolation with separation of weights\n"); 
} else if (interp_type == 12) { printf(" FF interpolation \n"); } else if (interp_type == 13) { printf(" FF1 interpolation \n"); } { printf( "\nOperator Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("==================================\n"); #else printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("============================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_coeffs = hypre_CTAlloc(double,num_levels); num_variables = hypre_CTAlloc(double,num_levels); for (level = 0; level < num_levels; level++) { { A_diag = hypre_ParCSRMatrixDiag(A_array[level]); A_diag_data = hypre_CSRMatrixData(A_diag); A_diag_i = hypre_CSRMatrixI(A_diag); A_offd = hypre_ParCSRMatrixOffd(A_array[level]); A_offd_data = hypre_CSRMatrixData(A_offd); A_offd_i = hypre_CSRMatrixI(A_offd); row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); num_coeffs[level] = global_nonzeros; num_variables[level] = (double) fine_size; sparse = global_nonzeros /((double) fine_size * (double) fine_size); min_entries = 0; max_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; if (hypre_CSRMatrixNumRows(A_diag)) { min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]); for (j = A_diag_i[0]; j < A_diag_i[1]; j++) min_rowsum += A_diag_data[j]; for (j = A_offd_i[0]; j < A_offd_i[1]; j++) min_rowsum += A_offd_data[j]; max_rowsum = min_rowsum; for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++) { entries = 
(A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++) rowsum += A_diag_data[i]; for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++) rowsum += A_offd_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = global_nonzeros / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id ==0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = - gather_buff[2]; global_max_rsum = gather_buff[3]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1]-row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]); 
global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #endif } if (my_id == 0) { { printf( "\n\nInterpolation Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows x cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("======================================\n"); #else printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("==========================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { { P_diag = hypre_ParCSRMatrixDiag(P_array[level]); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_offd = hypre_ParCSRMatrixOffd(P_array[level]); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]); coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]); global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]); min_weight = 1.0; max_weight = 0.0; max_rowsum = 0.0; min_rowsum = 0.0; min_entries = 0; max_entries = 0; if (hypre_CSRMatrixNumRows(P_diag)) { if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0]; for (j = P_diag_i[0]; j < P_diag_i[1]; j++) { min_weight = hypre_min(min_weight, P_diag_data[j]); if (P_diag_data[j] 
!= 1.0) max_weight = hypre_max(max_weight, P_diag_data[j]); min_rowsum += P_diag_data[j]; } for (j = P_offd_i[0]; j < P_offd_i[1]; j++) { min_weight = hypre_min(min_weight, P_offd_data[j]); if (P_offd_data[j] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[j]); min_rowsum += P_offd_data[j]; } max_rowsum = min_rowsum; min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); max_entries = 0; for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++) { entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_diag_data[i]); if (P_diag_data[i] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[i]); rowsum += P_diag_data[i]; } for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_offd_data[i]); if (P_offd_data[i] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[i]); rowsum += P_offd_data[i]; } min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = ((double) global_nonzeros) / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; min_weight = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = - min_weight; send_buff[5] = max_weight; MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id == 0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = -gather_buff[2]; global_max_rsum = gather_buff[3]; global_min_wt = -gather_buff[4]; global_max_wt = gather_buff[5]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld 
%3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = min_weight; send_buff[5] = max_weight; MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; global_min_wt = 1.0e7; global_max_wt = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1] - row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]); global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]); global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #endif } total_variables = 0; operat_cmplxty = 0; for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++) { operat_cmplxty += num_coeffs[j] / num_coeffs[0]; total_variables += num_variables[j]; } if (num_variables[0] != 0) grid_cmplxty = total_variables / num_variables[0]; if (my_id == 0 ) { printf("\n\n Complexity: grid = %f\n",grid_cmplxty); printf(" operator = %f\n",operat_cmplxty); } if (my_id == 0) printf("\n\n"); if (my_id == 0) { printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n"); printf( 
" Maximum number of cycles: %d \n",max_iter); printf( " Stopping Tolerance: %e \n",tol); printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type); printf( " Relaxation Parameters:\n"); printf( " Visiting Grid: down up coarse\n"); printf( " Number of partial sweeps: %4d %2d %4d \n", num_grid_sweeps[1], num_grid_sweeps[2],num_grid_sweeps[3]); printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n", grid_relax_type[1], grid_relax_type[2],grid_relax_type[3]); #if 1 /* TO DO: may not want this to print if CG in the coarse grid */ printf( " Point types, partial sweeps (1=C, -1=F):\n"); if (grid_relax_points) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", grid_relax_points[1][j]); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", grid_relax_points[2][j]); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", grid_relax_points[3][j]); printf( "\n\n"); } else if (relax_order == 1) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d %2d", one, minus_one); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d %2d", minus_one, one); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } else { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", zero); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", zero); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } #endif if (smooth_type == 6) for (j=0; j < smooth_num_levels; j++) printf( " Schwarz Relaxation Weight %f level %d\n", hypre_ParAMGDataSchwarzRlxWeight(amg_data),j); for (j=0; j < num_levels; j++) if (relax_weight[j] != 1) 
printf( " Relaxation Weight %f level %d\n",relax_weight[j],j); for (j=0; j < num_levels; j++) if (omega[j] != 1) printf( " Outer relaxation weight %f level %d\n",omega[j],j); } /*if (seq_cg) { hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], operat_cmplxty, grid_cmplxty ); }*/ hypre_TFree(num_coeffs); hypre_TFree(num_variables); hypre_TFree(send_buff); hypre_TFree(gather_buff); return(0); }
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix( hypre_ParCSRMatrix *Ap, MPI_Comm comm, hypre_ParChordMatrix **pAc ) { HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap); HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap); hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap); hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap); HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd); HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag); HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap); hypre_ParChordMatrix * Ac; hypre_NumbersNode * rdofs, * offd_cols_me; hypre_NumbersNode ** offd_cols; HYPRE_Int ** offd_col_array; HYPRE_Int * len_offd_col_array, * offd_col_array_me; HYPRE_Int len_offd_col_array_me; HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global; HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq; HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor; HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto; HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor; HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor; double **inchord_data; double data; HYPRE_Int *first_index_idof, *first_index_rdof; hypre_MPI_Request * request; hypre_MPI_Status * status; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); num_idofs = row_starts[my_id+1] - row_starts[my_id]; num_rdofs = col_starts[my_id+1] - col_starts[my_id]; hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs ); Ac = *pAc; /* The following block sets Inprocessor: On each proc. my_id, we find the columns in the offd and diag blocks (global no.s). The columns are rdofs (contrary to what I wrote in ChordMatrix.txt). For each such col/rdof r, find the proc. p which owns row/idof r. We set the temporary array pcr[p]=1 for such p. An MPI all-to-all will exchange such arrays so my_id's array qcr has qcr[q]=1 iff, on proc. q, pcr[my_id]=1. 
In other words, qcr[q]=1 if my_id owns a row/idof i which is the same as a col/rdof owned by q. Collect all such q's into in the array Inprocessor. While constructing pcr, we also construct pj such that for any index jj into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof) (the number jj is local to this processor). */ pcr = hypre_CTAlloc( HYPRE_Int, num_procs ); qcr = hypre_CTAlloc( HYPRE_Int, num_procs ); for ( p=0; p<num_procs; ++p ) pcr[p]=0; for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) { j_local = offd_j[jj]; j_global = col_map_offd[j_local]; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj] = p;*/ break; } } } /* jjd = jj; ...not used yet */ /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this one line would do the job of the following nested loop. For non-square matrices, the data distribution is too arbitrary. */ for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) { j_local = diag_j[jj]; j_global = j_local + first_col_diag; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj+jjd] = p;*/ break; } } } /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */ hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm ); /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q The array of such q's is the array Inprocessor. 
*/ num_inprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors; inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q; num_toprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors; toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q; hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors; hypre_ParChordMatrixInprocessor(Ac) = inprocessor; hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors; hypre_ParChordMatrixToprocessor(Ac) = toprocessor; hypre_TFree( qcr ); /* FirstIndexIdof[p] is the global index of proc. p's row 0 */ /* FirstIndexRdof[p] is the global index of proc. p's col 0 */ /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers, because the chord matrix will think it's free to delete FirstIndexIdof */ /* col_starts[p] contains the global index of the first column in the diag block of p. But for first_index_rdof we want the global index of the first column in p (whether that's in the diag or offd block). So it's more involved than row/idof: we also check the offd block, and have to do a gather to get first_index_rdof for every proc. on every proc. */ first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); for ( p=0; p<=num_procs; ++p ) { first_index_idof[p] = row_starts[p]; first_index_rdof[p] = col_starts[p]; }; if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 ) first_index_rdof[my_id] = col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0]; hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT, first_index_rdof, 1, HYPRE_MPI_INT, comm ); /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p. Set each chord (idof,jdof,data). 
We go through each matrix element in the diag block, find what processor owns its column no. as a row, then update num_inchords[p], inchord_idof[p], inchord_rdof[p], inchord_data[p]. */ inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_data = hypre_CTAlloc( double*, num_inprocessors ); num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); num_rdofs = 0; for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0; my_q = -1; for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q; hypre_assert( my_q>=0 ); /* diag block: first count chords (from my_id to my_id), then set them from diag block's CSR data structure */ num_idofs = hypre_CSRMatrixNumRows(diag); rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; hypre_NumbersEnter( rdofs, j_local ); ++num_inchords[my_q]; } }; num_rdofs = hypre_NumbersNEntered( rdofs ); inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] ); chord[0] = 0; for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; data = hypre_CSRMatrixData(diag)[i]; inchord_idof[my_q][chord[0]] = row; /* Here We need to convert from j_local - a column local to the diag of this proc., to a j which is local only to this processor - a column (rdof) numbering scheme to be shared by the diag and offd blocks... 
*/ j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q]; j = j_global - first_index_rdof[my_q]; inchord_rdof[my_q][chord[0]] = j; inchord_data[my_q][chord[0]] = data; hypre_assert( chord[0] < num_inchords[my_q] ); ++chord[0]; } }; hypre_NumbersDeleteNode(rdofs); /* offd block: */ /* >>> offd_cols_me duplicates rdofs */ offd_cols_me = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( offd_cols_me, j_global ); } } offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); offd_col_array_me = hypre_NumbersArray( offd_cols_me ); len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me ); request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs ); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(status); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] ); for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(request); hypre_TFree(status); offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors ); for ( q=0; 
q<num_inprocessors; ++q ) { offd_cols[q] = hypre_NumbersNewNode(); for ( i=0; i<len_offd_col_array[q]; ++i ) hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] ); } len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd) [hypre_CSRMatrixNumRows(offd)]; inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) { inproc[qto] = -1; toproc[qto] = -1; num_rdofs_toprocessor[qto] = 0; }; rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( rdofs, j_local ); /* TO DO: find faster ways to do the two processor lookups below.*/ /* Find a processor p (local index q) from the inprocessor list, which owns the column(rdof) whichis the same as this processor's row(idof) row. Update num_inchords for p. Save q as inproc[i] for quick recall later. It represents an inprocessor (not unique) connected to a chord i. */ inproc[i] = -1; for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; if ( hypre_NumbersQuery( offd_cols[q], row+hypre_ParCSRMatrixFirstRowIndex(Ap) ) == 1 ) { /* row is one of the offd columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } if ( inproc[i]<0 ) { /* For square matrices, we would have found the column in some other processor's offd. But for non-square matrices it could exist only in some other processor's diag...*/ /* Note that all data in a diag block is stored. We don't check whether the value of a data entry is zero. 
*/ for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap); if ( row_global>=col_starts[p] && row_global< col_starts[p+1] ) { /* row is one of the diag columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } } hypre_assert( inproc[i]>=0 ); /* Find the processor pto (local index qto) from the toprocessor list, which owns the row(idof) which is the same as this processor's column(rdof) j_global. Update num_rdofs_toprocessor for pto. Save pto as toproc[i] for quick recall later. It represents the toprocessor connected to a chord i. */ for ( qto=0; qto<num_toprocessors; ++qto ) { pto = toprocessor[qto]; if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) { hypre_assert( qto < len_num_rdofs_toprocessor ); ++num_rdofs_toprocessor[qto]; /* ... an overestimate, as if two chords share an rdof, that rdof will be counted twice in num_rdofs_toprocessor. It can be fixed up later.*/ toproc[i] = qto; break; } } } }; num_rdofs += hypre_NumbersNEntered(rdofs); hypre_NumbersDeleteNode(rdofs); for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] ); chord[q] = 0; };
/*--------------------------------------------------------------------------
 * hypre_SchwarzSetup
 *
 * Prepares the Schwarz solver object behind schwarz_vdata for the matrix A.
 *
 *   - A work vector with A's communicator, global row count and row
 *     partitioning is created, initialized and stored in the solver object.
 *   - The domain structure and pivots are built and stored:
 *       variant <= 1 : hypre_AMGCreateDomainDof on the diag block of A;
 *                      a scale vector is generated (hypre_GenerateScale)
 *                      only when variant == 1.
 *       variant == 2 : hypre_ParAMGCreateDomainDof on A, scale from
 *                      hypre_ParGenerateScale.
 *       variant  > 2 : hypre_ParAMGCreateDomainDof on A, scale from
 *                      hypre_ParGenerateHybridScale; the boundary matrix it
 *                      produces is stored only if A has off-diagonal
 *                      columns, otherwise NULL is stored.
 *
 * f and u are not referenced by this routine.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_SchwarzSetup(void               *schwarz_vdata,
                   hypre_ParCSRMatrix *A,
                   hypre_ParVector    *f,
                   hypre_ParVector    *u )
{
   hypre_SchwarzData *schwarz_data = schwarz_vdata;

   /* options previously stored in the solver object */
   HYPRE_Int  variant       = hypre_SchwarzDataVariant(schwarz_data);
   HYPRE_Int  domain_type   = hypre_SchwarzDataDomainType(schwarz_data);
   HYPRE_Int  overlap       = hypre_SchwarzDataOverlap(schwarz_data);
   HYPRE_Int  num_functions = hypre_SchwarzDataNumFunctions(schwarz_data);
   double     relax_weight  = hypre_SchwarzDataRelaxWeight(schwarz_data);
   HYPRE_Int  use_nonsymm   = hypre_SchwarzDataUseNonSymm(schwarz_data);
   HYPRE_Int *dof_func      = hypre_SchwarzDataDofFunc(schwarz_data);

   /* objects produced by the setup */
   hypre_ParVector *Vtemp;
   hypre_CSRMatrix *domain_structure;
   hypre_CSRMatrix *A_boundary;
   double          *scale;
   HYPRE_Int       *pivots = NULL;

   /* work vector laid out like the rows of A */
   Vtemp = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
                                 hypre_ParCSRMatrixGlobalNumRows(A),
                                 hypre_ParCSRMatrixRowStarts(A));
   /* NOTE(review): owner flag 0 presumably means the partitioning stays
      owned by A rather than by Vtemp -- confirm */
   hypre_ParVectorSetPartitioningOwner(Vtemp, 0);
   hypre_ParVectorInitialize(Vtemp);
   hypre_SchwarzDataVtemp(schwarz_data) = Vtemp;

   if (variant <= 1)
   {
      /* domains are built from the diag block of A only */
      hypre_AMGCreateDomainDof(hypre_ParCSRMatrixDiag(A),
                               domain_type, overlap,
                               num_functions, dof_func,
                               &domain_structure, &pivots, use_nonsymm);

      /* no scale vector is generated here for variants below 1 */
      if (variant == 1)
      {
         hypre_GenerateScale(domain_structure,
                             hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)),
                             relax_weight, &scale);
         hypre_SchwarzDataScale(schwarz_data) = scale;
      }
   }
   else
   {
      /* domains are built from the full parallel matrix */
      hypre_ParAMGCreateDomainDof(A,
                                  domain_type, overlap,
                                  num_functions, dof_func,
                                  &domain_structure, &pivots, use_nonsymm);

      if (variant != 2)
      {
         hypre_ParGenerateHybridScale(A, domain_structure, &A_boundary, &scale);

         /* A_boundary is only read when A has off-diagonal columns */
         hypre_SchwarzDataABoundary(schwarz_data) =
            hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)) ? A_boundary
                                                              : NULL;
      }
      else
      {
         hypre_ParGenerateScale(A, domain_structure, relax_weight, &scale);
      }
      hypre_SchwarzDataScale(schwarz_data) = scale;
   }

   hypre_SchwarzDataDomainStructure(schwarz_data) = domain_structure;
   hypre_SchwarzDataPivots(schwarz_data) = pivots;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixConvertFromParCSRMatrix
 *
 * Builds a new ParCSR block matrix with square blocks of size
 * matrix_C_block_size from the scalar ParCSR matrix `matrix`.
 *
 *   - Row/column partition boundaries and the global dimensions are the
 *     scalar ones divided by the block size.
 *   - The diag part is converted by hypre_CSRBlockMatrixConvertFromCSRMatrix.
 *   - The offd part is converted here, because its column indices are local
 *     and have to be translated through col_map_offd: scalar off-diagonal
 *     columns that fall into the same node (global column / block size) are
 *     merged into one block column.
 *   - The communication package of the result is left NULL.
 *
 * Returns the newly created block matrix.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBlockMatrix *
hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix,
                                               HYPRE_Int matrix_C_block_size )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(matrix);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);

   HYPRE_Int  global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
   HYPRE_Int  global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts      = hypre_ParCSRMatrixRowStarts(matrix);
   HYPRE_Int *col_starts      = hypre_ParCSRMatrixColStarts(matrix);

   /* scalar off-diagonal part */
   HYPRE_Int      num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int     *col_map_offd  = hypre_ParCSRBlockMatrixColMapOffd(matrix);
   HYPRE_Int     *offd_i        = hypre_CSRMatrixI(offd);
   HYPRE_Int     *offd_j        = hypre_CSRMatrixJ(offd);
   HYPRE_Complex *offd_data     = hypre_CSRMatrixData(offd);

   /* pieces of the block matrix being assembled */
   hypre_ParCSRBlockMatrix *blk_matrix;
   hypre_CSRBlockMatrix    *blk_diag;
   hypre_CSRBlockMatrix    *blk_offd;
   HYPRE_Int               *blk_row_starts;
   HYPRE_Int               *blk_col_starts;
   HYPRE_Int               *blk_offd_i        = NULL;
   HYPRE_Int               *blk_offd_j        = NULL;
   HYPRE_Complex           *blk_offd_data     = NULL;
   HYPRE_Int               *blk_col_map_offd  = NULL;
   HYPRE_Int                blk_num_cols_offd = 0;
   HYPRE_Int                blk_nnz_offd      = 0;

   /* scratch arrays, released before returning */
   HYPRE_Int *node_of_col    = NULL;  /* scalar offd col -> global node      */
   HYPRE_Int *local_node_col = NULL;  /* scalar offd col -> local block col  */
   HYPRE_Int *marker         = NULL;  /* per block col: last row/entry seen  */

   HYPRE_Int num_procs, num_starts, num_block_rows;
   HYPRE_Int p, c, brow, r, k;
   HYPRE_Int row, scalar_col, node_col;
   HYPRE_Int num_distinct, next_entry, row_begin, pos;

   hypre_MPI_Comm_size(comm, &num_procs);

   /* length of the partition arrays depends on the partitioning scheme */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   num_starts = 2;
#else
   num_starts = num_procs + 1;
#endif

   /* partition boundaries in units of nodes instead of scalar rows/cols */
   blk_row_starts = hypre_CTAlloc(HYPRE_Int, num_starts);
   blk_col_starts = hypre_CTAlloc(HYPRE_Int, num_starts);
   for (p = 0; p < num_starts; p++)
   {
      blk_row_starts[p] = row_starts[p]/matrix_C_block_size;
      blk_col_starts[p] = col_starts[p]/matrix_C_block_size;
   }

   /*---------------- diagonal part ----------------*/
   blk_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag,
                                                       matrix_C_block_size);

   /*---------------- off-diagonal part ----------------
    * Not square, and its j entries are local indices into col_map_offd,
    * so the generic CSR -> block CSR conversion cannot be reused.        */
   num_block_rows = hypre_CSRMatrixNumRows(diag)/matrix_C_block_size;

   /* NOTE(review): when there are no offd columns only entry 0 is written
      below; the rest presumably relies on hypre_CTAlloc zero-filling --
      confirm */
   blk_offd_i = hypre_CTAlloc(HYPRE_Int, num_block_rows + 1);
   blk_offd_i[0] = 0;

   if (num_cols_offd)
   {
      /* node owning each scalar offd column */
      node_of_col = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      for (c = 0; c < num_cols_offd; c++)
      {
         node_of_col[c] = col_map_offd[c]/matrix_C_block_size;
      }

      /* count distinct nodes; only an increase relative to the previous
         entry is treated as a new node (assumes col_map_offd is in
         ascending order -- TODO confirm) */
      blk_num_cols_offd = 1;
      for (c = 1; c < num_cols_offd; c++)
      {
         if (node_of_col[c] > node_of_col[c-1])
         {
            blk_num_cols_offd++;
         }
      }

      blk_col_map_offd = hypre_CTAlloc(HYPRE_Int, blk_num_cols_offd);
      local_node_col   = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      /* blk_col_map_offd holds global node numbers; local_node_col maps
         every scalar offd column to its position in blk_col_map_offd */
      blk_col_map_offd[0] = node_of_col[0];
      local_node_col[0]   = 0;
      num_distinct        = 1;
      for (c = 1; c < num_cols_offd; c++)
      {
         if (node_of_col[c] > node_of_col[c-1])
         {
            blk_col_map_offd[num_distinct++] = node_of_col[c];
         }
         local_node_col[c] = num_distinct - 1;
      }

      /* count block entries: marker[] remembers the last block row in
         which a block column was seen, so each (block row, block column)
         pair is counted once */
      marker = hypre_CTAlloc(HYPRE_Int, blk_num_cols_offd);
      for (c = 0; c < blk_num_cols_offd; c++)
      {
         marker[c] = -1;
      }

      blk_nnz_offd = 0;
      for (brow = 0; brow < num_block_rows; brow++)
      {
         blk_offd_i[brow] = blk_nnz_offd;

         for (r = 0; r < matrix_C_block_size; r++)
         {
            row = brow*matrix_C_block_size + r;

            for (k = offd_i[row]; k < offd_i[row+1]; k++)
            {
               node_col = local_node_col[offd_j[k]];
               if (marker[node_col] < brow)
               {
                  marker[node_col] = brow;
                  blk_nnz_offd++;
               }
            }
         }
      }
      blk_offd_i[num_block_rows] = blk_nnz_offd;
   }

   blk_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_block_rows,
                                         blk_num_cols_offd, blk_nnz_offd);
   hypre_CSRBlockMatrixI(blk_offd) = blk_offd_i;

   if (blk_nnz_offd)
   {
      blk_offd_j    = hypre_CTAlloc(HYPRE_Int, blk_nnz_offd);
      blk_offd_data = hypre_CTAlloc(HYPRE_Complex,
                                    blk_nnz_offd*matrix_C_block_size*
                                    matrix_C_block_size);
      hypre_CSRBlockMatrixJ(blk_offd) = blk_offd_j;
      hypre_CSRMatrixData(blk_offd)   = blk_offd_data;

      /* marker[] is reused: it now stores, per block column, the index of
         the block entry most recently opened for it.  A value below
         row_begin means the column has no entry in the current block row. */
      for (c = 0; c < blk_num_cols_offd; c++)
      {
         marker[c] = -1;
      }

      next_entry = 0;  /* next free slot in blk_offd_j */
      for (brow = 0; brow < num_block_rows; brow++)
      {
         row_begin = next_entry;  /* first entry of this block row */

         for (r = 0; r < matrix_C_block_size; r++)
         {
            row = brow*matrix_C_block_size + r;

            for (k = offd_i[row]; k < offd_i[row+1]; k++)
            {
               scalar_col = offd_j[k];
               node_col   = local_node_col[scalar_col];

               if (marker[node_col] < row_begin)
               {
                  /* first hit in this block row: open a new block entry */
                  marker[node_col]       = next_entry;
                  blk_offd_j[next_entry] = node_col;
                  next_entry++;
               }

               /* position = block entry + row inside block + col inside
                  block */
               pos = (marker[node_col] * matrix_C_block_size *
                      matrix_C_block_size)
                     + (r * matrix_C_block_size)
                     + col_map_offd[scalar_col]%matrix_C_block_size;
               blk_offd_data[pos] = offd_data[k];
            }
         }
      }
   }

   /*---------------- assemble the parallel block matrix ----------------*/
   blk_matrix = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size,
                                      global_num_rows/matrix_C_block_size,
                                      global_num_cols/matrix_C_block_size,
                                      blk_row_starts,
                                      blk_col_starts,
                                      blk_num_cols_offd,
                                      hypre_CSRBlockMatrixNumNonzeros(blk_diag),
                                      blk_nnz_offd);

   /* swap the diag/offd created above for the ones built here */
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixDiag(blk_matrix));
   hypre_ParCSRBlockMatrixDiag(blk_matrix) = blk_diag;

   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixOffd(blk_matrix));
   hypre_ParCSRBlockMatrixOffd(blk_matrix) = blk_offd;

   hypre_ParCSRMatrixColMapOffd(blk_matrix) = blk_col_map_offd;

   /* the communication package is not copied */
   hypre_ParCSRBlockMatrixCommPkg(blk_matrix) = NULL;

   /* release scratch storage */
   hypre_TFree(node_of_col);
   hypre_TFree(local_node_col);
   hypre_TFree(marker);

   return blk_matrix;
}
/*--------------------------------------------------------------------------
 * hypre_ScalarCFSReplicateRows
 *
 * Expands one node-based CSR block into its unknown-based counterpart.
 * Each node row produces num_functions consecutive unknown rows: the row
 * for function f holds, for every node column c of the node row, the
 * column c*num_functions + f.  Values are carried along only when "data"
 * is nonzero.  var_i[0] is never written here; var_i[r] (r >= 1) receives
 * the running entry count after unknown row r-1.
 *--------------------------------------------------------------------------*/
static void
hypre_ScalarCFSReplicateRows(HYPRE_Int  num_nodes,
                             HYPRE_Int  num_functions,
                             HYPRE_Int  data,
                             HYPRE_Int *node_i,
                             HYPRE_Int *node_j,
                             double    *node_data,
                             HYPRE_Int *var_i,
                             HYPRE_Int *var_j,
                             double    *var_data)
{
   HYPRE_Int node, func, pos, src;
   HYPRE_Int first, last;
   HYPRE_Int out     = 0;   /* next free slot in var_j / var_data */
   HYPRE_Int var_row = 0;   /* number of unknown rows closed so far */

   for (node = 0; node < num_nodes; node++)
   {
      /* function 0: the node row with its columns scaled by num_functions */
      first = out;
      for (pos = node_i[node]; pos < node_i[node+1]; pos++)
      {
         if (data)
         {
            var_data[out] = node_data[pos];
         }
         var_j[out] = node_j[pos]*num_functions;
         out++;
      }
      last = out;
      var_i[++var_row] = out;

      /* functions 1..num_functions-1: copy the row just written and shift
         every column by the function number */
      for (func = 1; func < num_functions; func++)
      {
         for (src = first; src < last; src++)
         {
            if (data)
            {
               var_data[out] = var_data[src];
            }
            var_j[out] = var_j[src] + func;
            out++;
         }
         var_i[++var_row] = out;
      }
   }
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateScalarCFS
 *
 * Builds, from the node-based matrix SN, a matrix S on
 * num_functions*num_nodes local unknowns in which every node entry (i,j)
 * is repeated as the entries (i*num_functions+f, j*num_functions+f) for
 * f = 0..num_functions-1.  The partitioning, the off-diagonal column map
 * and (when SN has one) the communication package are scaled accordingly.
 *
 *   SN                   node-based input matrix
 *   CFN_marker           per-node marker; each value is copied
 *                        num_functions times into the new CF_marker
 *   col_offd_SN_to_AN    optional map over SN's offd columns (may be NULL)
 *   num_functions        number of unknowns per node
 *   nodal                when negative, a dof_func array is also built for
 *                        the nodes with CFN_marker == 1
 *   data                 when nonzero, matrix values are copied as well
 *   dof_func_ptr         written only when nodal < 0
 *   CF_marker_ptr        receives the new marker array
 *   col_offd_S_to_A_ptr  written only when col_offd_SN_to_AN is non-NULL
 *   S_ptr                receives the new matrix
 *
 * Returns ierr, which is never changed from 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix  *SN,
                               HYPRE_Int           *CFN_marker,
                               HYPRE_Int           *col_offd_SN_to_AN,
                               HYPRE_Int            num_functions,
                               HYPRE_Int            nodal,
                               HYPRE_Int            data,
                               HYPRE_Int          **dof_func_ptr,
                               HYPRE_Int          **CF_marker_ptr,
                               HYPRE_Int          **col_offd_S_to_A_ptr,
                               hypre_ParCSRMatrix **S_ptr)
{
   MPI_Comm             comm     = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN);

   /* node-based input */
   hypre_CSRMatrix *SN_diag          = hypre_ParCSRMatrixDiag(SN);
   hypre_CSRMatrix *SN_offd          = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int       *SN_diag_i        = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int       *SN_diag_j        = hypre_CSRMatrixJ(SN_diag);
   HYPRE_Int       *SN_offd_i        = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int       *SN_offd_j        = hypre_CSRMatrixJ(SN_offd);
   double          *SN_diag_data     = NULL;
   double          *SN_offd_data     = NULL;
   HYPRE_Int       *row_starts_SN    = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int       *col_starts_SN    = hypre_ParCSRMatrixColStarts(SN);
   HYPRE_Int       *col_map_offd_SN  = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int        num_nodes        = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int        num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);

   /* unknown-based output */
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int          *S_diag_i;
   HYPRE_Int          *S_diag_j;
   HYPRE_Int          *S_offd_i;
   HYPRE_Int          *S_offd_j    = NULL;
   double             *S_diag_data = NULL;
   double             *S_offd_data = NULL;
   HYPRE_Int          *row_starts_S;
   HYPRE_Int          *col_starts_S;
   HYPRE_Int          *col_map_offd_S;
   HYPRE_Int          *col_offd_S_to_A;
   HYPRE_Int          *CF_marker;
   HYPRE_Int          *dof_func;
   HYPRE_Int           num_variables;
   HYPRE_Int           num_cols_offd_S;
   HYPRE_Int           nnz_diag_S;
   HYPRE_Int           nnz_offd_S;
   HYPRE_Int           global_num_vars;
   HYPRE_Int           global_num_cols;

   /* communication package of SN and its scaled copy */
   hypre_ParCSRCommPkg *comm_pkg_S;
   HYPRE_Int            num_sends, num_recvs, num_send_elmts;
   HYPRE_Int           *send_procs, *send_map_starts, *send_map_elmts;
   HYPRE_Int           *recv_procs, *recv_vec_starts;
   HYPRE_Int           *send_procs_S, *send_map_starts_S, *send_map_elmts_S;
   HYPRE_Int           *recv_procs_S, *recv_vec_starts_S;

   HYPRE_Int num_procs;
   HYPRE_Int num_starts;
   HYPRE_Int num_coarse_nodes;
   HYPRE_Int node, func, p, e, pos;
   HYPRE_Int ierr = 0;

   hypre_MPI_Comm_size(comm, &num_procs);

   /*-----------------------------------------------------------------------
    * Markers: every unknown inherits the marker of its node.
    *-----------------------------------------------------------------------*/
   num_variables = num_functions*num_nodes;
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);

   pos = 0;
   for (node = 0; node < num_nodes; node++)
   {
      for (func = 0; func < num_functions; func++)
      {
         CF_marker[pos++] = CFN_marker[node];
      }
   }

   if (nodal < 0)
   {
      /* function numbers 0..num_functions-1 for every node marked 1 */
      num_coarse_nodes = 0;
      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] == 1)
         {
            num_coarse_nodes++;
         }
      }

      dof_func = hypre_CTAlloc(HYPRE_Int, num_coarse_nodes*num_functions);
      pos = 0;
      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] != 1)
         {
            continue;
         }
         for (func = 0; func < num_functions; func++)
         {
            dof_func[pos++] = func;
         }
      }
      *dof_func_ptr = dof_func;
   }
   *CF_marker_ptr = CF_marker;

   /*-----------------------------------------------------------------------
    * Partitioning: SN's row/column starts scaled by num_functions.  The
    * column array is shared with the row array when SN shares them.
    *-----------------------------------------------------------------------*/
#ifdef HYPRE_NO_GLOBAL_PARTITION
   num_starts = 2;
#else
   num_starts = num_procs+1;
#endif

   row_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
   for (p = 0; p < num_starts; p++)
   {
      row_starts_S[p] = num_functions*row_starts_SN[p];
   }

   if (row_starts_SN == col_starts_SN)
   {
      col_starts_S = row_starts_S;
   }
   else
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
      for (p = 0; p < num_starts; p++)
      {
         col_starts_S[p] = num_functions*col_starts_SN[p];
      }
   }

   /*-----------------------------------------------------------------------
    * Create S and attach its index (and, if requested, value) arrays.
    *-----------------------------------------------------------------------*/
   global_num_vars = hypre_ParCSRMatrixGlobalNumRows(SN)*num_functions;
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
   nnz_diag_S      = num_functions*SN_diag_i[num_nodes];
   nnz_offd_S      = num_functions*SN_offd_i[num_nodes];
   num_cols_offd_S = num_functions*num_cols_offd_SN;

   S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols,
                                row_starts_S, col_starts_S,
                                num_cols_offd_S, nnz_diag_S, nnz_offd_S);
   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);

   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, nnz_diag_S);

   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;

   if (data)
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data  = hypre_CTAlloc(double, nnz_diag_S);
      hypre_CSRMatrixData(S_diag) = S_diag_data;
      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data  = hypre_CTAlloc(double, nnz_offd_S);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }
   }
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   /*-----------------------------------------------------------------------
    * Communication package: same neighbours as SN, with every start offset
    * scaled and every send element expanded into num_functions unknowns.
    *-----------------------------------------------------------------------*/
   if (comm_pkg)
   {
      num_sends       = hypre_ParCSRCommPkgNumSends(comm_pkg);
      num_recvs       = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      send_procs      = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts  = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs      = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      num_send_elmts  = send_map_starts[num_sends];

      send_procs_S     = NULL;
      send_map_elmts_S = NULL;
      recv_procs_S     = NULL;
      if (num_sends)
      {
         send_procs_S     = hypre_CTAlloc(HYPRE_Int, num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
                                          num_functions*num_send_elmts);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int, num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
      if (num_recvs)
      {
         recv_procs_S = hypre_CTAlloc(HYPRE_Int, num_recvs);
      }

      send_map_starts_S[0] = 0;
      for (p = 0; p < num_sends; p++)
      {
         send_procs_S[p]        = send_procs[p];
         send_map_starts_S[p+1] = num_functions*send_map_starts[p+1];
      }

      recv_vec_starts_S[0] = 0;
      for (p = 0; p < num_recvs; p++)
      {
         recv_procs_S[p]        = recv_procs[p];
         recv_vec_starts_S[p+1] = num_functions*recv_vec_starts[p+1];
      }

      for (e = 0; e < num_send_elmts; e++)
      {
         for (func = 0; func < num_functions; func++)
         {
            send_map_elmts_S[e*num_functions + func] =
               num_functions*send_map_elmts[e] + func;
         }
      }

      comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);
      hypre_ParCSRCommPkgComm(comm_pkg_S)          = comm;
      hypre_ParCSRCommPkgNumSends(comm_pkg_S)      = num_sends;
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S)      = num_recvs;
      hypre_ParCSRCommPkgSendProcs(comm_pkg_S)     = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S)  = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S)     = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;

      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   /*-----------------------------------------------------------------------
    * Off-diagonal column indices and column map.
    *-----------------------------------------------------------------------*/
   if (num_cols_offd_S)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, nnz_offd_S);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);
      for (p = 0; p < num_cols_offd_SN; p++)
      {
         for (func = 0; func < num_functions; func++)
         {
            col_map_offd_S[p*num_functions + func] =
               col_map_offd_SN[p]*num_functions + func;
         }
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }

   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);
      for (p = 0; p < num_cols_offd_SN; p++)
      {
         for (func = 0; func < num_functions; func++)
         {
            col_offd_S_to_A[p*num_functions + func] =
               col_offd_SN_to_AN[p]*num_functions + func;
         }
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }

   /*-----------------------------------------------------------------------
    * Fill the row pointers, column indices and (optionally) values of the
    * diagonal and off-diagonal blocks.
    *-----------------------------------------------------------------------*/
   hypre_ScalarCFSReplicateRows(num_nodes, num_functions, data,
                                SN_diag_i, SN_diag_j, SN_diag_data,
                                S_diag_i, S_diag_j, S_diag_data);
   hypre_ScalarCFSReplicateRows(num_nodes, num_functions, data,
                                SN_offd_i, SN_offd_j, SN_offd_data,
                                S_offd_i, S_offd_j, S_offd_data);

   *S_ptr = S;

   return (ierr);
}
/*
 * hypre_ParCSRMatrixSplit
 *
 * Splits A into nr x nc new ParCSR matrices, stored as blocks[bi*nc + bj].
 * The caller supplies the blocks array with room for nr*nc pointers.
 *
 * A local row i belongs to block row (i % nr) when interleaved_rows is
 * nonzero and to block row (i / (local_rows/nr)) otherwise; columns are
 * assigned the same way using nc and interleaved_cols.  The local and
 * global sizes of A are asserted to be divisible by nr and nc.
 *
 * All blocks share one freshly allocated pair of row/column start arrays;
 * only blocks[0] is flagged as owning them.
 */
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A,
                             HYPRE_Int nr, HYPRE_Int nc,
                             hypre_ParCSRMatrix **blocks,
                             int interleaved_rows, int interleaved_cols)
{
   MPI_Comm         comm  = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A);

   HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int local_rows  = hypre_CSRMatrixNumRows(Adiag);
   HYPRE_Int local_cols  = hypre_CSRMatrixNumCols(Adiag);
   HYPRE_Int offd_cols   = hypre_CSRMatrixNumCols(Aoffd);

   HYPRE_Int i, j;
   HYPRE_Int row, col, blk, proc, send, elmt;

   hypre_assert(local_rows % nr == 0 && local_cols % nc == 0);
   hypre_assert(global_rows % nr == 0 && global_cols % nc == 0);

   HYPRE_Int block_rows = local_rows / nr;
   HYPRE_Int block_cols = local_cols / nc;
   HYPRE_Int num_blocks = nr * nc;

   /* tag every local row and column with the block it belongs to */
   HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows);
   HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols);

   for (row = 0; row < local_rows; row++)
   {
      if (interleaved_rows)
      {
         row_block_num[row] = row % nr;
      }
      else
      {
         row_block_num[row] = row / block_rows;
      }
   }
   for (col = 0; col < local_cols; col++)
   {
      if (interleaved_cols)
      {
         col_block_num[col] = col % nc;
      }
      else
      {
         col_block_num[col] = col / block_cols;
      }
   }

   /* the block numbers of the offd columns have to come from their owners */
   HYPRE_Int *offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols);

   /* make sure A has a communication package */
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* new global column number of every local column inside its block:
      columns of one block are numbered consecutively from first_col */
   HYPRE_Int *next_in_block    = hypre_CTAlloc(HYPRE_Int, nc);
   HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols);
   HYPRE_Int  first_col        = hypre_ParCSRMatrixFirstColDiag(A) / nc;

   for (col = 0; col < local_cols; col++)
   {
      HYPRE_Int owner_block = col_block_num[col];

      block_global_col[col] = first_col + next_in_block[owner_block];
      next_in_block[owner_block]++;
   }
   hypre_TFree(next_in_block);

   /* use a Matvec communication pattern to determine offd_col_block_num;
      block number and new global column travel packed in one integer */
   HYPRE_Int  num_sends    = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *int_buf_data =
      hypre_CTAlloc(HYPRE_Int,
                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   HYPRE_Int  filled = 0;

   for (send = 0; send < num_sends; send++)
   {
      HYPRE_Int stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);

      for (elmt = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send);
           elmt < stop; elmt++)
      {
         HYPRE_Int lc = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, elmt);

         int_buf_data[filled++] = col_block_num[lc] + nc*block_global_col[lc];
      }
   }
   hypre_TFree(block_global_col);

   hypre_ParCSRCommHandle *comm_handle =
      hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                                   offd_col_block_num);

   /* create the block matrices; the start arrays have num_procs+1 entries,
      where num_procs stays 1 when A uses an assumed partition */
   HYPRE_Int num_procs = 1;
   if (!hypre_ParCSRMatrixAssumedPartition(A))
   {
      hypre_MPI_Comm_size(comm, &num_procs);
   }

   HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
   HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
   for (proc = 0; proc <= num_procs; proc++)
   {
      row_starts[proc] = hypre_ParCSRMatrixRowStarts(A)[proc] / nr;
      col_starts[proc] = hypre_ParCSRMatrixColStarts(A)[proc] / nc;
   }

   for (blk = 0; blk < num_blocks; blk++)
   {
      blocks[blk] = hypre_ParCSRMatrixCreate(comm,
                                             global_rows/nr, global_cols/nc,
                                             row_starts, col_starts, 0, 0, 0);
   }

   /* split diag part; each block's placeholder diag is released and
      replaced by the split result */
   hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc);
   hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num,
                        csr_blocks);

   for (blk = 0; blk < num_blocks; blk++)
   {
      hypre_TFree(hypre_ParCSRMatrixDiag(blocks[blk]));
      hypre_ParCSRMatrixDiag(blocks[blk]) = csr_blocks[blk];
   }

   /* finish communication, receive offd_col_block_num */
   hypre_ParCSRCommHandleDestroy(comm_handle);
   hypre_TFree(int_buf_data);

   /* decode global offd column numbers: undo the packing done above */
   HYPRE_Int *offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols);
   for (col = 0; col < offd_cols; col++)
   {
      HYPRE_Int packed = offd_col_block_num[col];

      offd_global_col[col]    = packed / nc;
      offd_col_block_num[col] = packed % nc;
   }

   /* split offd part, reusing csr_blocks as the output array */
   hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num,
                        csr_blocks);

   for (blk = 0; blk < num_blocks; blk++)
   {
      hypre_TFree(hypre_ParCSRMatrixOffd(blocks[blk]));
      hypre_ParCSRMatrixOffd(blocks[blk]) = csr_blocks[blk];
   }

   hypre_TFree(csr_blocks);
   hypre_TFree(col_block_num);
   hypre_TFree(row_block_num);

   /* update block col-maps: block (bi,bj) keeps, in their original order,
      the new global numbers of the offd columns tagged with block bj */
   for (blk = 0; blk < num_blocks; blk++)
   {
      HYPRE_Int           bj         = blk % nc;
      hypre_ParCSRMatrix *block      = blocks[blk];
      hypre_CSRMatrix    *block_offd = hypre_ParCSRMatrixOffd(block);
      HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd);
      HYPRE_Int *block_col_map  = hypre_TAlloc(HYPRE_Int, block_offd_cols);

      j = 0;
      for (i = 0; i < offd_cols; i++)
      {
         if (offd_col_block_num[i] == bj)
         {
            block_col_map[j++] = offd_global_col[i];
         }
      }
      hypre_assert(j == block_offd_cols);

      hypre_ParCSRMatrixColMapOffd(block) = block_col_map;
   }

   hypre_TFree(offd_global_col);
   hypre_TFree(offd_col_block_num);

   /* finish the new matrices, make them own all the stuff */
   for (blk = 0; blk < num_blocks; blk++)
   {
      hypre_ParCSRMatrixSetNumNonzeros(blocks[blk]);
      hypre_MatvecCommPkgCreate(blocks[blk]);

      hypre_ParCSRMatrixOwnsData(blocks[blk]) = 1;

      /* only the first block will own the row/col_starts */
      hypre_ParCSRMatrixOwnsRowStarts(blocks[blk]) = (blk == 0);
      hypre_ParCSRMatrixOwnsColStarts(blocks[blk]) = (blk == 0);
   }
}
/****************************************************************************** * * hypre_IJMatrixInsertRowPETSc * * inserts a row into an IJMatrix, * if diag_i and offd_i are known, those values are inserted directly * into the ParCSRMatrix, * if they are not known, an auxiliary structure, AuxParCSRMatrix is used * *****************************************************************************/ HYPRE_Int hypre_IJMatrixInsertRowPETSc(hypre_IJMatrix *matrix, HYPRE_Int n, HYPRE_Int row, HYPRE_Int *indices, double *coeffs) { HYPRE_Int ierr = 0; hypre_ParCSRMatrix *par_matrix; hypre_AuxParCSRMatrix *aux_matrix; HYPRE_Int *row_starts; HYPRE_Int *col_starts; MPI_Comm comm = hypre_IJMatrixContext(matrix); HYPRE_Int num_procs, my_id; HYPRE_Int row_local; HYPRE_Int col_0, col_n; HYPRE_Int i, temp; HYPRE_Int *indx_diag, *indx_offd; HYPRE_Int **aux_j; HYPRE_Int *local_j; double **aux_data; double *local_data; HYPRE_Int diag_space, offd_space; HYPRE_Int *row_length, *row_space; HYPRE_Int need_aux; HYPRE_Int indx_0; HYPRE_Int diag_indx, offd_indx; hypre_CSRMatrix *diag; HYPRE_Int *diag_i; HYPRE_Int *diag_j; double *diag_data; hypre_CSRMatrix *offd; HYPRE_Int *offd_i; HYPRE_Int *offd_j; double *offd_data; hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); par_matrix = hypre_IJMatrixLocalStorage( matrix ); aux_matrix = hypre_IJMatrixTranslator(matrix); row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix); row_length = hypre_AuxParCSRMatrixRowLength(aux_matrix); col_n = hypre_ParCSRMatrixFirstColDiag(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); col_0 = col_starts[my_id]; col_n = col_starts[my_id+1]-1; need_aux = hypre_AuxParCSRMatrixNeedAux(aux_matrix); if (row >= row_starts[my_id] && row < row_starts[my_id+1]) { if (need_aux) { row_local = row - row_starts[my_id]; /* compute local row number */ aux_j = hypre_AuxParCSRMatrixAuxJ(aux_matrix); aux_data = 
hypre_AuxParCSRMatrixAuxData(aux_matrix); local_j = aux_j[row_local]; local_data = aux_data[row_local]; row_length[row_local] = n; if ( row_space[row_local] < n) { hypre_TFree(local_j); hypre_TFree(local_data); local_j = hypre_CTAlloc(HYPRE_Int,n); local_data = hypre_CTAlloc(double,n); row_space[row_local] = n; } for (i=0; i < n; i++) { local_j[i] = indices[i]; local_data[i] = coeffs[i]; } /* make sure first element is diagonal element, if not, find it and exchange it with first element */ if (local_j[0] != row_local) { for (i=1; i < n; i++) { if (local_j[i] == row_local) { local_j[i] = local_j[0]; local_j[0] = row_local; temp = local_data[0]; local_data[0] = local_data[i]; local_data[i] = temp; break; } } } /* sort data according to column indices, except for first element */ qsort1(local_j,local_data,1,n-1); } else /* insert immediately into data into ParCSRMatrix structure */ {
/*
  Function:  hypre_ParCSRMatrixEliminateAAe

                    (input)                  (output)

                / A_ii | A_ib \          / A_ii |  0   \
            A = | -----+----- |   --->   | -----+----- |
                \ A_bi | A_bb /          \   0  |  I   /

                                         /   0  |   A_ib   \
                                    Ae = | -----+--------- |
                                         \ A_bi | A_bb - I /

  A                     matrix to eliminate from
  Ae                    receives a newly created matrix; it reuses A's
                        row/column start arrays and is marked as not
                        owning them
  num_rowscols_to_elim  number of entries in rowscols_to_elim
  rowscols_to_elim      local indices of the rows/columns to eliminate
                        (they index an array of A's local row count)
*/
void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A,
                                    hypre_ParCSRMatrix **Ae,
                                    HYPRE_Int num_rowscols_to_elim,
                                    HYPRE_Int *rowscols_to_elim)
{
   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int A_diag_nrows  = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int A_offd_ncols  = hypre_CSRMatrixNumCols(A_offd);

   HYPRE_Int row, col, send, elmt, next;

   hypre_ParCSRMatrix *elim =
      hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A),
                               hypre_ParCSRMatrixGlobalNumRows(A),
                               hypre_ParCSRMatrixGlobalNumCols(A),
                               hypre_ParCSRMatrixRowStarts(A),
                               hypre_ParCSRMatrixColStarts(A),
                               0, 0, 0);
   *Ae = elim;

   /* the partitioning arrays stay the property of A */
   hypre_ParCSRMatrixSetRowStartsOwner(elim, 0);
   hypre_ParCSRMatrixSetColStartsOwner(elim, 0);

   hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(elim);
   hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(elim);

   HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

   /* ---- figure out which offd cols should be eliminated ---- */

   HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows);
   HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

   /* make sure A has a communication package */
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* flag the local rows that are to be eliminated */
   for (row = 0; row < A_diag_nrows; row++)
   {
      eliminate_row[row] = 0;
   }
   for (row = 0; row < num_rowscols_to_elim; row++)
   {
      eliminate_row[rowscols_to_elim[row]] = 1;
   }

   /* use a Matvec communication pattern to find (in eliminate_col)
      which of the local offd columns are to be eliminated */
   HYPRE_Int  num_sends    = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *int_buf_data =
      hypre_CTAlloc(HYPRE_Int,
                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

   next = 0;
   for (send = 0; send < num_sends; send++)
   {
      HYPRE_Int stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);

      for (elmt = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send);
           elmt < stop; elmt++)
      {
         int_buf_data[next++] =
            eliminate_row[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, elmt)];
      }
   }

   hypre_ParCSRCommHandle *comm_handle =
      hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col);

   /* eliminate diagonal part, overlapping it with communication; the same
      index list serves as both the row list and the column list */
   hypre_CSRMatrixElimCreate(A_diag, Ae_diag,
                             num_rowscols_to_elim, rowscols_to_elim,
                             num_rowscols_to_elim, rowscols_to_elim,
                             NULL);

   hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    1, NULL);

   hypre_CSRMatrixReorder(Ae_diag);

   /* finish the communication */
   hypre_ParCSRCommHandleDestroy(comm_handle);

   /* received eliminate_col[]: count the flagged offd columns, then
      collect their indices */
   HYPRE_Int num_offd_cols_to_elim = 0;
   for (col = 0; col < A_offd_ncols; col++)
   {
      if (eliminate_col[col])
      {
         num_offd_cols_to_elim++;
      }
   }

   HYPRE_Int *offd_cols_to_elim =
      hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim);

   next = 0;
   for (col = 0; col < A_offd_ncols; col++)
   {
      if (eliminate_col[col])
      {
         offd_cols_to_elim[next++] = col;
      }
   }
   num_offd_cols_to_elim = next;

   hypre_TFree(int_buf_data);
   hypre_TFree(eliminate_row);
   hypre_TFree(eliminate_col);

   /* ---- eliminate the off-diagonal part ---- */

   HYPRE_Int *col_mark  = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);
   HYPRE_Int *col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols);

   hypre_CSRMatrixElimCreate(A_offd, Ae_offd,
                             num_rowscols_to_elim, rowscols_to_elim,
                             num_offd_cols_to_elim, offd_cols_to_elim,
                             col_mark);

   /* marked columns are renumbered consecutively; unmarked ones keep the
      zero they were allocated with */
   next = 0;
   for (col = 0; col < A_offd_ncols; col++)
   {
      if (col_mark[col])
      {
         col_remap[col] = next++;
      }
   }

   hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    num_offd_cols_to_elim, offd_cols_to_elim,
                                    0, col_remap);

   /* create col_map_offd for Ae: the global numbers of the marked columns */
   HYPRE_Int Ae_offd_ncols = 0;
   for (col = 0; col < A_offd_ncols; col++)
   {
      if (col_mark[col])
      {
         Ae_offd_ncols++;
      }
   }

   HYPRE_Int *Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols);

   next = 0;
   for (col = 0; col < A_offd_ncols; col++)
   {
      if (col_mark[col])
      {
         Ae_col_map_offd[next++] = A_col_map_offd[col];
      }
   }
   Ae_offd_ncols = next;

   hypre_ParCSRMatrixColMapOffd(elim) = Ae_col_map_offd;
   hypre_CSRMatrixNumCols(Ae_offd)    = Ae_offd_ncols;

   hypre_TFree(col_remap);
   hypre_TFree(col_mark);
   hypre_TFree(offd_cols_to_elim);

   hypre_ParCSRMatrixSetNumNonzeros(elim);
   hypre_MatvecCommPkgCreate(elim);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRAAt
 *
 * Computes and returns C = A * A^T for a ParCSR matrix A whose global row
 * and column counts are equal.  If they differ, an error message is printed
 * and NULL is returned.
 *
 * When this process does not hold all rows of A, the rows of A stored on
 * other processes that are needed locally are fetched into A_ext (with
 * A_ext_row_map giving a global row number per fetched row).  Row sizes of
 * C are obtained from hypre_ParAat_RowSizes; a second pass fills in the
 * column indices and values.
 *
 * C reuses A's row partitioning for both rows and columns and is marked as
 * not owning it.  All temporaries created here (A_ext, A_ext_row_map, the
 * marker array) are released before returning.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix *A )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag      = hypre_ParCSRMatrixDiag(A);
   HYPRE_Complex   *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i    = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j    = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd      = hypre_ParCSRMatrixOffd(A);
   HYPRE_Complex   *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i    = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j    = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *A_col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int       *row_starts_A      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_offd_A   = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int        first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A);
   HYPRE_Int        first_col_diag_A  = hypre_ParCSRMatrixFirstColDiag(A);

   /* A_ext only exists when rows live on other processes.  Everything
      derived from it starts out NULL/0 so that no indeterminate pointer is
      ever passed on or released. */
   hypre_CSRMatrix *A_ext          = NULL;
   HYPRE_Complex   *A_ext_data     = NULL;
   HYPRE_Int       *A_ext_i        = NULL;
   HYPRE_Int       *A_ext_j        = NULL;
   HYPRE_Int       *A_ext_row_map  = NULL;
   HYPRE_Int        num_rows_A_ext = 0;
   HYPRE_Int        have_A_ext;

   hypre_ParCSRMatrix *C;
   HYPRE_Int          *col_map_offd_C = NULL;

   hypre_CSRMatrix *C_diag;
   HYPRE_Complex   *C_diag_data;
   HYPRE_Int       *C_diag_i;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;
   HYPRE_Complex   *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i    = NULL;
   HYPRE_Int       *C_offd_j    = NULL;
   HYPRE_Int       *new_C_offd_j;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        last_col_diag_C;
   HYPRE_Int        num_cols_offd_C;

   HYPRE_Int       *B_marker;

   HYPRE_Int        i;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        count;
   HYPRE_Int        n_rows_A, n_cols_A;

   HYPRE_Complex    a_entry;
   HYPRE_Complex    a_b_product;

   HYPRE_Complex    zero = 0.0;

   n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A);

   if (n_cols_A != n_rows_A)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }

   /*-----------------------------------------------------------------------
    *  Extract A_ext, i.e. portion of A that is stored on neighbor procs
    *  and needed locally for A^T in the matrix matrix product A*A^T
    *-----------------------------------------------------------------------*/

   have_A_ext = (num_rows_diag_A != n_rows_A);
   if (have_A_ext)
   {
      /*---------------------------------------------------------------------
       * hypre_MatTCommPkgCreate stores its result in the CommPkgT slot of
       * A, so that is the slot tested here: the package is built exactly
       * when it is missing and an existing one is never replaced.
       * NOTE(review): presumably hypre_ParCSRMatrixExtractAExt reads
       * CommPkgT as well -- confirm against its definition.
       *--------------------------------------------------------------------*/
      if (!hypre_ParCSRMatrixCommPkgT(A))
      {
         hypre_MatTCommPkgCreate(A);
      }

      A_ext          = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map );
      A_ext_data     = hypre_CSRMatrixData(A_ext);
      A_ext_i        = hypre_CSRMatrixI(A_ext);
      A_ext_j        = hypre_CSRMatrixJ(A_ext);
      num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext);
   }

   /*-----------------------------------------------------------------------
    *  Allocate marker array.  Slots [0, num_rows_diag_A) belong to the
    *  local rows of A, the remaining num_rows_A_ext slots to rows of A_ext.
    *-----------------------------------------------------------------------*/

   B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext );

   /*-----------------------------------------------------------------------
    *  First pass: row sizes of C_diag and C_offd.
    *-----------------------------------------------------------------------*/

   for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
   {
      B_marker[i1] = -1;
   }

   hypre_ParAat_RowSizes(
      &C_diag_i, &C_offd_i, B_marker,
      A_diag_i, A_diag_j,
      A_offd_i, A_offd_j, A_col_map_offd,
      A_ext_i, A_ext_j, A_ext_row_map,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A,
      num_rows_A_ext,
      first_col_diag_A, first_row_index_A
      );

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   last_col_diag_C = first_row_index_A + num_rows_diag_A - 1;
   C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *
    *  B_marker[x] holds the position in C_diag/C_offd of the entry created
    *  for column x in the current row; a value below the row's starting
    *  position means "not yet created in this row".
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 )
   {
      B_marker[i1] = -1;
   }

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      /*--------------------------------------------------------------------
       *  Create diagonal entry, C_{i1,i1}, first in its row.
       *--------------------------------------------------------------------*/

      B_marker[i1] = jj_count_diag;
      jj_row_begin_diag = jj_count_diag;
      jj_row_begin_offd = jj_count_offd;
      C_diag_data[jj_count_diag] = zero;
      C_diag_j[jj_count_diag] = i1;
      jj_count_diag++;

      /*--------------------------------------------------------------------
       *  Four products contribute to row i1 of C: diag*ext, offd*ext,
       *  diag*diag and offd*offd.
       *--------------------------------------------------------------------*/

      /*--------------------------------------------------------------------
       *  diag * ext
       *
       *  For each entry (i1,i2) of A_diag, look for rows i3 of A_ext that
       *  have an entry in the same global column (i2+first_col_diag_A).
       *  For now every row of A_ext is scanned in full.  Each match adds
       *  A(i1,i2)*A_ext(i3,i2) to C(i1,i3).
       *--------------------------------------------------------------------*/

      for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
      {
         i2 = A_diag_j[jj2];
         a_entry = A_diag_data[jj2];

         for ( i3=0; i3<num_rows_A_ext; i3++ )
         {
            for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ )
            {
               if ( A_ext_j[jj3]==i2+first_col_diag_A )
               {
                  a_b_product = a_entry * A_ext_data[jj3];

                  if ( A_ext_row_map[i3] < first_row_index_A ||
                       A_ext_row_map[i3] > last_col_diag_C )
                  {
                     /* offd: row i3 of A_ext is owned by another process */
                     if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd)
                     {
                        B_marker[i3+num_rows_diag_A] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                     }
                  }
                  else
                  {
                     /* diag: row i3 of A_ext maps into the local row range.
                        NOTE(review): the column stored here is
                        i3-first_col_diag_A while the offd*ext loop below
                        stores i3-first_row_index_A; i3 is an A_ext row
                        index, not a global one -- confirm this branch is
                        ever taken and which value is intended. */
                     if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag)
                     {
                        B_marker[i3+num_rows_diag_A] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3-first_col_diag_A;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                     }
                  }
               }
            }
         }
      }

      /*--------------------------------------------------------------------
       *  offd * ext
       *
       *  Same as above for the entries (i1,i2) of A_offd, whose global
       *  column is A_col_map_offd[i2].
       *--------------------------------------------------------------------*/

      if (num_cols_offd_A)
      {
         for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
         {
            i2 = A_offd_j[jj2];
            a_entry = A_offd_data[jj2];

            for ( i3=0; i3<num_rows_A_ext; i3++ )
            {
               for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ )
               {
                  if ( A_ext_j[jj3]==A_col_map_offd[i2] )
                  {
                     a_b_product = a_entry * A_ext_data[jj3];

                     if ( A_ext_row_map[i3] < first_row_index_A ||
                          A_ext_row_map[i3] > last_col_diag_C )
                     {
                        /* offd */
                        if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd)
                        {
                           B_marker[i3+num_rows_diag_A] = jj_count_offd;
                           C_offd_data[jj_count_offd] = a_b_product;
                           C_offd_j[jj_count_offd] = i3;
                           jj_count_offd++;
                        }
                        else
                        {
                           C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                     }
                     else
                     {
                        /* diag -- see the NOTE(review) in the diag*ext loop */
                        if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag)
                        {
                           B_marker[i3+num_rows_diag_A] = jj_count_diag;
                           C_diag_data[jj_count_diag] = a_b_product;
                           C_diag_j[jj_count_diag] = i3-first_row_index_A;
                           jj_count_diag++;
                        }
                        else
                        {
                           C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product;
                        }
                     }
                  }
               }
            }
         }
      }

      /*--------------------------------------------------------------------
       *  diag * diag
       *
       *  For each entry (i1,i2) of A_diag, scan all local rows i3 of
       *  A_diag for an entry in column i2; each match adds
       *  A(i1,i2)*A(i3,i2) to C(i1,i3) in the diagonal block of C.
       *--------------------------------------------------------------------*/

      for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
      {
         i2 = A_diag_j[jj2];
         a_entry = A_diag_data[jj2];

         for ( i3=0; i3<num_rows_diag_A; i3++ )
         {
            for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ )
            {
               if ( A_diag_j[jj3]==i2 )
               {
                  a_b_product = a_entry * A_diag_data[jj3];

                  if (B_marker[i3] < jj_row_begin_diag)
                  {
                     B_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[B_marker[i3]] += a_b_product;
                  }
               }
            }
         }
      }

      /*--------------------------------------------------------------------
       *  offd * offd
       *
       *  For each entry (i1,i2) of A_offd, scan all local rows i3 of
       *  A_offd (it has as many rows as A_diag) for an entry in the same
       *  offd column i2; each match contributes to the diagonal block of C.
       *--------------------------------------------------------------------*/

      if (num_cols_offd_A)
      {
         for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
         {
            i2 = A_offd_j[jj2];
            a_entry = A_offd_data[jj2];

            for ( i3=0; i3<num_rows_diag_A; i3++ )
            {
               for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ )
               {
                  if ( A_offd_j[jj3]==i2 )
                  {
                     a_b_product = a_entry * A_offd_data[jj3];

                     if (B_marker[i3] < jj_row_begin_diag)
                     {
                        B_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[B_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
         }
      }
   }  /* end of i1 loop */

   /*-----------------------------------------------------------------------
    *  Delete 0-columns in C_offd, i.e. generate col_map_offd and reset
    *  C_offd_j.  Coming into this block C_offd_j holds A_ext row indices;
    *  the ones actually used are renumbered consecutively and
    *  col_map_offd_C receives the corresponding A_ext_row_map values.
    *-----------------------------------------------------------------------*/

   for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i )
   {
      B_marker[i] = -1;
   }
   for ( i=0; i<C_offd_size; i++ )
   {
      B_marker[ C_offd_j[i] ] = -2;
   }

   count = 0;
   for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++)
   {
      if (B_marker[i] == -2)
      {
         B_marker[i] = count;
         count++;
      }
   }
   num_cols_offd_C = count;

   if (num_cols_offd_C)
   {
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int, num_cols_offd_C);
      new_C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size);

      for (i=0; i < C_offd_size; i++)
      {
         new_C_offd_j[i] = B_marker[C_offd_j[i]];
         col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ];
      }

      hypre_TFree(C_offd_j);
      C_offd_j = new_C_offd_j;
   }

   /*----------------------------------------------------------------
    * Create C
    *----------------------------------------------------------------*/

   C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A,
                                row_starts_A, num_cols_offd_C,
                                C_diag_size, C_offd_size);

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag) = C_diag_i;
   hypre_CSRMatrixJ(C_diag) = C_diag_j;

   if (num_cols_offd_C)
   {
      C_offd = hypre_ParCSRMatrixOffd(C);
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixI(C_offd) = C_offd_i;
      hypre_CSRMatrixJ(C_offd) = C_offd_j;
      hypre_ParCSRMatrixOffd(C) = C_offd;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }
   else
   {
      hypre_TFree(C_offd_i);
   }

   /*-----------------------------------------------------------------------
    *  Free A_ext, its row map and the marker array.  A_ext and
    *  A_ext_row_map are released under the same condition under which
    *  they were created, so A_ext is neither leaked when A has no offd
    *  columns nor destroyed when it was never extracted.
    *-----------------------------------------------------------------------*/

   if (have_A_ext)
   {
      hypre_CSRMatrixDestroy(A_ext);
      A_ext = NULL;
      hypre_TFree(A_ext_row_map);
   }
   hypre_TFree(B_marker);

   return C;
}
/*--------------------------------------------------------------------------
 * Test driver for ParCSR matrix / multivector products.
 *
 * Rank 0 reads a sequential CSR matrix from the file "input"; the matrix is
 * converted to a ParCSR matrix, printed in several formats, and read back
 * from the "matrix" files.  Multivectors with 3 columns are then built on
 * the column and row partitionings of the matrix, and a matvec (y += A*x)
 * and a transposed matvec (x2 += A^T*y2) are executed.  Inputs and results
 * are written to the files "vectorx", "vectory", "result" and "transp".
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc, char *argv[] )
{
   /* Only rank 0 reads the sequential matrix; the pointer is still passed by
      value on every rank, so it must hold a defined value everywhere. */
   hypre_CSRMatrix    *matrix = NULL;
   hypre_CSRMatrix    *matrix1;
   hypre_ParCSRMatrix *par_matrix;
   hypre_Vector       *x_local;
   hypre_Vector       *y2_local;
   hypre_ParVector    *x;
   hypre_ParVector    *x2;
   hypre_ParVector    *y;
   hypre_ParVector    *y2;
   HYPRE_Int           vecstride_x, idxstride_x, vecstride_y, idxstride_y;

   HYPRE_Int           num_procs, my_id;
   HYPRE_Int           local_size;
   HYPRE_Int           num_vectors;
   HYPRE_Int           global_num_rows, global_num_cols;
   HYPRE_Int           first_index;
   HYPRE_Int           i, j, ierr=0;
   double             *data, *data2;
   HYPRE_Int          *row_starts, *col_starts;
   char                file_name[80];

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id);

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);

   if (my_id == 0)
   {
      matrix = hypre_CSRMatrixRead("input");
      hypre_printf(" read input\n");
   }

   /* NULL partitionings: the conversion routine is handed no row/column
      starts by this driver. */
   row_starts = NULL;
   col_starts = NULL;
   par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix,
                                              row_starts, col_starts);
   hypre_printf(" converted\n");

   matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix);

   hypre_sprintf(file_name,"matrix1.%d",my_id);

   if (matrix1) hypre_CSRMatrixPrint(matrix1, file_name);

   hypre_ParCSRMatrixPrint(par_matrix,"matrix");
   hypre_ParCSRMatrixPrintIJ(par_matrix,0,0,"matrixIJ");

   /* The converted matrix has been written out; release it before the handle
      is reused for the matrix read back from file (it was leaked before). */
   hypre_ParCSRMatrixDestroy(par_matrix);
   par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD,"matrix");

   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   hypre_printf(" global_num_cols %d\n", global_num_cols);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);

   /* x and x2 live on the column partitioning of the matrix */
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   first_index = col_starts[my_id];
   local_size = col_starts[my_id+1] - first_index;

   num_vectors = 3;

   x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                   col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x,0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data = hypre_VectorData(x_local);
   vecstride_x = hypre_VectorVectorStride(x_local);
   idxstride_x = hypre_VectorIndexStride(x_local);
   /* entry i of vector j gets (global index + 1) + 100*j */
   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data[i*idxstride_x + j*vecstride_x] = first_index+i+1 + 100*j;

   x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                    col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x2,0);
   hypre_ParVectorInitialize(x2);
   hypre_ParVectorSetConstantValues(x2,2.0);

   /* y and y2 live on the row partitioning of the matrix */
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   first_index = row_starts[my_id];
   local_size = row_starts[my_id+1] - first_index;

   y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                   row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y,0);
   hypre_ParVectorInitialize(y);

   y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                    row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y2,0);
   hypre_ParVectorInitialize(y2);
   y2_local = hypre_ParVectorLocalVector(y2);
   data2 = hypre_VectorData(y2_local);
   vecstride_y = hypre_VectorVectorStride(y2_local);
   idxstride_y = hypre_VectorIndexStride(y2_local);

   for ( j=0; j<num_vectors; ++j )
      for (i=0; i < local_size; i++)
         data2[i*idxstride_y+j*vecstride_y] = first_index+i+1 + 100*j;

   hypre_ParVectorSetConstantValues(y,1.0);
   hypre_printf(" initialized vectors, first_index=%i\n", first_index);

   hypre_ParVectorPrint(x, "vectorx");
   hypre_ParVectorPrint(y, "vectory");

   hypre_MatvecCommPkgCreate(par_matrix);
   hypre_ParCSRMatrixMatvec ( 1.0, par_matrix, x, 1.0, y);
   hypre_printf(" did matvec\n");

   hypre_ParVectorPrint(y, "result");

   ierr = hypre_ParCSRMatrixMatvecT ( 1.0, par_matrix, y2, 1.0, x2);
   hypre_printf(" did matvecT %d\n", ierr);

   hypre_ParVectorPrint(x2, "transp");

   hypre_ParCSRMatrixDestroy(par_matrix);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(x2);
   hypre_ParVectorDestroy(y);
   hypre_ParVectorDestroy(y2);
   if (my_id == 0) hypre_CSRMatrixDestroy(matrix);
   if (matrix1) hypre_CSRMatrixDestroy(matrix1);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
HYPRE_Int AmgCGCGraphAssemble (hypre_ParCSRMatrix *S,HYPRE_Int *vertexrange,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd,HYPRE_Int coarsen_type, HYPRE_IJMatrix *ijG) /* assemble a graph representing the connections between the grids * ================================================================================================ * S : the strength matrix * vertexrange : the parallel layout of the candidate coarse grid vertices * CF_marker, CF_marker_offd : the coarse/fine markers * coarsen_type : the coarsening type * ijG : the created graph * ================================================================================================*/ { HYPRE_Int ierr=0; HYPRE_Int i,/* ii,*/ip,j,jj,m,n,p; HYPRE_Int mpisize,mpirank; HYPRE_Real weight; MPI_Comm comm = hypre_ParCSRMatrixComm(S); /* hypre_MPI_Status status; */ HYPRE_IJMatrix ijmatrix; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S); /* HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); */ /* HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); */ HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j = NULL; HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); HYPRE_Int pointrange_start,pointrange_end; HYPRE_Int *pointrange,*pointrange_nonlocal,*pointrange_strong=NULL; HYPRE_Int vertexrange_start,vertexrange_end; HYPRE_Int *vertexrange_strong= NULL; HYPRE_Int *vertexrange_nonlocal; HYPRE_Int num_recvs,num_recvs_strong; HYPRE_Int *recv_procs,*recv_procs_strong=NULL; HYPRE_Int /* *zeros,*rownz,*/*rownz_diag,*rownz_offd; HYPRE_Int nz; HYPRE_Int nlocal; HYPRE_Int one=1; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S); hypre_MPI_Comm_size (comm,&mpisize); hypre_MPI_Comm_rank (comm,&mpirank); /* determine neighbor processors */ num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs 
(comm_pkg); pointrange = hypre_ParCSRMatrixRowStarts (S); pointrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); vertexrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); #ifdef HYPRE_NO_GLOBAL_PARTITION { HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg); HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs (comm_pkg); HYPRE_Int *int_buf_data = hypre_CTAlloc (HYPRE_Int,4*num_sends); HYPRE_Int *int_buf_data2 = int_buf_data + 2*num_sends; hypre_MPI_Request *sendrequest,*recvrequest; nlocal = vertexrange[1] - vertexrange[0]; pointrange_start = pointrange[0]; pointrange_end = pointrange[1]; vertexrange_start = vertexrange[0]; vertexrange_end = vertexrange[1]; sendrequest = hypre_CTAlloc (hypre_MPI_Request,2*(num_sends+num_recvs)); recvrequest = sendrequest+2*num_sends; for (i=0;i<num_recvs;i++) { hypre_MPI_Irecv (pointrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_pointrange,comm,&recvrequest[2*i]); hypre_MPI_Irecv (vertexrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_vertexrange,comm,&recvrequest[2*i+1]); } for (i=0;i<num_sends;i++) { int_buf_data[2*i] = pointrange_start; int_buf_data[2*i+1] = pointrange_end; int_buf_data2[2*i] = vertexrange_start; int_buf_data2[2*i+1] = vertexrange_end; hypre_MPI_Isend (int_buf_data+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_pointrange,comm,&sendrequest[2*i]); hypre_MPI_Isend (int_buf_data2+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_vertexrange,comm,&sendrequest[2*i+1]); } hypre_MPI_Waitall (2*(num_sends+num_recvs),sendrequest,hypre_MPI_STATUSES_IGNORE); hypre_TFree (int_buf_data); hypre_TFree (sendrequest); } #else nlocal = vertexrange[mpirank+1] - vertexrange[mpirank]; pointrange_start = pointrange[mpirank]; pointrange_end = pointrange[mpirank+1]; vertexrange_start = vertexrange[mpirank]; vertexrange_end = vertexrange[mpirank+1]; for (i=0;i<num_recvs;i++) { pointrange_nonlocal[2*i] = pointrange[recv_procs[i]]; pointrange_nonlocal[2*i+1] = pointrange[recv_procs[i]+1]; vertexrange_nonlocal[2*i] = 
vertexrange[recv_procs[i]]; vertexrange_nonlocal[2*i+1] = vertexrange[recv_procs[i]+1]; } #endif /* now we have the array recv_procs. However, it may contain too many entries as it is inherited from A. We now have to determine the subset which contains only the strongly connected neighbors */ if (num_cols_offd) { S_offd_j = hypre_CSRMatrixJ(S_offd); recv_procs_strong = hypre_CTAlloc (HYPRE_Int,num_recvs); memset (recv_procs_strong,0,num_recvs*sizeof(HYPRE_Int)); /* don't forget to shorten the pointrange and vertexrange arrays accordingly */ pointrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (pointrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); vertexrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (vertexrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); for (i=0;i<num_variables;i++) for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = col_map_offd[S_offd_j[j]]; for (p=0;p<num_recvs;p++) /* S_offd_j is NOT sorted! */ if (jj >= pointrange_nonlocal[2*p] && jj < pointrange_nonlocal[2*p+1]) break; #if 0 hypre_printf ("Processor %d, remote point %d on processor %d\n",mpirank,jj,recv_procs[p]); #endif recv_procs_strong [p]=1; } for (p=0,num_recvs_strong=0;p<num_recvs;p++) { if (recv_procs_strong[p]) { recv_procs_strong[num_recvs_strong]=recv_procs[p]; pointrange_strong[2*num_recvs_strong] = pointrange_nonlocal[2*p]; pointrange_strong[2*num_recvs_strong+1] = pointrange_nonlocal[2*p+1]; vertexrange_strong[2*num_recvs_strong] = vertexrange_nonlocal[2*p]; vertexrange_strong[2*num_recvs_strong+1] = vertexrange_nonlocal[2*p+1]; num_recvs_strong++; } } } else num_recvs_strong=0; hypre_TFree (pointrange_nonlocal); hypre_TFree (vertexrange_nonlocal); rownz_diag = hypre_CTAlloc (HYPRE_Int,2*nlocal); rownz_offd = rownz_diag + nlocal; for (p=0,nz=0;p<num_recvs_strong;p++) { nz += vertexrange_strong[2*p+1]-vertexrange_strong[2*p]; } for (m=0;m<nlocal;m++) { rownz_diag[m]=nlocal-1; rownz_offd[m]=nz; } HYPRE_IJMatrixCreate(comm, vertexrange_start, vertexrange_end-1, 
vertexrange_start, vertexrange_end-1, &ijmatrix); HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR); HYPRE_IJMatrixSetDiagOffdSizes (ijmatrix, rownz_diag, rownz_offd); HYPRE_IJMatrixInitialize(ijmatrix); hypre_TFree (rownz_diag); /* initialize graph */ weight = -1; for (m=vertexrange_start;m<vertexrange_end;m++) { for (p=0;p<num_recvs_strong;p++) { for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while initializing graphs at (%d, %d)\n",mpirank,ierr,m,n); #endif } } } /* weight graph */ for (i=0;i<num_variables;i++) { for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = S_offd_j[j]; /* jj is not a global index!!! */ /* determine processor */ for (p=0;p<num_recvs_strong;p++) if (col_map_offd[jj] >= pointrange_strong[2*p] && col_map_offd[jj] < pointrange_strong[2*p+1]) break; ip=recv_procs_strong[p]; /* loop over all coarse grids constructed on this processor domain */ for (m=vertexrange_start;m<vertexrange_end;m++) { /* loop over all coarse grids constructed on neighbor processor domain */ for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { /* coarse grid counting inside gridpartition->local/gridpartition->nonlocal starts with one while counting inside range starts with zero */ if (CF_marker[i]-1==m && CF_marker_offd[jj]-1==n) /* C-C-coupling */ weight = -1; else if ( (CF_marker[i]-1==m && (CF_marker_offd[jj]==0 || CF_marker_offd[jj]-1!=n) ) || ( (CF_marker[i]==0 || CF_marker[i]-1!=m) && CF_marker_offd[jj]-1==n ) ) /* C-F-coupling */ weight = 0; else weight = -8; /* F-F-coupling */ ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while adding %lf to entry (%d, %d)\n",mpirank,ierr,weight,m,n); #endif } } } } /* assemble */ HYPRE_IJMatrixAssemble (ijmatrix); /*if (num_recvs_strong) {*/ hypre_TFree (recv_procs_strong); hypre_TFree (pointrange_strong); 
hypre_TFree (vertexrange_strong); /*} */ *ijG = ijmatrix; return (ierr); }
/*--------------------------------------------------------------------------
 * Helper for hypre_BoomerAMGCreateNodalA: condense one scalar CSR block
 * (A_i, A_j, A_data) into the nodal arrays AN_j / AN_data, reducing all the
 * entries that fall into the same (node row, node column) pair to a single
 * value.
 *
 * mode          : reduction applied to each group of entries
 *                   1  frobenius norm
 *                   2  sum of abs. values divided by num_functions^2
 *                   3  element of largest abs. value (true value is stored)
 *                   4  inf. norm (largest row-sum of abs. values)
 *                   6  sum of all elements
 *                 any other value leaves AN_j and AN_data untouched
 * num_nodes     : number of node rows; node i owns scalar rows
 *                 i*num_functions .. (i+1)*num_functions-1
 * col_to_node   : maps a scalar column index of the block to its node column
 * counter       : work array, one entry per node column, all set to -1 on
 *                 entry; holds the position in AN_j of the entry most recently
 *                 created for that node column
 * AN_nnz        : number of nodal entries (length of AN_j and AN_data)
 *
 * Entries are created in order of first appearance within each node row, so
 * AN_j has the same layout as the row pointer computed by the caller.
 *--------------------------------------------------------------------------*/
static void
hypre_NodalACondenseBlock(HYPRE_Int mode, HYPRE_Int num_nodes, HYPRE_Int num_functions,
                          const HYPRE_Int *A_i, const HYPRE_Int *A_j, const double *A_data,
                          const HYPRE_Int *col_to_node, HYPRE_Int *counter,
                          HYPRE_Int AN_nnz, HYPRE_Int *AN_j, double *AN_data)
{
   HYPRE_Int i, j, k, k_map, pos;
   HYPRE_Int row = 0;
   HYPRE_Int index = 0;        /* next free position in AN_j / AN_data        */
   HYPRE_Int start_index = 0;  /* first position belonging to current node row */
   HYPRE_Int num_fun2 = num_functions*num_functions;
   double   *data = NULL;      /* mode 4: per-entry row-sums, one per function */
   double    val;

   if (mode != 1 && mode != 2 && mode != 3 && mode != 4 && mode != 6)
      return;

   if (mode == 4)
      data = hypre_CTAlloc(double, AN_nnz*num_functions);

   for (i=0; i < num_nodes; i++)
   {
      for (j=0; j < num_functions; j++)
      {
         for (k=A_i[row]; k < A_i[row+1]; k++)
         {
            k_map = col_to_node[A_j[k]];
            val = A_data[k];
            if (counter[k_map] < start_index)
            {
               /* first entry of this node row that lands in node column k_map */
               counter[k_map] = index;
               AN_j[index] = k_map;
               switch (mode)
               {
                  case 1: AN_data[index] = val*val; break;
                  case 2: AN_data[index] = fabs(val); break;
                  case 4: data[index*num_functions + j] = fabs(val); break;
                  case 3:
                  case 6:
                  default: AN_data[index] = val; break;
               }
               index++;
            }
            else
            {
               /* node column already present in this node row: accumulate */
               pos = counter[k_map];
               switch (mode)
               {
                  case 1: AN_data[pos] += val*val; break;
                  case 2: AN_data[pos] += fabs(val); break;
                  case 3:
                     if (fabs(val) > fabs(AN_data[pos]))
                        AN_data[pos] = val;
                     break;
                  case 4: data[pos*num_functions + j] += fabs(val); break;
                  case 6:
                  default: AN_data[pos] += val; break;
               }
            }
         }
         row++;
      }
      start_index = index;
   }

   if (mode == 1)
   {
      for (i=0; i < AN_nnz; i++)
         AN_data[i] = sqrt(AN_data[i]);
   }
   else if (mode == 2)
   {
      for (i=0; i < AN_nnz; i++)
         AN_data[i] /= num_fun2;
   }
   else if (mode == 4)
   {
      /* the entry value is the largest of its per-function row-sums */
      for (i=0; i < AN_nnz; i++)
      {
         AN_data[i] = data[i*num_functions];
         for (j=1; j< num_functions; j++)
         {
            AN_data[i] = hypre_max( AN_data[i],data[i*num_functions+j]);
         }
      }
      hypre_TFree(data);
   }
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateNodalA
 *
 * Builds the nodal matrix AN of A: every group of num_functions consecutive
 * unknowns forms one node, and all entries of A coupling one node to another
 * are condensed into a single entry of AN.
 *
 * A            : system matrix.  NOTE: A may be modified.  If the nodal
 *                off-processor columns expanded by num_functions exceed A's
 *                current off-processor columns, A's col_map_offd, the column
 *                indices and column count of its offd block and the
 *                recv_vec_starts of its comm package are rewritten; likewise
 *                the send map of its comm package may be expanded.
 * num_functions: number of unknowns per node
 * dof_func     : not used in this routine
 * option       : selects the reduction through its absolute value
 *                (1 frobenius norm, 2 scaled sum of abs. values, 3 largest
 *                element, 4 inf. norm, 6 sum of all elements)
 * diag_option  : 1 - the first entry of each row of AN_diag is replaced by
 *                    the negative sum of all other entries of the row
 *                    (diag and offd part);
 *                2 - the first entry of each row of AN_diag is negated
 * AN_ptr       : receives the created nodal matrix
 *
 * Returns 0, or 87 if a row start of A is not a multiple of num_functions
 * (nothing is created in that case).
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr)
{
   MPI_Comm            comm = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int          *A_diag_i = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix    *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int          *A_offd_i = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int          *A_diag_j = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int          *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int          *row_starts = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int          *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int           num_variables = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int           num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd = 0;

   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int          *AN_diag_i;
   HYPRE_Int          *AN_diag_j;
   double             *AN_diag_data;
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int          *AN_offd_i;
   HYPRE_Int          *AN_offd_j = NULL;
   double             *AN_offd_data = NULL;
   HYPRE_Int          *col_map_offd_AN;
   HYPRE_Int          *new_col_map_offd;
   HYPRE_Int          *row_starts_AN;
   HYPRE_Int           AN_num_nonzeros_diag = 0;
   HYPRE_Int           AN_num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd_AN;
   HYPRE_Int           new_num_cols_offd;

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int           num_sends;
   HYPRE_Int           num_recvs;
   HYPRE_Int          *send_procs;
   HYPRE_Int          *send_map_starts;
   HYPRE_Int          *send_map_elmts;
   HYPRE_Int          *new_send_map_elmts;
   HYPRE_Int          *recv_procs;
   HYPRE_Int          *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int          *send_procs_AN;
   HYPRE_Int          *send_map_starts_AN;
   HYPRE_Int          *send_map_elmts_AN;
   HYPRE_Int          *recv_procs_AN;
   HYPRE_Int          *recv_vec_starts_AN;

   HYPRE_Int           i, j, k, k_map;

   HYPRE_Int           ierr = 0;

   HYPRE_Int           index, row;
   HYPRE_Int           num_procs;
   HYPRE_Int           node, cnt;
   HYPRE_Int           mode;
   HYPRE_Int           new_send_elmts_size;

   HYPRE_Int           global_num_nodes;
   HYPRE_Int           num_nodes;
   HYPRE_Int          *map_to_node;
   HYPRE_Int          *map_to_map = NULL;
   HYPRE_Int          *counter;

   double              sum;

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* integer absolute value; no need to go through a double */
   mode = (option < 0) ? -option : option;

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         hypre_TFree(row_starts_AN);   /* nothing else is allocated yet */
         return (87);
      }
   }

   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;
#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         hypre_TFree(row_starts_AN);   /* nothing else is allocated yet */
         return (87);
      }
   }

   global_num_nodes = row_starts_AN[num_procs];
#endif

   num_nodes = num_variables/num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   /* count the distinct node columns in every node row of the diag block;
      counter[c] remembers the last node row that touched node column c */
   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
         for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
         {
            k_map = map_to_node[A_diag_j[k]];
            if (counter[k_map] < i)
            {
               counter[k_map] = i;
               AN_num_nonzeros_diag++;
            }
         }
         row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;

   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   hypre_NodalACondenseBlock(mode, num_nodes, num_functions,
                             A_diag_i, A_diag_j, A_diag_data,
                             map_to_node, counter,
                             AN_num_nonzeros_diag, AN_diag_j, AN_diag_data);

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
         }
         AN_diag_data[index] = -sum;
      }
   }
   else if (diag_option == 2)
   {
      /* make all diagonal entries negative */
      /* the diagonal is the first element listed in each row - */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }

   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   if (comm_pkg)
   {
      /* nodal communication package: same neighbors as A, send map reduced
         to node indices */
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends)
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
         k_map = send_map_starts[i];
         if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            /* a node is appended only when it is larger than the last one
               stored */
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
               send_map_elmts_AN[cnt++] = node;
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      /* map_to_node is reused for the off-processor columns; grow it only if
         it is too small */
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }
      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }

      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      /* map_to_map: off-processor column of A -> off-processor column of AN */
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
            if (node > col_map_offd_AN[cnt-1])
            {
               col_map_offd_AN[cnt++] = node;
            }
            map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      /* count the distinct node columns in every node row of the offd block */
      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
            for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
            {
               k_map = map_to_map[A_offd_j[k]];
               if (counter[k_map] < i)
               {
                  counter[k_map] = i;
                  AN_num_nonzeros_offd++;
               }
            }
            row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,
                                   AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      AN_offd_i[0] = 0;

      hypre_NodalACondenseBlock(mode, num_nodes, num_functions,
                                A_offd_i, A_offd_j, A_offd_data,
                                map_to_map, counter,
                                AN_num_nonzeros_offd, AN_offd_j, AN_offd_data);
   }
   /* released here rather than inside the branch above, so it is also freed
      when there are off-processor columns but no nodal offd entries */
   hypre_TFree(map_to_map);

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries (here we are adding the
         off_diag contribution)*/
      /* the diagonal is the first element listed in each row of AN_diag_data - */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
         }
         index = AN_diag_i[i];/* location of diag entry in data */
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
   }

   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
                                 row_starts_AN, row_starts_AN, num_cols_offd_AN,
                                 AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the ones
      created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;

   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   /* if the nodal off-processor columns, expanded by num_functions, are more
      than A currently has, A's off-processor column numbering is rebuilt on
      the expanded column map */
   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
         for (j=0; j < num_functions; j++)
         {
            new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      /* col_map_offd is overwritten in place with old -> new local index */
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] > new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
         A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }

   hypre_TFree(map_to_node);

   /* likewise expand A's send map when the nodal send map, expanded by
      num_functions, holds more entries */
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
         {
            for (k=0; k < num_functions; k++)
               new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
         }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }

   *AN_ptr = AN;

   hypre_TFree(counter);

   return (ierr);
}
/*--------------------------------------------------------------------------
 * hypre_MaxwellSolve2
 *
 * Iterates up to max_iter times.  Each iteration:
 *   1. forms the fine edge residual rese = be - Aee*xe, zeroes its boundary
 *      entries and builds the nodal right-hand side bn = T_transpose*rese;
 *   2. (tol > 0 only) checks convergence on |rese|^2 / |be|^2 < tol^2, and
 *      additionally on |ee|^2 / |xe|^2 < tol^2 when rel_change is set;
 *   3. runs one V-cycle over the nodal hierarchy (Ann_l, Pn_l, RnT_l);
 *   4. adds Tgrad*xn to the fine edge solution;
 *   5. runs one V-cycle over the edge hierarchy (Aee_l, Pe_l, ReT_l).
 *
 * Parameters:
 *   maxwell_vdata - solver object; used as a hypre_MaxwellData *.
 *   A_in          - not referenced by this routine.
 *   f, u          - right-hand side and solution; converted with
 *                   hypre_SStructVectorConvert and installed as be_l[0] and
 *                   xe_l[0].  u is updated in place through xe_l[0].
 *
 * Returns ierr, which this routine never changes from 0.
 *
 * Side effects: writes maxwell_data->num_iterations and, when logging > 0
 * and tol > 0, norms[i] / rel_norms[i] for each iteration index i reached.
 *
 * NOTE(review): Pn_l[0], xn_l[1], Pe_l[0] and xe_l[1] are accessed
 * unconditionally, so this appears to assume node_numlevels >= 2 and
 * edge_numlevels >= 2 -- confirm against the setup routine.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_MaxwellSolve2( void                * maxwell_vdata,
                     hypre_SStructMatrix * A_in,
                     hypre_SStructVector * f,
                     hypre_SStructVector * u )
{
   hypre_MaxwellData   *maxwell_data = maxwell_vdata;

   hypre_ParVector     *f_edge;
   hypre_ParVector     *u_edge;

   HYPRE_Int            max_iter     = maxwell_data-> max_iter;
   double               tol          = maxwell_data-> tol;
   HYPRE_Int            rel_change   = maxwell_data-> rel_change;
   HYPRE_Int            zero_guess   = maxwell_data-> zero_guess;
   HYPRE_Int            npre_relax   = maxwell_data-> num_pre_relax;
   HYPRE_Int            npost_relax  = maxwell_data-> num_post_relax;

   /* nodal hierarchy */
   hypre_ParCSRMatrix **Ann_l        = maxwell_data-> Ann_l;
   hypre_ParCSRMatrix **Pn_l         = maxwell_data-> Pn_l;
   hypre_ParCSRMatrix **RnT_l        = maxwell_data-> RnT_l;
   hypre_ParVector    **bn_l         = maxwell_data-> bn_l;
   hypre_ParVector    **xn_l         = maxwell_data-> xn_l;
   hypre_ParVector    **resn_l       = maxwell_data-> resn_l;
   hypre_ParVector    **en_l         = maxwell_data-> en_l;
   hypre_ParVector    **nVtemp2_l    = maxwell_data-> nVtemp2_l;
   HYPRE_Int          **nCF_marker_l = maxwell_data-> nCF_marker_l;
   double              *nrelax_weight= maxwell_data-> nrelax_weight;
   double              *nomega       = maxwell_data-> nomega;
   HYPRE_Int            nrelax_type  = maxwell_data-> nrelax_type;
   HYPRE_Int            node_numlevs = maxwell_data-> node_numlevels;

   hypre_ParCSRMatrix  *Tgrad        = maxwell_data-> Tgrad;
   hypre_ParCSRMatrix  *T_transpose  = maxwell_data-> T_transpose;

   /* edge hierarchy */
   hypre_ParCSRMatrix **Aee_l        = maxwell_data-> Aee_l;
   hypre_IJMatrix     **Pe_l         = maxwell_data-> Pe_l;
   hypre_IJMatrix     **ReT_l        = maxwell_data-> ReT_l;
   hypre_ParVector    **be_l         = maxwell_data-> be_l;
   hypre_ParVector    **xe_l         = maxwell_data-> xe_l;
   hypre_ParVector    **rese_l       = maxwell_data-> rese_l;
   hypre_ParVector    **ee_l         = maxwell_data-> ee_l;
   hypre_ParVector    **eVtemp2_l    = maxwell_data-> eVtemp2_l;
   HYPRE_Int          **eCF_marker_l = maxwell_data-> eCF_marker_l;
   double              *erelax_weight= maxwell_data-> erelax_weight;
   double              *eomega       = maxwell_data-> eomega;
   HYPRE_Int            erelax_type  = maxwell_data-> erelax_type;
   HYPRE_Int            edge_numlevs = maxwell_data-> edge_numlevels;

   HYPRE_Int          **BdryRanks_l    = maxwell_data-> BdryRanks_l;
   HYPRE_Int           *BdryRanksCnts_l= maxwell_data-> BdryRanksCnts_l;

   HYPRE_Int            logging      = maxwell_data-> logging;
   double              *norms        = maxwell_data-> norms;
   double              *rel_norms    = maxwell_data-> rel_norms;

   HYPRE_Int            Solve_err_flag;
   HYPRE_Int            relax_local, cycle_param;

   double               b_dot_b = 0, r_dot_r, eps = 0;
   /* Only read for i > 0, i.e. after the first cycle has set them; the
    * initial values merely keep them well-defined. */
   double               e_dot_e = 0.0, x_dot_x = 1.0;

   HYPRE_Int            i, j;
   HYPRE_Int            level;

   HYPRE_Int            ierr= 0;

   /* added for the relaxation routines */
   hypre_ParVector     *ze = NULL;

   if (hypre_NumThreads() > 1)
   {
      /* Aee is always bigger than Ann */
      ze = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(Aee_l[0]),
                                 hypre_ParCSRMatrixGlobalNumRows(Aee_l[0]),
                                 hypre_ParCSRMatrixRowStarts(Aee_l[0]));
      hypre_ParVectorInitialize(ze);
      hypre_ParVectorSetPartitioningOwner(ze,0);
   }

   hypre_BeginTiming(maxwell_data-> time_index);

   hypre_SStructVectorConvert(f, &f_edge);
   hypre_SStructVectorConvert(u, &u_edge);
   hypre_ParVectorZeroBCValues(f_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   hypre_ParVectorZeroBCValues(u_edge, BdryRanks_l[0], BdryRanksCnts_l[0]);
   be_l[0]= f_edge;
   xe_l[0]= u_edge;

   /* the nodal fine vectors: xn= 0. bn= T'*(be- Aee*xe) is updated in the
      cycle. */
   hypre_ParVectorSetConstantValues(xn_l[0], 0.0);

   relax_local= 0;
   cycle_param= 0;

   (maxwell_data-> num_iterations) = 0;

   /* if max_iter is zero, return */
   if (max_iter == 0)
   {
      /* if using a zero initial guess, return zero */
      if (zero_guess)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
      }
      /* leave through the common exit so that ze is released */
      goto finish;
   }

   /* part of convergence check */
   if (tol > 0.0)
   {
      /* eps = (tol^2) */
      b_dot_b= hypre_ParVectorInnerProd(be_l[0], be_l[0]);
      eps = tol*tol;

      /* if rhs is zero, return a zero solution */
      if (b_dot_b == 0.0)
      {
         hypre_ParVectorSetConstantValues(xe_l[0], 0.0);
         if (logging > 0)
         {
            norms[0]     = 0.0;
            rel_norms[0] = 0.0;
         }
         /* leave through the common exit so that ze is released */
         goto finish;
      }
   }

   /*-----------------------------------------------------
    * Do V-cycles:
    *   For each index l, "fine" = l, "coarse" = (l-1)
    *
    *   solution update:
    *      edge_sol= edge_sol + T*node_sol
    *-----------------------------------------------------*/
   for (i = 0; i < max_iter; i++)
   {
      /* compute fine grid residual & nodal rhs. */
      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      hypre_ParVectorZeroBCValues(rese_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]);
      hypre_ParCSRMatrixMatvec(1.0, T_transpose, rese_l[0], 0.0, bn_l[0]);

      /* convergence check */
      if (tol > 0.0)
      {
         r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

         if (logging > 0)
         {
            norms[i] = sqrt(r_dot_r);
            if (b_dot_b > 0)
               rel_norms[i] = sqrt(r_dot_r/b_dot_b);
            else
               rel_norms[i] = 0.0;
         }

         /* always do at least 1 V-cycle */
         if ((r_dot_r/b_dot_b < eps) && (i > 0))
         {
            if (rel_change)
            {
               if ((e_dot_e/x_dot_x) < eps)
                  break;
            }
            else
            {
               break;
            }
         }
      }

      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(resn_l[0], resn_l[0]);

      for (level= 0; level<= node_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
         for (j= 0; j< npre_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                    bn_l[level],
                                                    nCF_marker_l[level],
                                                    nrelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    nrelax_weight[level],
                                                    nomega[level],
                                                    NULL,
                                                    xn_l[level],
                                                    nVtemp2_l[level],
                                                    ze);
         }

         /* compute residuals */
         hypre_ParVectorCopy(bn_l[level], resn_l[level]);
         hypre_ParCSRMatrixMatvec(-1.0, Ann_l[level], xn_l[level],
                                  1.0, resn_l[level]);

         /* restrict residuals */
         hypre_ParCSRMatrixMatvecT(1.0, RnT_l[level], resn_l[level],
                                   0.0, bn_l[level+1]);

         /* zero off initial guess for the next level */
         hypre_ParVectorSetConstantValues(xn_l[level+1], 0.0);
      }

      /* coarsest node solve */
      level= node_numlevs-1;
      Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                              bn_l[level],
                                              nCF_marker_l[level],
                                              nrelax_type,
                                              relax_local,
                                              cycle_param,
                                              nrelax_weight[level],
                                              nomega[level],
                                              NULL,
                                              xn_l[level],
                                              nVtemp2_l[level],
                                              ze);

      /*---------------------------------------------------------------------
       * Cycle up the levels.
       *---------------------------------------------------------------------*/
      for (level= (node_numlevs - 2); level>= 1; level--)
      {
         hypre_ParCSRMatrixMatvec(1.0, Pn_l[level], xn_l[level+1], 0.0,
                                  en_l[level]);
         hypre_ParVectorAxpy(1.0, en_l[level], xn_l[level]);

         /* post smooth */
         for (j= 0; j< npost_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level],
                                                    bn_l[level],
                                                    nCF_marker_l[level],
                                                    nrelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    nrelax_weight[level],
                                                    nomega[level],
                                                    NULL,
                                                    xn_l[level],
                                                    nVtemp2_l[level],
                                                    ze);
         }
      }

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0, Pn_l[0], xn_l[1], 0.0, en_l[0]);
      hypre_ParVectorAxpy(1.0, en_l[0], xn_l[0]);

      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[0],
                                                 bn_l[0],
                                                 nCF_marker_l[0],
                                                 nrelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 nrelax_weight[0],
                                                 nomega[0],
                                                 NULL,
                                                 xn_l[0],
                                                 nVtemp2_l[0],
                                                 ze);
      }

      hypre_ParVectorCopy(bn_l[0], resn_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]);

      /* add the gradient solution component to xe_l[0] */
      hypre_ParCSRMatrixMatvec(1.0, Tgrad, xn_l[0], 1.0, xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);
      r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]);

      for (level= 0; level<= edge_numlevs-2; level++)
      {
         /*-----------------------------------------------
          * Down cycle
          *-----------------------------------------------*/
         for (j= 0; j< npre_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                    be_l[level],
                                                    eCF_marker_l[level],
                                                    erelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    erelax_weight[level],
                                                    eomega[level],
                                                    NULL,
                                                    xe_l[level],
                                                    eVtemp2_l[level],
                                                    ze);
         }

         /* compute residuals */
         hypre_ParVectorCopy(be_l[level], rese_l[level]);
         hypre_ParCSRMatrixMatvec(-1.0, Aee_l[level], xe_l[level],
                                  1.0, rese_l[level]);

         /* restrict residuals */
         hypre_ParCSRMatrixMatvecT(1.0,
                                   (hypre_ParCSRMatrix *) hypre_IJMatrixObject(ReT_l[level]),
                                   rese_l[level], 0.0, be_l[level+1]);
         hypre_ParVectorZeroBCValues(be_l[level+1], BdryRanks_l[level+1],
                                     BdryRanksCnts_l[level+1]);

         /* zero off initial guess for the next level */
         hypre_ParVectorSetConstantValues(xe_l[level+1], 0.0);
      }

      /* coarsest edge solve */
      level= edge_numlevs-1;
      for (j= 0; j< npre_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                 be_l[level],
                                                 eCF_marker_l[level],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[level],
                                                 eomega[level],
                                                 NULL,
                                                 xe_l[level],
                                                 eVtemp2_l[level],
                                                 ze);
      }

      /*---------------------------------------------------------------------
       * Up cycle.
       *---------------------------------------------------------------------*/
      for (level= (edge_numlevs - 2); level>= 1; level--)
      {
         hypre_ParCSRMatrixMatvec(1.0,
                                  (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[level]),
                                  xe_l[level+1], 0.0, ee_l[level]);
         hypre_ParVectorZeroBCValues(ee_l[level], BdryRanks_l[level],
                                     BdryRanksCnts_l[level]);
         hypre_ParVectorAxpy(1.0, ee_l[level], xe_l[level]);

         /* post smooth */
         for (j= 0; j< npost_relax; j++)
         {
            Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level],
                                                    be_l[level],
                                                    eCF_marker_l[level],
                                                    erelax_type,
                                                    relax_local,
                                                    cycle_param,
                                                    erelax_weight[level],
                                                    eomega[level],
                                                    NULL,
                                                    xe_l[level],
                                                    eVtemp2_l[level],
                                                    ze);
         }
      }

      /* interpolate error and correct on finest grids */
      hypre_ParCSRMatrixMatvec(1.0,
                               (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[0]),
                               xe_l[1], 0.0, ee_l[0]);
      hypre_ParVectorZeroBCValues(ee_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]);
      hypre_ParVectorAxpy(1.0, ee_l[0], xe_l[0]);

      for (j= 0; j< npost_relax; j++)
      {
         Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[0],
                                                 be_l[0],
                                                 eCF_marker_l[0],
                                                 erelax_type,
                                                 relax_local,
                                                 cycle_param,
                                                 erelax_weight[0],
                                                 eomega[0],
                                                 NULL,
                                                 xe_l[0],
                                                 eVtemp2_l[0],
                                                 ze);
      }

      /* quantities for the relative-change test of the next iteration */
      e_dot_e= hypre_ParVectorInnerProd(ee_l[0], ee_l[0]);
      x_dot_x= hypre_ParVectorInnerProd(xe_l[0], xe_l[0]);

      hypre_ParVectorCopy(be_l[0], rese_l[0]);
      hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]);

      (maxwell_data -> num_iterations) = (i + 1);
   }

finish:
   /* single exit: stop the timer and release the threaded work vector */
   hypre_EndTiming(maxwell_data -> time_index);

   if (ze)
      hypre_ParVectorDestroy(ze);

   return ierr;
}
/*--------------------------------------------------------------------------
 * main
 *
 * Timing driver that builds a ParCSR matrix and measures the cost of
 * creating its communication package with hypre_NewCommPkgCreate and/or
 * hypre_MatvecCommPkgCreate.
 *
 * Command line options handled here:
 *   -verbose          print local ranges and send/recv counts on each rank
 *   -fromonecsrfile   read the matrix from one CSR file; the file name is
 *                     the argument that follows
 *   -laplacian        build with myBuildParLaplacian (the default)
 *   -27pt             build with myBuildParLaplacian27pt
 *   -commpkg <n>      1: new package only; 2: standard package only;
 *                     3 (default): both, then compare a matvec from each
 *   -loop <n>         number of timed repetitions (run 0 is extra and is
 *                     excluded from the statistics)
 *   -noparmprint      passed (negated) to the matrix builders
 * Other arguments are skipped here; argc/argv are also handed to the
 * matrix builders.
 *
 * Returns ierr (result of the last range query when -verbose is given,
 * otherwise 0).
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc,
      char *argv[] )
{
   HYPRE_Int            num_procs, myid;
   HYPRE_Int            verbose = 0, build_matrix_type = 1;
   HYPRE_Int            index, matrix_arg_index, commpkg_flag=3;
   HYPRE_Int            i, k, ierr=0;
   HYPRE_Int            row_start, row_end;
   HYPRE_Int            col_start, col_end, global_num_rows;
   HYPRE_Int           *row_part, *col_part;
   char                *csrfilename = NULL;
   HYPRE_Int            preload = 0, loop = 0, loop2 = LOOP2;

   HYPRE_Int            bcast_rows[2], *info = NULL;

   hypre_ParCSRMatrix  *parcsr_A = NULL, *small_A;
   HYPRE_ParCSRMatrix   A_temp, A_temp_small;
   /* only rank 0 reads the file; other ranks pass NULL on */
   hypre_CSRMatrix     *A_CSR = NULL;
   hypre_ParCSRCommPkg *comm_pkg;

   HYPRE_Int            nx, ny, nz;
   HYPRE_Int            P, Q, R;
   HYPRE_Int            p, q, r;
   HYPRE_Real           values[4];

   hypre_ParVector     *x_new = NULL;
   hypre_ParVector     *y_new = NULL, *y = NULL;
   HYPRE_Int           *row_starts;
   HYPRE_Real           ans;
   HYPRE_Real           start_time, end_time, total_time, *loop_times;
   HYPRE_Real           T_avg, T_std;

   HYPRE_Int            noparmprint = 0;

#if mydebug
   HYPRE_Int            j, tmp_int;
#endif

   /*-----------------------------------------------------------
    * Initialize MPI
    *-----------------------------------------------------------*/
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * default - build_matrix_type 2 (same as -laplacian)
    *-----------------------------------------------------------*/
   build_matrix_type = 2;
   matrix_arg_index  = argc;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/
   index = 1;
   while ( index < argc)
   {
      if ( strcmp(argv[index], "-verbose") == 0 )
      {
         index++;
         verbose = 1;
      }
      else if ( strcmp(argv[index], "-fromonecsrfile") == 0 )
      {
         index++;
         build_matrix_type = 1;
         matrix_arg_index  = index; /*this tells where the name is*/
      }
      else if ( strcmp(argv[index], "-commpkg") == 0 )
      {
         index++;
         /* argv[argc] is a null pointer: never hand it to atoi */
         if (index < argc)
         {
            commpkg_flag = atoi(argv[index++]);
         }
         else if (!myid)
         {
            hypre_printf("Warning: option -commpkg requires a value; using default\n");
         }
      }
      else if ( strcmp(argv[index], "-laplacian") == 0 )
      {
         index++;
         build_matrix_type = 2;
         matrix_arg_index  = index;
      }
      else if ( strcmp(argv[index], "-27pt") == 0 )
      {
         index++;
         build_matrix_type = 4;
         matrix_arg_index  = index;
      }
      else if ( strcmp(argv[index], "-loop") == 0 )
      {
         index++;
         /* same guard as -commpkg */
         if (index < argc)
         {
            loop = atoi(argv[index++]);
         }
         else if (!myid)
         {
            hypre_printf("Warning: option -loop requires a value; using default\n");
         }
      }
      else if ( strcmp(argv[index], "-noparmprint") == 0 )
      {
         index++;
         noparmprint = 1;
      }
      else
      {
         /* not handled here; skip it */
         index++;
      }
   }

   /*-----------------------------------------------------------
    * Setup the Matrix problem
    *-----------------------------------------------------------*/
   if (build_matrix_type ==1) /*read in a csr matrix from one file */
   {
      if (matrix_arg_index < argc)
      {
         csrfilename = argv[matrix_arg_index];
      }
      else
      {
         hypre_printf("Error: No filename specified \n");
         exit(1);
      }
      if (myid == 0)
      {
         A_CSR = hypre_CSRMatrixRead(csrfilename);
      }
      row_part = NULL;
      col_part = NULL;

      parcsr_A = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, A_CSR,
                                               row_part, col_part);

      if (myid == 0) hypre_CSRMatrixDestroy(A_CSR);
   }
   else if (build_matrix_type ==2)
   {
      myBuildParLaplacian(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
      parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }
   else if (build_matrix_type ==4)
   {
      myBuildParLaplacian27pt(argc, argv, matrix_arg_index, &A_temp, !noparmprint);
      parcsr_A = (hypre_ParCSRMatrix *) A_temp;
   }

   /*-----------------------------------------------------------
    * create a small problem so that timings are more accurate -
    * code gets run twice (small laplace)
    *-----------------------------------------------------------*/
   /*this is no longer being used - preload = 0 is set at the beginning */
   if (preload == 1)
   {
      values[1] = -1;
      values[2] = -1;
      values[3] = -1;
      values[0] = - 6.0 ;

      nx = 2;
      ny = num_procs;
      nz = 2;

      P = 1;
      Q = num_procs;
      R = 1;

      p = myid % P;
      q = (( myid - p)/P) % Q;
      r = ( myid - p - P*q)/( P*Q );

      A_temp_small = (HYPRE_ParCSRMatrix) GenerateLaplacian(hypre_MPI_COMM_WORLD,
                                                            nx, ny, nz,
                                                            P, Q, R, p, q, r,
                                                            values);
      small_A = (hypre_ParCSRMatrix *) A_temp_small;

      /*do comm packages*/
      hypre_NewCommPkgCreate(small_A);
      hypre_NewCommPkgDestroy(small_A);

      hypre_MatvecCommPkgCreate(small_A);
      hypre_ParCSRMatrixDestroy(small_A);
   }

   /*-----------------------------------------------------------
    * Prepare for timing
    *-----------------------------------------------------------*/
   /* instead of preloading, let's not time the first one if more than one*/
   if (!loop)
   {
      loop = 1; /* and don't do any timings */
   }
   else
   {
      loop +=1;
      if (loop < 2) loop = 2;
   }

   loop_times = hypre_CTAlloc(HYPRE_Real, loop);

   /******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag == 1 || commpkg_flag ==3 )
   {
      /*-----------------------------------------------------------
       * Create new comm package
       *-----------------------------------------------------------*/
      if (!myid) hypre_printf("********************************************************\n" );

      /*do loop times*/
      for (i=0; i< loop; i++)
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++)
         {
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

            start_time = hypre_MPI_Wtime();

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(1);
#endif

            hypre_NewCommPkgCreate(parcsr_A);

#if mpip_on
            if (i==(loop-1)) hypre_MPI_Pcontrol(0);
#endif

            end_time = hypre_MPI_Wtime();

            end_time = end_time - start_time;

            /* the slowest rank defines the time of this repetition */
            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                                HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;

            /* keep the package from the very last repetition for later use */
            if ( !((i+1)== loop && (k+1) == loop2)) hypre_NewCommPkgDestroy(parcsr_A);

         }/*end of loop2 */
      } /*end of loop*/

      /* calculate the avg and std. */
      if (loop > 1)
      {
         stats_mo(loop_times, loop, &T_avg, &T_std);
         if (!myid) hypre_printf(" NewCommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg);
         if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std);
         if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );
         for (i=0; i< loop; i++)
         {
            if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]);
         }
         if (!myid) hypre_printf("********************************************************\n" );
      }
      else
      {
         if (!myid) hypre_printf("********************************************************\n" );
         if (!myid) hypre_printf(" NewCommPkgCreate:\n");
         if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]);
         if (!myid) hypre_printf("********************************************************\n" );
      }

      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/
      /*some verification*/
      global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A);

      if (verbose)
      {
         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);

         hypre_printf("myid = %i, my ACTUAL local range: [%i, %i]\n", myid,
                      row_start, row_end);

         ierr = hypre_GetAssumedPartitionRowRange( myid, global_num_rows,
                                                   &row_start, &row_end);

         hypre_printf("myid = %i, my assumed local range: [%i, %i]\n", myid,
                      row_start, row_end);

         hypre_printf("myid = %d, num_recvs = %d\n", myid,
                      hypre_ParCSRCommPkgNumRecvs(comm_pkg) );

#if mydebug
         for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++)
         {
            hypre_printf("myid = %d, recv proc = %d, vec_starts = [%d : %d]\n",
                         myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
         }
#endif

         hypre_printf("myid = %d, num_sends = %d\n", myid,
                      hypre_ParCSRCommPkgNumSends(comm_pkg) );

#if mydebug
         for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++)
         {
            tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -
                      hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            for (j=0; j< tmp_int; j++)
            {
               hypre_printf("myid = %d, send proc = %d, send element = %d\n",myid,
                            hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
                            hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]);
            }
         }
#endif
      }

      /*-----------------------------------------------------------
       * To verify correctness (if commpkg_flag = 3)
       *-----------------------------------------------------------*/
      if (commpkg_flag == 3 )
      {
         /*do a matvec - we are assuming a square matrix */
         row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);

         x_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
         hypre_ParVectorSetPartitioningOwner(x_new, 0);
         hypre_ParVectorInitialize(x_new);
         hypre_ParVectorSetRandomValues(x_new, 1);

         y_new = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
         hypre_ParVectorSetPartitioningOwner(y_new, 0);
         hypre_ParVectorInitialize(y_new);
         hypre_ParVectorSetConstantValues(y_new, 0.0);

         /*y = 1.0*A*x+1.0*y */
         hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y_new);
      }

      /*-----------------------------------------------------------
       * Clean up after MyComm
       *-----------------------------------------------------------*/
      hypre_NewCommPkgDestroy(parcsr_A);
   }

   /******************************************************************************************/
   /******************************************************************************************/

   hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

   if (commpkg_flag > 1 )
   {
      /*-----------------------------------------------------------
       * Set up standard comm package
       *-----------------------------------------------------------*/
      bcast_rows[0] = 23;
      bcast_rows[1] = 1789;

      if (!myid) hypre_printf("********************************************************\n" );

      /*do loop times*/
      for (i=0; i< loop; i++)
      {
         loop_times[i] = 0.0;
         for (k=0; k< loop2; k++)
         {
            hypre_MPI_Barrier(hypre_MPI_COMM_WORLD);

            start_time = hypre_MPI_Wtime();

#if time_gather
            info = hypre_CTAlloc(HYPRE_Int, num_procs);

            hypre_MPI_Allgather(bcast_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, hypre_MPI_COMM_WORLD);
#endif

            hypre_MatvecCommPkgCreate(parcsr_A);

            end_time = hypre_MPI_Wtime();

            end_time = end_time - start_time;

            hypre_MPI_Allreduce(&end_time, &total_time, 1,
                                HYPRE_MPI_REAL, hypre_MPI_MAX, hypre_MPI_COMM_WORLD);

            loop_times[i] += total_time;

#if time_gather
            /* released after the clock is stopped, once per repetition */
            hypre_TFree(info);
#endif

            /* keep the package from the very last repetition for later use */
            if ( !((i+1)== loop && (k+1) == loop2)) hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(parcsr_A));

         }/* end of loop 2*/
      } /*end of loop*/

      /* calculate the avg and std. */
      if (loop > 1)
      {
         stats_mo(loop_times, loop, &T_avg, &T_std);
         if (!myid) hypre_printf("Current CommPkgCreate: AVG. wall clock time = %f seconds\n", T_avg);
         if (!myid) hypre_printf(" STD. for %d runs = %f\n", loop-1, T_std);
         if (!myid) hypre_printf(" (Note: avg./std. timings exclude run 0.)\n");
         if (!myid) hypre_printf("********************************************************\n" );
         for (i=0; i< loop; i++)
         {
            if (!myid) hypre_printf(" run %d = %f sec.\n", i, loop_times[i]);
         }
         if (!myid) hypre_printf("********************************************************\n" );
      }
      else
      {
         if (!myid) hypre_printf("********************************************************\n" );
         if (!myid) hypre_printf(" Current CommPkgCreate:\n");
         if (!myid) hypre_printf(" run time = %f sec.\n", loop_times[0]);
         if (!myid) hypre_printf("********************************************************\n" );
      }

      /*-----------------------------------------------------------
       * Verbose printing
       *-----------------------------------------------------------*/
      /*some verification*/
      if (verbose)
      {
         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         comm_pkg = hypre_ParCSRMatrixCommPkg(parcsr_A);

         hypre_printf("myid = %i, std - my local range: [%i, %i]\n", myid,
                      row_start, row_end);

         ierr = hypre_ParCSRMatrixGetLocalRange( parcsr_A,
                                                 &row_start, &row_end ,
                                                 &col_start, &col_end );

         hypre_printf("myid = %d, std - num_recvs = %d\n", myid,
                      hypre_ParCSRCommPkgNumRecvs(comm_pkg) );

#if mydebug
         for (i=0; i < hypre_ParCSRCommPkgNumRecvs(comm_pkg); i++)
         {
            hypre_printf("myid = %d, std - recv proc = %d, vec_starts = [%d : %d]\n",
                         myid, hypre_ParCSRCommPkgRecvProcs(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i],
                         hypre_ParCSRCommPkgRecvVecStarts(comm_pkg)[i+1]-1);
         }
#endif

         hypre_printf("myid = %d, std - num_sends = %d\n", myid,
                      hypre_ParCSRCommPkgNumSends(comm_pkg));

#if mydebug
         for (i=0; i <hypre_ParCSRCommPkgNumSends(comm_pkg) ; i++)
         {
            tmp_int = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i+1] -
                      hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            index = hypre_ParCSRCommPkgSendMapStarts(comm_pkg)[i];
            for (j=0; j< tmp_int; j++)
            {
               hypre_printf("myid = %d, std - send proc = %d, send element = %d\n",myid,
                            hypre_ParCSRCommPkgSendProcs(comm_pkg)[i],
                            hypre_ParCSRCommPkgSendMapElmts(comm_pkg)[index+j]);
            }
         }
#endif
      }

      /*-----------------------------------------------------------
       * Verify correctness
       *-----------------------------------------------------------*/
      if (commpkg_flag == 3 )
      {
         global_num_rows = hypre_ParCSRMatrixGlobalNumRows(parcsr_A);
         row_starts = hypre_ParCSRMatrixRowStarts(parcsr_A);

         y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows,row_starts);
         hypre_ParVectorSetPartitioningOwner(y, 0);
         hypre_ParVectorInitialize(y);
         hypre_ParVectorSetConstantValues(y, 0.0);

         hypre_ParCSRMatrixMatvec (1.0, parcsr_A, x_new, 1.0, y);
      }
   }

   /*-----------------------------------------------------------
    * Compare matvecs for both comm packages (3)
    *-----------------------------------------------------------*/
   if (commpkg_flag == 3 )
   {
      /*make sure that y and y_new are the same - now y_new should=0*/
      hypre_ParVectorAxpy( -1.0, y, y_new );

      hypre_ParVectorSetRandomValues(y, 1);

      ans = hypre_ParVectorInnerProd( y, y_new );

      if (!myid)
      {
         if ( fabs(ans) > 1e-8 )
         {
            hypre_printf("!!!!! WARNING !!!!! should be zero if correct = %6.10f\n",
                         ans);
         }
         else
         {
            hypre_printf("Matvecs match ( should be zero = %6.10f )\n",
                         ans);
         }
      }
   }

   /*-----------------------------------------------------------
    * Clean up
    *-----------------------------------------------------------*/
   hypre_ParCSRMatrixDestroy(parcsr_A); /*this calls the standard comm
                                          package destroy - but we'll destroy
                                          ours separately until it is
                                          incorporated */

   if (commpkg_flag == 3 )
   {
      hypre_ParVectorDestroy(x_new);
      hypre_ParVectorDestroy(y);
      hypre_ParVectorDestroy(y_new);
   }

   hypre_TFree(loop_times);

   hypre_MPI_Finalize();

   return(ierr);
}
/*--------------------------------------------------------------------------
 * hypre_ParChordMatrixToParCSRMatrix
 *
 * Builds a ParCSR matrix from the inchords of the ParChord matrix Ac.
 *
 * Parameters:
 *   Ac   - source matrix; its inchord idof/rdof/data tables and
 *          FirstindexRdof are read.
 *   comm - communicator used for rank/size queries and for the new matrix.
 *   pAp  - output; *pAp receives the newly created hypre_ParCSRMatrix.
 *
 * Returns 0.
 *
 * The inchords are gathered into a temporary local CSR matrix (one row per
 * idof, entries in inchord traversal order: processor-major, then chord
 * index) which GenerateDiagAndOffd splits into the diag and offd parts of
 * *pAp.  The temporary arrays are released on every rank before returning.
 *
 * Some parts of this function are copied from hypre_CSRMatrixToParCSRMatrix.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParChordMatrixToParCSRMatrix( hypre_ParChordMatrix *Ac,
                                    MPI_Comm comm,
                                    hypre_ParCSRMatrix **pAp )
{
   hypre_ParCSRMatrix *Ap;
   hypre_CSRMatrix    *local_A;

   HYPRE_Int *row_starts, *col_starts;
   HYPRE_Int  global_num_rows, global_num_cols, my_id, num_procs;
   HYPRE_Int  num_cols_offd, num_nonzeros_diag, num_nonzeros_offd;
   HYPRE_Int *local_num_rows;
   HYPRE_Int  num_nonzeros, first_col_diag, last_col_diag;
   HYPRE_Int  i, ic, ij, ilocal, p, r_global, jlen;
   HYPRE_Int  num_idofs;
   HYPRE_Int *a_i, *a_j, *ilen;
   double    *a_data;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_ParChordMatrix_RowStarts
      ( Ac, comm, &row_starts, &global_num_cols );
   /* ... this function works correctly only under some assumptions;
      see the function definition for details */
   global_num_rows = row_starts[num_procs] - row_starts[0];

   col_starts = NULL;
   /* The offd and diag blocks aren't defined until we have both row
      and column partitions... */
   num_cols_offd = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;

   Ap = hypre_ParCSRMatrixCreate( comm, global_num_rows, global_num_cols,
                                  row_starts, col_starts,
                                  num_cols_offd, num_nonzeros_diag,
                                  num_nonzeros_offd);
   *pAp = Ap;

   /* from here on use the partitions as stored in the new matrix */
   row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   col_starts = hypre_ParCSRMatrixColStarts(Ap);

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   for (i=0; i < num_procs; i++)
      local_num_rows[i] = row_starts[i+1] - row_starts[i];

   num_nonzeros = 0;
   for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p )
   {
      num_nonzeros += hypre_ParChordMatrixNumInchords(Ac)[p];
   }

   local_A = hypre_CSRMatrixCreate( local_num_rows[my_id], global_num_cols,
                                    num_nonzeros );

   /* Compute local CSRMatrix-like i,j arrays for this processor. */

   num_idofs = hypre_ParChordMatrixNumIdofs(Ac);

   /* Pass 1: count the entries of every row (ilen) and in total (jlen). */
   ilen = hypre_CTAlloc( HYPRE_Int, num_idofs );
   jlen = 0;
   for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p )
   {
      for ( ic=0; ic<hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic )
      {
         ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic];
         ++ilen[ilocal];
         ++jlen;
      }
   }

   a_i    = hypre_CTAlloc( HYPRE_Int, num_idofs+1 );
   a_j    = hypre_CTAlloc( HYPRE_Int, jlen );
   a_data = hypre_CTAlloc( double, jlen );

   /* Row pointers are the running sum of the row lengths; ilen is reset so
      that pass 2 can reuse it as the per-row fill position. */
   a_i[0] = 0;
   for ( ilocal=0; ilocal<num_idofs; ++ilocal )
   {
      a_i[ilocal+1] = a_i[ilocal] + ilen[ilocal];
      ilen[ilocal] = 0;
   }

   /* Pass 2: same traversal as pass 1, so the entries of each row land in
      the order in which the inchords are listed. */
   for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p )
   {
      for ( ic=0; ic<hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic )
      {
         ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic];
         ij = a_i[ilocal] + ilen[ilocal];
         ++ilen[ilocal];

         /* the stored rdof is local in proc. p */
         r_global = hypre_ParChordMatrixInchordRdof(Ac)[p][ic]
            + hypre_ParChordMatrixFirstindexRdof(Ac)[p];
         /* NOTE(review): the column index is stored relative to this
            rank's first rdof, while first/last_col_diag below come from
            col_starts -- confirm this matches what GenerateDiagAndOffd
            expects. */
         a_j[ij]    = r_global - hypre_ParChordMatrixFirstindexRdof(Ac)[my_id];
         a_data[ij] = hypre_ParChordMatrixInchordData(Ac)[p][ic];
      }
   }

   hypre_TFree( ilen );

   first_col_diag = col_starts[my_id];
   last_col_diag = col_starts[my_id+1]-1;

   hypre_CSRMatrixData(local_A) = a_data;
   hypre_CSRMatrixI(local_A) = a_i;
   hypre_CSRMatrixJ(local_A) = a_j;
   hypre_CSRMatrixOwnsData(local_A) = 0;

   GenerateDiagAndOffd(local_A, Ap, first_col_diag, last_col_diag);

   /* The temporary arrays exist on every rank, so release them on every
      rank (not only on rank 0) ...
      ... the data has been copied into different diag & offd arrays of Ap */
   hypre_TFree(a_data);
   hypre_TFree(a_j);
   hypre_TFree(a_i);

   /* set pointers back to NULL before destroying */
   hypre_CSRMatrixData(local_A) = NULL;
   hypre_CSRMatrixI(local_A) = NULL;
   hypre_CSRMatrixJ(local_A) = NULL;

   hypre_CSRMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);

   return 0;
}