/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateScalarCFS
 *
 * Expands the nodal matrix SN and the nodal marker CFN_marker into their
 * scalar counterparts, using num_functions unknowns per node.
 *
 *   - Every unknown of node i receives the marker CFN_marker[i].
 *   - Every entry (i, jj) of SN produces, for each function f, the entry
 *     (i*num_functions + f, jj*num_functions + f) in S, i.e. an unknown is
 *     only connected to unknowns of the same function.
 *   - If SN has a communication package, a scaled copy of it is attached
 *     to S.
 *
 * Parameters:
 *   SN                  nodal matrix (input)
 *   CFN_marker          marker per local node (input)
 *   col_offd_SN_to_AN   optional map for the off-diagonal columns of SN;
 *                       when non-NULL an expanded copy is returned in
 *                       *col_offd_S_to_A_ptr
 *   num_functions       number of unknowns per node
 *   nodal               when negative, a dof_func array (function number of
 *                       every unknown whose node has marker 1) is built and
 *                       returned in *dof_func_ptr
 *   data                when nonzero, the values of SN are copied into S,
 *                       otherwise only the sparsity pattern is built
 *   dof_func_ptr        output, written only if nodal < 0
 *   CF_marker_ptr       output, marker per local unknown
 *   col_offd_S_to_A_ptr output, written only if col_offd_SN_to_AN != NULL
 *   S_ptr               output, the scalar matrix
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix  *SN,
                               HYPRE_Int           *CFN_marker,
                               HYPRE_Int           *col_offd_SN_to_AN,
                               HYPRE_Int            num_functions,
                               HYPRE_Int            nodal,
                               HYPRE_Int            data,
                               HYPRE_Int          **dof_func_ptr,
                               HYPRE_Int          **CF_marker_ptr,
                               HYPRE_Int          **col_offd_S_to_A_ptr,
                               hypre_ParCSRMatrix **S_ptr)
{
   MPI_Comm             comm = hypre_ParCSRMatrixComm(SN);

   /* nodal input */
   hypre_CSRMatrix     *SN_diag          = hypre_ParCSRMatrixDiag(SN);
   HYPRE_Int           *SN_diag_i        = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int           *SN_diag_j        = hypre_CSRMatrixJ(SN_diag);
   double              *SN_diag_data     = NULL;
   hypre_CSRMatrix     *SN_offd          = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int           *SN_offd_i        = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int           *SN_offd_j        = hypre_CSRMatrixJ(SN_offd);
   double              *SN_offd_data     = NULL;
   HYPRE_Int           *row_starts_SN    = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int           *col_starts_SN    = hypre_ParCSRMatrixColStarts(SN);
   HYPRE_Int           *col_map_offd_SN  = hypre_ParCSRMatrixColMapOffd(SN);
   hypre_ParCSRCommPkg *comm_pkg         = hypre_ParCSRMatrixCommPkg(SN);
   HYPRE_Int            num_nodes        = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int            num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd);

   /* scalar output */
   hypre_ParCSRMatrix  *S;
   hypre_CSRMatrix     *S_diag;
   hypre_CSRMatrix     *S_offd;
   HYPRE_Int           *S_diag_i;
   HYPRE_Int           *S_diag_j;
   double              *S_diag_data      = NULL;
   HYPRE_Int           *S_offd_i;
   HYPRE_Int           *S_offd_j         = NULL;
   double              *S_offd_data      = NULL;
   HYPRE_Int           *row_starts_S;
   HYPRE_Int           *col_starts_S;
   HYPRE_Int           *col_map_offd_S;
   HYPRE_Int           *col_offd_S_to_A  = NULL;
   HYPRE_Int           *CF_marker;
   HYPRE_Int           *dof_func;

   HYPRE_Int            num_procs;
   HYPRE_Int            num_starts;
   HYPRE_Int            num_variables;
   HYPRE_Int            num_coarse_nodes;
   HYPRE_Int            num_cols_offd_S;
   HYPRE_Int            SN_num_nonzeros_diag, SN_num_nonzeros_offd;
   HYPRE_Int            S_num_nonzeros_diag, S_num_nonzeros_offd;
   HYPRE_Int            global_num_nodes, global_num_vars, global_num_cols;
   HYPRE_Int            node, fn, idx, pos, nnz, row, base;
   HYPRE_Int            ierr = 0;

   hypre_MPI_Comm_size(comm, &num_procs);

   num_variables = num_functions*num_nodes;

   /*-----------------------------------------------------------------------
    * Scalar marker: each unknown inherits the marker of its node.
    *-----------------------------------------------------------------------*/
   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);
   for (node = 0; node < num_nodes; node++)
   {
      for (fn = 0; fn < num_functions; fn++)
      {
         CF_marker[node*num_functions + fn] = CFN_marker[node];
      }
   }

   /*-----------------------------------------------------------------------
    * For nodal < 0 also build dof_func, which lists the function numbers
    * 0..num_functions-1 once for every node marked 1.
    *-----------------------------------------------------------------------*/
   if (nodal < 0)
   {
      num_coarse_nodes = 0;
      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] == 1)
         {
            num_coarse_nodes++;
         }
      }

      dof_func = hypre_CTAlloc(HYPRE_Int, num_coarse_nodes*num_functions);
      pos = 0;
      for (node = 0; node < num_nodes; node++)
      {
         if (CFN_marker[node] != 1)
         {
            continue;
         }
         for (fn = 0; fn < num_functions; fn++)
         {
            dof_func[pos++] = fn;
         }
      }
      *dof_func_ptr = dof_func;
   }

   *CF_marker_ptr = CF_marker;

   /*-----------------------------------------------------------------------
    * Scale the row/column partitionings by num_functions. The column
    * partitioning is shared with the row partitioning whenever SN shares
    * them.
    *-----------------------------------------------------------------------*/
#ifdef HYPRE_NO_GLOBAL_PARTITION
   num_starts = 2;
#else
   num_starts = num_procs+1;
#endif

   row_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
   for (idx = 0; idx < num_starts; idx++)
   {
      row_starts_S[idx] = num_functions*row_starts_SN[idx];
   }

   col_starts_S = row_starts_S;
   if (row_starts_SN != col_starts_SN)
   {
      col_starts_S = hypre_CTAlloc(HYPRE_Int, num_starts);
      for (idx = 0; idx < num_starts; idx++)
      {
         col_starts_S[idx] = num_functions*col_starts_SN[idx];
      }
   }

   /*-----------------------------------------------------------------------
    * Sizes of the scalar matrix.
    *-----------------------------------------------------------------------*/
   SN_num_nonzeros_diag = SN_diag_i[num_nodes];
   SN_num_nonzeros_offd = SN_offd_i[num_nodes];

   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN);
   global_num_cols  = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
   global_num_vars  = global_num_nodes*num_functions;

   S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag;
   S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd;
   num_cols_offd_S     = num_functions*num_cols_offd_SN;

   S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols,
                                row_starts_S, col_starts_S, num_cols_offd_S,
                                S_num_nonzeros_diag, S_num_nonzeros_offd);

   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);

   S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag);

   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;
   hypre_CSRMatrixI(S_offd) = S_offd_i;

   if (data)
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data  = hypre_CTAlloc(double, S_num_nonzeros_diag);
      hypre_CSRMatrixData(S_diag) = S_diag_data;
      if (num_cols_offd_S)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data  = hypre_CTAlloc(double, S_num_nonzeros_offd);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }
   }

   /*-----------------------------------------------------------------------
    * Communication package of S: same neighbours as SN, with every nodal
    * index replaced by its num_functions scalar indices.
    *-----------------------------------------------------------------------*/
   if (comm_pkg)
   {
      hypre_ParCSRCommPkg *comm_pkg_S;
      HYPRE_Int  num_sends       = hypre_ParCSRCommPkgNumSends(comm_pkg);
      HYPRE_Int  num_recvs       = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      HYPRE_Int *send_procs      = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      HYPRE_Int *send_map_elmts  = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      HYPRE_Int *recv_procs      = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      HYPRE_Int *send_procs_S      = NULL;
      HYPRE_Int *send_map_elmts_S  = NULL;
      HYPRE_Int *recv_procs_S      = NULL;
      HYPRE_Int *send_map_starts_S;
      HYPRE_Int *recv_vec_starts_S;

      comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);
      hypre_ParCSRCommPkgComm(comm_pkg_S)     = comm;
      hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends;
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs;

      if (num_sends)
      {
         send_procs_S     = hypre_CTAlloc(HYPRE_Int, num_sends);
         send_map_elmts_S = hypre_CTAlloc(HYPRE_Int,
                                          num_functions*send_map_starts[num_sends]);
      }
      send_map_starts_S = hypre_CTAlloc(HYPRE_Int, num_sends+1);
      recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int, num_recvs+1);
      if (num_recvs)
      {
         recv_procs_S = hypre_CTAlloc(HYPRE_Int, num_recvs);
      }

      send_map_starts_S[0] = 0;
      for (idx = 0; idx < num_sends; idx++)
      {
         send_procs_S[idx]        = send_procs[idx];
         send_map_starts_S[idx+1] = num_functions*send_map_starts[idx+1];
      }

      recv_vec_starts_S[0] = 0;
      for (idx = 0; idx < num_recvs; idx++)
      {
         recv_procs_S[idx]        = recv_procs[idx];
         recv_vec_starts_S[idx+1] = num_functions*recv_vec_starts[idx+1];
      }

      pos = 0;
      for (idx = 0; idx < send_map_starts[num_sends]; idx++)
      {
         base = num_functions*send_map_elmts[idx];
         for (fn = 0; fn < num_functions; fn++)
         {
            send_map_elmts_S[pos++] = base+fn;
         }
      }

      hypre_ParCSRCommPkgSendProcs(comm_pkg_S)     = send_procs_S;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S)  = send_map_elmts_S;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_S)     = recv_procs_S;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S;

      hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S;
   }

   /*-----------------------------------------------------------------------
    * Off-diagonal column index array and column map of S.
    *-----------------------------------------------------------------------*/
   if (num_cols_offd_S)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);
      pos = 0;
      for (idx = 0; idx < num_cols_offd_SN; idx++)
      {
         base = col_map_offd_SN[idx]*num_functions;
         for (fn = 0; fn < num_functions; fn++)
         {
            col_map_offd_S[pos++] = base+fn;
         }
      }
      hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S;
   }

   /*-----------------------------------------------------------------------
    * Optional expansion of the caller supplied off-diagonal column map.
    *-----------------------------------------------------------------------*/
   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S);
      pos = 0;
      for (idx = 0; idx < num_cols_offd_SN; idx++)
      {
         base = col_offd_SN_to_AN[idx]*num_functions;
         for (fn = 0; fn < num_functions; fn++)
         {
            col_offd_S_to_A[pos++] = base+fn;
         }
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }

   /*-----------------------------------------------------------------------
    * Diagonal part: node row `node` becomes num_functions consecutive rows;
    * the row of function fn holds the columns jj*num_functions + fn.
    * S_diag_i[0] is already 0 because the array was zero-allocated.
    *-----------------------------------------------------------------------*/
   nnz = 0;
   row = 0;
   for (node = 0; node < num_nodes; node++)
   {
      for (fn = 0; fn < num_functions; fn++)
      {
         for (pos = SN_diag_i[node]; pos < SN_diag_i[node+1]; pos++)
         {
            if (data)
            {
               S_diag_data[nnz] = SN_diag_data[pos];
            }
            S_diag_j[nnz++] = SN_diag_j[pos]*num_functions + fn;
         }
         S_diag_i[++row] = nnz;
      }
   }

   /*-----------------------------------------------------------------------
    * Off-diagonal part, expanded the same way.
    *-----------------------------------------------------------------------*/
   nnz = 0;
   row = 0;
   for (node = 0; node < num_nodes; node++)
   {
      for (fn = 0; fn < num_functions; fn++)
      {
         for (pos = SN_offd_i[node]; pos < SN_offd_i[node+1]; pos++)
         {
            if (data)
            {
               S_offd_data[nnz] = SN_offd_data[pos];
            }
            S_offd_j[nnz++] = SN_offd_j[pos]*num_functions + fn;
         }
         S_offd_i[++row] = nnz;
      }
   }

   *S_ptr = S;

   return (ierr);
}
/*--------------------------------------------------------------------------
 * hypre_ParMatmul_FC
 *
 * Creates and returns the "Fine"-designated rows of the matrix product A*P.
 * A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC, where nC is the
 * number of coarse rows/columns and nF the number of fine rows/columns.
 * The size of C=A*P is (nC+nF)*nC, even though not all rows of C are
 * actually computed.  "Fine" is defined solely by the marker array
 * (CF_marker[i] < 0) and could, for example, be a proper subset of the fine
 * points of a multigrid hierarchy.
 *
 * Rows that are not "Fine" are copied from P, which is useful if C is meant
 * to become a replacement for P.
 *
 * Parameters:
 *   A              left factor
 *   P              right factor
 *   CF_marker      marker per local row of A; negative means "Fine"
 *   dof_func       function number per local row, or NULL; when non-NULL
 *                  an entry A(i1,i2) only contributes if i1 and i2 belong
 *                  to the same function
 *   dof_func_offd  function number per off-diagonal column of A; only read
 *                  when dof_func is non-NULL
 *
 * Returns the new matrix C, or NULL (after printing a message) if the
 * dimensions of A and P are incompatible.  C does not own its row/column
 * partitionings; they are those of A and P respectively.
 *
 * Computing a submatrix of C that contains only the "Fine" rows would take
 * a lot of extra work and communication, because such a matrix would need
 * global information that depends on which rows are "Fine".
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix *
hypre_ParMatmul_FC( hypre_ParCSRMatrix *A,
                    hypre_ParCSRMatrix *P,
                    HYPRE_Int          *CF_marker,
                    HYPRE_Int          *dof_func,
                    HYPRE_Int          *dof_func_offd )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int       *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int        first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int        last_col_diag_P;
   HYPRE_Int       *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int       *col_map_offd_C = NULL;
   HYPRE_Int       *map_P_to_C = NULL;

   hypre_CSRMatrix *C_diag;
   double          *C_diag_data;
   HYPRE_Int       *C_diag_i;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;
   double          *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i = NULL;
   HYPRE_Int       *C_offd_j = NULL;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        num_cols_offd_C = 0;

   hypre_CSRMatrix *Ps_ext = NULL;
   double          *Ps_ext_data = NULL;
   HYPRE_Int       *Ps_ext_i = NULL;
   HYPRE_Int       *Ps_ext_j = NULL;

   double          *P_ext_diag_data = NULL;
   HYPRE_Int       *P_ext_diag_i;
   HYPRE_Int       *P_ext_diag_j = NULL;
   HYPRE_Int        P_ext_diag_size;

   double          *P_ext_offd_data = NULL;
   HYPRE_Int       *P_ext_offd_i;
   HYPRE_Int       *P_ext_offd_j = NULL;
   HYPRE_Int        P_ext_offd_size;

   HYPRE_Int       *P_marker;
   HYPRE_Int       *temp = NULL;

   HYPRE_Int        i, j;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        n_rows_A_global, n_cols_A_global;
   HYPRE_Int        n_rows_P_global, n_cols_P_global;
   HYPRE_Int        allsquare = 0;
   HYPRE_Int        cnt, cnt_offd, cnt_diag;
   HYPRE_Int        num_procs;
   HYPRE_Int        value;

   double           a_entry;
   double           a_b_product;

   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
      Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
      Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
      Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }

   /* Split the external rows into a part whose (global) columns fall into
      the local column range of P and a part whose columns do not. */
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;

   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
            P_ext_offd_size++;
         else
            P_ext_diag_size++;
      }
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j    = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j    = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   cnt_offd = 0;
   cnt_diag = 0;
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
      }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   /*-----------------------------------------------------------------------
    * Build col_map_offd_C as the sorted union of the global off-diagonal
    * columns of P_ext and of P.
    *-----------------------------------------------------------------------*/
   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i = 0; i < P_ext_offd_size; i++)
         temp[i] = P_ext_offd_j[i];
      cnt = P_ext_offd_size;
      for (i = 0; i < num_cols_offd_P; i++)
         temp[cnt++] = col_map_offd_P[i];
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      /* remove duplicates in place */
      num_cols_offd_C = 1;
      value = temp[0];
      for (i = 1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);

   for (i = 0; i < num_cols_offd_C; i++)
      col_map_offd_C[i] = temp[i];

   if (P_ext_offd_size || num_cols_offd_P)
      hypre_TFree(temp);

   /* translate the global columns of P_ext_offd into indices of
      col_map_offd_C */
   for (i = 0; i < P_ext_offd_size; i++)
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);

   /* map_P_to_C[k] is the position in col_map_offd_C of P's k-th
      off-diagonal column */
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i = 0; i < num_cols_offd_C; i++)
      {
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P) break;
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate marker array.
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );
   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(double, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   /*-----------------------------------------------------------------------
    *  Initialize some stuff.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /*-----------------------------------------------------------------------
    *  Loop over the local rows of A.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
         /* ... This and the coarse row code are the only parts between first
            pass and near the end where hypre_ParMatmul_FC is different from
            the regular hypre_ParMatmul */
      {
         /* P_marker values below these row starts belong to earlier rows */
         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if ( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];

                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
               else
               {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                     different "functions".  As we haven't created an entry for
                     C(i1,i2), nothing needs to be done. */
               }
            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if ( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {  /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];

                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/
                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }
               if (num_cols_offd_P)
               {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            else
            {  /* Interpolation mat should be 0 where i1 and i2 correspond to
                  different "functions".  As we haven't created an entry for
                  C(i1,i2), nothing needs to be done. */
            }
         }
      }
      else  /* i1 is a coarse row.*/
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P */
      {
         if (num_cols_offd_P)
         {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               /* Read P at jj2 (the position inside P's row), not at the
                  output counter.  P's local off-diagonal column index is
                  translated through map_P_to_C because C's off-diagonal
                  columns are numbered by col_map_offd_C, exactly as in the
                  fine-row branch above. */
               C_offd_j[jj_count_offd] = map_P_to_C[P_offd_j[jj2]];
               C_offd_data[jj_count_offd] = P_offd_data[jj2];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag] = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   C = hypre_ParCSRMatrixCreate(
      comm, n_rows_A_global, n_cols_P_global,
      row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size );

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag) = C_diag_i;
   hypre_CSRMatrixJ(C_diag) = C_diag_j;

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i;
   hypre_ParCSRMatrixOffd(C) = C_offd;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixJ(C_offd) = C_offd_j;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P) hypre_TFree(map_P_to_C);

   return C;
}
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec
 *
 * Performs y <- alpha * A * x + beta * y for a ParCSR matrix.
 *
 * The entries of x listed in A's communication package are packed into
 * send buffers and exchanged (one exchange per vector of a multivector).
 * While the exchange is outstanding the diagonal block is applied; the
 * off-diagonal block is applied to the received values afterwards.
 *
 * Return value (informational only, processing is never terminated,
 * because temporary vectors are often used with this routine):
 *    0   sizes are compatible
 *   11   length of x does not equal the number of columns of A
 *   12   length of y does not equal the number of rows of A
 *   13   both of the above
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex       alpha,
                          hypre_ParCSRMatrix *A,
                          hypre_ParVector    *x,
                          HYPRE_Complex       beta,
                          hypre_ParVector    *y )
{
   hypre_ParCSRCommPkg     *pkg           = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix         *A_diag        = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *A_offd        = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_loc         = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_loc         = hypre_ParVectorLocalVector(y);
   HYPRE_Int                global_rows   = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int                global_cols   = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int                x_len         = hypre_ParVectorGlobalSize(x);
   HYPRE_Int                y_len         = hypre_ParVectorGlobalSize(y);
   HYPRE_Int                nvec          = hypre_VectorNumVectors(x_loc);
   HYPRE_Int                n_offd_cols   = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int                vec_stride    = hypre_VectorVectorStride( x_loc );
   HYPRE_Int                idx_stride    = hypre_VectorIndexStride( x_loc );
   HYPRE_Complex           *x_loc_data    = hypre_VectorData(x_loc);

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *x_ghost;
   HYPRE_Complex           *x_ghost_data;
   HYPRE_Complex          **send_bufs;
   HYPRE_Int                n_sends, s, k, v, fill, elmt, src;
   HYPRE_Int                status = 0;

   hypre_assert( idx_stride>0 );

   /*---------------------------------------------------------------------
    * Size compatibility check; the result is only reported, never acted
    * upon.
    *--------------------------------------------------------------------*/
   if (global_cols != x_len)
   {
      status = (global_rows != y_len) ? 13 : 11;
   }
   else if (global_rows != y_len)
   {
      status = 12;
   }

   hypre_assert( hypre_VectorNumVectors(y_loc)==nvec );

   /* temporary vector that receives the off-processor entries of x */
   if ( nvec==1 )
   {
      x_ghost = hypre_SeqVectorCreate( n_offd_cols );
   }
   else
   {
      hypre_assert( nvec>1 );
      x_ghost = hypre_SeqMultiVectorCreate( n_offd_cols, nvec );
   }
   hypre_SeqVectorInitialize(x_ghost);
   x_ghost_data = hypre_VectorData(x_ghost);

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*,nvec);

   /*---------------------------------------------------------------------
    * If there exists no CommPkg for A, a CommPkg is generated using
    * equally load balanced partitionings
    *--------------------------------------------------------------------*/
   if (!pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   n_sends = hypre_ParCSRCommPkgNumSends(pkg);

   send_bufs = hypre_CTAlloc( HYPRE_Complex*, nvec );
   for ( v=0; v<nvec; ++v )
   {
      send_bufs[v] = hypre_CTAlloc(HYPRE_Complex,
                                   hypre_ParCSRCommPkgSendMapStart(pkg, n_sends));
   }

   /*---------------------------------------------------------------------
    * Pack the send buffers.  A single vector is read directly by element
    * number; a multivector is addressed through its vector and index
    * strides.
    *--------------------------------------------------------------------*/
   for ( v=0; v<nvec; ++v )
   {
      fill = 0;
      for (s = 0; s < n_sends; s++)
      {
         for (k = hypre_ParCSRCommPkgSendMapStart(pkg, s);
              k < hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
              k++)
         {
            elmt = hypre_ParCSRCommPkgSendMapElmt(pkg,k);
            if ( nvec==1 )
            {
               src = elmt;
            }
            else
            {
               src = v*vec_stride + idx_stride*elmt;
            }
            send_bufs[v][fill++] = x_loc_data[src];
         }
      }
   }

   hypre_assert( idx_stride==1 );
   /* ... The assert is because the following loop only works for 'column'
      storage of a multivector.  This needs to be fixed to work more
      generally, at least for 'row' storage.  That in turn means either
      changing CommPkg so that num_sends is no.zones*no.vectors (not
      no.zones) or, less dangerously, putting a stride into the logic of
      CommHandleCreate (stride either from a new arg or a new variable
      inside CommPkg), or moving the num_vector iteration inside
      CommHandleCreate (perhaps a new multivector variant of it). */
   for ( v=0; v<nvec; ++v )
   {
      handles[v] = hypre_ParCSRCommHandleCreate
         ( 1, pkg, send_bufs[v], &(x_ghost_data[v*n_offd_cols]) );
   }

   /* diagonal block, applied before the exchanges are completed */
   hypre_CSRMatrixMatvec( alpha, A_diag, x_loc, beta, y_loc);

   for ( v=0; v<nvec; ++v )
   {
      hypre_ParCSRCommHandleDestroy(handles[v]);
      handles[v] = NULL;
   }
   hypre_TFree(handles);

   /* off-diagonal block, accumulated onto y */
   if (n_offd_cols)
   {
      hypre_CSRMatrixMatvec( alpha, A_offd, x_ghost, 1.0, y_loc);
   }

   hypre_SeqVectorDestroy(x_ghost);
   x_ghost = NULL;

   for ( v=0; v<nvec; ++v )
   {
      hypre_TFree(send_bufs[v]);
   }
   hypre_TFree(send_bufs);

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *
 * Variant of the parallel matvec that forwards a CF marker array and a
 * point type (fpt) to hypre_CSRMatrixMatvec_FF for both the diag and the
 * offd block of A.
 *
 *   alpha, beta  scalars passed to the local products (the offd product
 *                always uses 1.0 in place of beta so it accumulates into y)
 *   A            parallel CSR matrix
 *   x, y         parallel input / output vectors
 *   CF_marker    marker array indexed by local row/column; it is also
 *                exchanged (comm job 11) to obtain markers for offd columns
 *   fpt          marker value forwarded to hypre_CSRMatrixMatvec_FF
 *
 * Return value (informational only, processing is never aborted):
 *    0  sizes are compatible
 *   11  global size of x differs from the global number of columns of A
 *   12  global size of y differs from the global number of rows of A
 *   13  both of the above
 *
 * All communication is skipped when the communicator has a single process.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex       alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             HYPRE_Complex       beta,
                             hypre_ParVector    *y,
                             HYPRE_Int          *CF_marker,
                             HYPRE_Int           fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle = NULL;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local  = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local  = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   /* Must start out NULL: it is only created when this rank has offd
    * columns, but the cleanup code below runs regardless. */
   hypre_Vector           *x_tmp = NULL;
   HYPRE_Int               x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int               y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int               num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int               ierr = 0;
   HYPRE_Int               num_sends = 0, i, j, index, start, num_procs;
   HYPRE_Int              *int_buf_data = NULL;
   HYPRE_Int              *CF_marker_offd = NULL;

   HYPRE_Complex          *x_tmp_data = NULL;
   HYPRE_Complex          *x_buf_data = NULL;
   HYPRE_Complex          *x_local_data = hypre_VectorData(x_local);

   hypre_MPI_Comm_size(comm,&num_procs);

   /* Size compatibility check; the code is only reported, never acted on. */
   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      /* Receive buffer for off-processor entries of x. */
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /* Build the communication package on demand. */
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex,
                                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

      /* Pack the entries of x listed in the send map. */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }

      /* Start the exchange of x (job 1). */
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }

   /* Local (diag) product; rows and columns share the same marker array. */
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      /* Finish the exchange of x. */
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      if (num_cols_offd)
         CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      /* Pack and exchange the CF markers (job 11) so that every offd
       * column has a marker available locally. */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }

      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      /* Off-diagonal contribution, accumulated into y (beta = 1.0). */
      if (num_cols_offd)
         hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                   CF_marker, CF_marker_offd, fpt);

      /* x_tmp exists only when this rank has offd columns. */
      if (x_tmp)
      {
         hypre_SeqVectorDestroy(x_tmp);
         x_tmp = NULL;
      }
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGSetupStats
 *
 * Prints a report about a BoomerAMG hierarchy to stdout (rank 0 only):
 *   - process / thread counts and the setup parameters stored in amg_data,
 *   - per-level statistics of the operator matrices A_array[level]
 *     (rows, nonzeros, sparsity, min/max entries per row, min/max row sum),
 *   - per-level statistics of the interpolation matrices P_array[level]
 *     (dimensions, min/max entries per row, min/max weight, min/max row sum),
 *   - grid and operator complexity,
 *   - the solver (cycle / relaxation) parameters.
 *
 * Parameters:
 *   amg_vdata  pointer to a hypre_ParAMGData object (passed as void *)
 *   A          matrix whose communicator is used for all reductions
 *
 * Every rank in the communicator must call this function: each level does
 * a collective MPI_Reduce (HYPRE_NO_GLOBAL_PARTITION) or MPI_Gather.
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
int
hypre_BoomerAMGSetupStats( void               *amg_vdata,
                           hypre_ParCSRMatrix *A )
{
   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;
   double          *P_offd_data;
   int             *P_offd_i;

   int           numrows;
   int           num_local_rows;

   HYPRE_BigInt *row_starts;

   int      num_levels;
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;

   /* Local variables */

   int          level;
   int          j;
   HYPRE_BigInt fine_size;

   int      min_entries;
   int      max_entries;

   int      num_procs,my_id, num_threads;

   double   min_rowsum;
   double   max_rowsum;
   double   sparse;

   int          i;

   HYPRE_BigInt coarse_size;
   int          entries;

   double   avg_entries;
   double   rowsum;

   double   min_weight;
   double   max_weight;

   int      global_min_e;
   int      global_max_e;
   double   global_min_rsum;
   double   global_max_rsum;
   double   global_min_wt;
   double   global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables;
   double   operat_cmplxty;
   /* Defined even when the finest level is empty, so the report below never
    * prints an indeterminate value. */
   double   grid_cmplxty = 0.0;

   /* amg solve params */
   int      max_iter;
   int      cycle_type;
   int     *num_grid_sweeps;
   int     *grid_relax_type;
   int      relax_order;
   int    **grid_relax_points;
   double  *relax_weight;
   double  *omega;
   double   tol;

   int      one = 1;
   int      minus_one = -1;
   int      zero = 0;
   int      smooth_type;
   int      smooth_num_levels;
   int      agg_num_levels;

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);

   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);

   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data);
   omega = hypre_ParAMGDataOmega(amg_data);
   tol = hypre_ParAMGDataTol(amg_data);

   /* Up to 6 statistics are exchanged per level.  With a global partition
    * rank 0 gathers one 6-entry record from every process. */
   send_buff     = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n",
             hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n",
             hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n",
             hypre_ParAMGDataMaxRowSum(amg_data));

      if (coarsen_type == 0)
      {
         printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
      }
      else if (abs(coarsen_type) == 1)
      {
         printf(" Coarsening Type = Ruge\n");
      }
      else if (abs(coarsen_type) == 2)
      {
         printf(" Coarsening Type = Ruge2B\n");
      }
      else if (abs(coarsen_type) == 3)
      {
         printf(" Coarsening Type = Ruge3\n");
      }
      else if (abs(coarsen_type) == 4)
      {
         printf(" Coarsening Type = Ruge 3c \n");
      }
      else if (abs(coarsen_type) == 5)
      {
         printf(" Coarsening Type = Ruge relax special points \n");
      }
      else if (abs(coarsen_type) == 6)
      {
         printf(" Coarsening Type = Falgout-CLJP \n");
      }
      else if (abs(coarsen_type) == 8)
      {
         printf(" Coarsening Type = PMIS \n");
      }
      else if (abs(coarsen_type) == 10)
      {
         printf(" Coarsening Type = HMIS \n");
      }
      else if (abs(coarsen_type) == 11)
      {
         printf(" Coarsening Type = Ruge 1st pass only \n");
      }
      else if (abs(coarsen_type) == 9)
      {
         printf(" Coarsening Type = PMIS fixed random \n");
      }
      else if (abs(coarsen_type) == 7)
      {
         printf(" Coarsening Type = CLJP, fixed random \n");
      }

      if (coarsen_type > 0)
      {
         printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }

      if (coarsen_type)
         printf(" measures are determined %s\n\n",
                (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
         printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      if (interp_type == 0)
      {
         printf(" Interpolation = modified classical interpolation\n");
      }
      else if (interp_type == 1)
      {
         printf(" Interpolation = LS interpolation \n");
      }
      else if (interp_type == 2)
      {
         printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
      }
      else if (interp_type == 3)
      {
         printf(" Interpolation = direct interpolation with separation of weights\n");
      }
      else if (interp_type == 4)
      {
         printf(" Interpolation = multipass interpolation\n");
      }
      else if (interp_type == 5)
      {
         printf(" Interpolation = multipass interpolation with separation of weights\n");
      }
      else if (interp_type == 6)
      {
         printf(" Interpolation = extended+i interpolation\n");
      }
      else if (interp_type == 7)
      {
         printf(" Interpolation = extended+i interpolation (only when needed)\n");
      }
      else if (interp_type == 8)
      {
         printf(" Interpolation = standard interpolation\n");
      }
      else if (interp_type == 9)
      {
         printf(" Interpolation = standard interpolation with separation of weights\n");
      }
      else if (interp_type == 12)
      {
         printf(" FF interpolation \n");
      }
      else if (interp_type == 13)
      {
         printf(" FF1 interpolation \n");
      }

      printf( "\nOperator Matrix Information:\n\n");

#if HYPRE_LONG_LONG
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("==================================\n");
#else
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }

   /*-----------------------------------------------------
    *  Operator statistics, one table row per level
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);
   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   {
      A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
      A_diag_data = hypre_CSRMatrixData(A_diag);
      A_diag_i = hypre_CSRMatrixI(A_diag);

      A_offd = hypre_ParCSRMatrixOffd(A_array[level]);
      A_offd_data = hypre_CSRMatrixData(A_offd);
      A_offd_i = hypre_CSRMatrixI(A_offd);

      row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);

      fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
      global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
      num_coeffs[level] = global_nonzeros;
      num_variables[level] = (double) fine_size;

      sparse = global_nonzeros /((double) fine_size * (double) fine_size);

      min_entries = 0;
      max_entries = 0;
      min_rowsum = 0.0;
      max_rowsum = 0.0;

      num_local_rows = hypre_CSRMatrixNumRows(A_diag);
      if (num_local_rows)
      {
         /* Seed the minima/maxima from local row 0, then scan all rows. */
         min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
         for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
            min_rowsum += A_diag_data[j];
         for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
            min_rowsum += A_offd_data[j];

         max_rowsum = min_rowsum;

         for (j = 0; j < num_local_rows; j++)
         {
            entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
            min_entries = hypre_min(entries, min_entries);
            max_entries = hypre_max(entries, max_entries);

            rowsum = 0.0;
            for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
               rowsum += A_diag_data[i];

            for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
               rowsum += A_offd_data[i];

            min_rowsum = hypre_min(rowsum, min_rowsum);
            max_rowsum = hypre_max(rowsum, max_rowsum);
         }
      }
      avg_entries = global_nonzeros / ((double) fine_size);

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
      }

      /* Minima are negated so that a single MPI_MAX reduction yields both
       * the global minima and the global maxima. */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id ==0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = - gather_buff[2];
         global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         for (j = 0; j < num_procs; j++)
         {
            /* Processes without rows do not contribute to the minima. */
            numrows = row_starts[j+1]-row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   if (my_id == 0)
   {
      printf( "\n\nInterpolation Matrix Information:\n\n");

#if HYPRE_LONG_LONG
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows x cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("======================================\n");
#else
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }

   /*-----------------------------------------------------
    *  Interpolation statistics, one table row per level
    *-----------------------------------------------------*/

   for (level = 0; level < num_levels-1; level++)
   {
      P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
      P_diag_data = hypre_CSRMatrixData(P_diag);
      P_diag_i = hypre_CSRMatrixI(P_diag);

      P_offd = hypre_ParCSRMatrixOffd(P_array[level]);
      P_offd_data = hypre_CSRMatrixData(P_offd);
      P_offd_i = hypre_CSRMatrixI(P_offd);

      row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);

      fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
      coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
      global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);

      min_weight = 1.0;
      max_weight = 0.0;
      max_rowsum = 0.0;
      min_rowsum = 0.0;
      min_entries = 0;
      max_entries = 0;

      num_local_rows = hypre_CSRMatrixNumRows(P_diag);
      if (num_local_rows)
      {
         /* Seed min_weight from the first stored diag entry, but only if the
          * diag block really stores one; otherwise P_diag_data[0] would be
          * read out of bounds. */
         if (hypre_CSRMatrixNumCols(P_diag) && P_diag_i[num_local_rows] > 0)
            min_weight = P_diag_data[0];

         /* Local row 0 seeds the row-sum and entry-count extrema.  Weights
          * equal to 1.0 are excluded from max_weight. */
         for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
         {
            min_weight = hypre_min(min_weight, P_diag_data[j]);
            if (P_diag_data[j] != 1.0)
               max_weight = hypre_max(max_weight, P_diag_data[j]);
            min_rowsum += P_diag_data[j];
         }
         for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
         {
            min_weight = hypre_min(min_weight, P_offd_data[j]);
            if (P_offd_data[j] != 1.0)
               max_weight = hypre_max(max_weight, P_offd_data[j]);
            min_rowsum += P_offd_data[j];
         }

         max_rowsum = min_rowsum;

         min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]);
         max_entries = 0;

         for (j = 0; j < num_local_rows; j++)
         {
            entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
            min_entries = hypre_min(entries, min_entries);
            max_entries = hypre_max(entries, max_entries);

            rowsum = 0.0;
            for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[i]);
               if (P_diag_data[i] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[i]);
               rowsum += P_diag_data[i];
            }

            for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
            {
               min_weight = hypre_min(min_weight, P_offd_data[i]);
               if (P_offd_data[i] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[i]);
               rowsum += P_offd_data[i];
            }

            min_rowsum = hypre_min(rowsum, min_rowsum);
            max_rowsum = hypre_max(rowsum, max_rowsum);
         }
      }
      avg_entries = ((double) global_nonzeros) / ((double) fine_size);

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum =  1.0e7;
         min_weight = 1.0e7;
      }

      /* Minima are negated so that a single MPI_MAX reduction yields both
       * the global minima and the global maxima. */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;

      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;

         for (j = 0; j < num_procs; j++)
         {
            /* Processes without rows do not contribute to the minima. */
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   /*-----------------------------------------------------
    *  Complexities
    *-----------------------------------------------------*/

   total_variables = 0;
   operat_cmplxty = 0;
   for (j = 0; j < num_levels; j++)
   {
      operat_cmplxty +=  num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   /* num_variables has num_levels entries, so element 0 exists only when
    * there is at least one level. */
   if (num_levels > 0 && num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];

   if (my_id == 0 )
   {
      printf("\n\n Complexity: grid = %f\n",grid_cmplxty);
      printf(" operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   /*-----------------------------------------------------
    *  Solver parameters
    *-----------------------------------------------------*/

   if (my_id == 0)
   {
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( " Maximum number of cycles: %d \n",max_iter);
      printf( " Stopping Tolerance: %e \n",tol);
      printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type);
      printf( " Relaxation Parameters:\n");
      printf( " Visiting Grid: down up coarse\n");
      printf( " Number of partial sweeps: %4d %2d %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( " Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         /* Explicit per-sweep point types were supplied. */
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         /* Each sweep is reported as a C/F pair going down, F/C going up. */
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d %2d", one, minus_one);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d %2d", minus_one, one);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }
      else
      {
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
                    hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);

      /* Only weights that differ from 1 are reported. */
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
            printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
            printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }

   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);

   return(0);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvecT
 *
 * Transposed counterpart of the parallel matvec.  Two local transposed
 * products are formed:
 *
 *   1. hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0,  y_tmp)
 *   2. hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local)
 *
 * The contents of y_tmp are shipped through a hypre_ParCSRCommHandle
 * (job 2) into per-vector buffers, and the buffered values are then added
 * to the entries of y_local named by the send map.
 *
 * Return value (informational only, processing is never aborted):
 *    0  sizes are compatible
 *    1  global size of x differs from the global number of rows of A
 *    2  global size of y differs from the global number of columns of A
 *    3  both of the above
 *
 * If A has no communication package yet, one is created through
 * hypre_MatvecCommPkgCreate(A).
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommPkg     *comm_pkg      = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix         *diag          = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *offd          = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_local       = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_local       = hypre_ParVectorLocalVector(y);

   HYPRE_Int                vecstride     = hypre_VectorVectorStride( y_local );
   HYPRE_Int                idxstride     = hypre_VectorIndexStride( y_local );
   HYPRE_Int                num_rows      = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int                num_cols      = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int                num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int                x_size        = hypre_ParVectorGlobalSize(x);
   HYPRE_Int                y_size        = hypre_ParVectorGlobalSize(y);
   HYPRE_Int                num_vectors   = hypre_VectorNumVectors(y_local);

   hypre_ParCSRCommHandle **comm_handle;
   hypre_Vector            *y_tmp;
   HYPRE_Complex           *y_tmp_data;
   HYPRE_Complex          **y_buf_data;
   HYPRE_Complex           *y_local_data;

   HYPRE_Int                ierr = 0;
   HYPRE_Int                num_sends, send, k, jv, pos, first, last;

   /* Size compatibility check; the code is only reported, never acted on. */
   if (num_rows != x_size)
   {
      ierr = (num_cols != y_size) ? 3 : 1;
   }
   else if (num_cols != y_size)
   {
      ierr = 2;
   }

   comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors);

   /* Workspace receiving the offd product. */
   if ( num_vectors==1 )
   {
      y_tmp = hypre_SeqVectorCreate(num_cols_offd);
   }
   else
   {
      y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors);
   }
   hypre_SeqVectorInitialize(y_tmp);

   /* Build the communication package on demand. */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   /* One buffer per vector, each long enough for the full send map. */
   y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors );
   for ( jv=0; jv<num_vectors; ++jv )
   {
      y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex,
                                     hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   }

   y_tmp_data   = hypre_VectorData(y_tmp);
   y_local_data = hypre_VectorData(y_local);

   /* Only 'column' storage of multivectors is implemented so far. */
   hypre_assert( idxstride==1 );

   if (num_cols_offd)
   {
      hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp);
   }

   /* Start one exchange per vector; y_tmp_data is addressed as num_vectors
    * contiguous chunks of num_cols_offd entries ('column' storage). */
   for ( jv=0; jv<num_vectors; ++jv )
   {
      comm_handle[jv] = hypre_ParCSRCommHandleCreate
         ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] );
   }

   /* Local (diag) product while the exchange is outstanding. */
   hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local);

   /* Finish the exchanges. */
   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_ParCSRCommHandleDestroy(comm_handle[jv]);
      comm_handle[jv] = NULL;
   }
   hypre_TFree(comm_handle);

   /* Add the buffered contributions to the entries named by the send map. */
   if ( num_vectors==1 )
   {
      pos = 0;
      for (send = 0; send < num_sends; send++)
      {
         first = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send);
         last  = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
         for (k = first; k < last; k++)
         {
            y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k)]
               += y_buf_data[0][pos++];
         }
      }
   }
   else
   {
      for ( jv=0; jv<num_vectors; ++jv )
      {
         pos = 0;
         for (send = 0; send < num_sends; send++)
         {
            first = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send);
            last  = hypre_ParCSRCommPkgSendMapStart(comm_pkg, send+1);
            for (k = first; k < last; k++)
            {
               y_local_data[ jv*vecstride +
                             idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k) ]
                  += y_buf_data[jv][pos++];
            }
         }
      }
   }

   hypre_SeqVectorDestroy(y_tmp);
   y_tmp = NULL;

   for ( jv=0; jv<num_vectors; ++jv )
   {
      hypre_TFree(y_buf_data[jv]);
   }
   hypre_TFree(y_buf_data);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixConvertFromParCSRMatrix
 *
 * Builds a new hypre_ParCSRBlockMatrix with square blocks of size
 * matrix_C_block_size from the scalar hypre_ParCSRMatrix 'matrix'.
 *
 *   - Row/column partitionings and global sizes are divided by the block
 *     size.
 *   - The diag part is converted with
 *     hypre_CSRBlockMatrixConvertFromCSRMatrix.
 *   - The offd part is converted here, because its column indices are local
 *     indices into col_map_offd: scalar offd columns are grouped into nodal
 *     columns (global column / block size), a nodal column map is built, and
 *     every scalar entry is copied into its slot of the matching block.
 *
 * The comm package of the result is left NULL.  Returns the new matrix.
 *
 * NOTE(review): the grouping logic compares each col_map_offd entry only
 * with its predecessor, so it presumably relies on col_map_offd being
 * sorted in ascending order - confirm against callers.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBlockMatrix *
hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix,
                                               HYPRE_Int matrix_C_block_size )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(matrix);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);

   HYPRE_Int        global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
   HYPRE_Int        global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
   HYPRE_Int       *row_starts = hypre_ParCSRMatrixRowStarts(matrix);
   HYPRE_Int       *col_starts = hypre_ParCSRMatrixColStarts(matrix);
   HYPRE_Int        num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int       *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(matrix);

   HYPRE_Int       *offd_i = hypre_CSRMatrixI(offd);
   HYPRE_Int       *offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Complex   *offd_data = hypre_CSRMatrixData(offd);

   /* scratch arrays, released before returning */
   HYPRE_Int       *node_of_col = NULL;   /* scalar offd col -> global nodal col */
   HYPRE_Int       *local_node  = NULL;   /* scalar offd col -> local nodal col  */
   HYPRE_Int       *mark        = NULL;   /* per nodal col bookkeeping           */

   /* pieces of the result */
   hypre_ParCSRBlockMatrix *matrix_C;
   hypre_CSRBlockMatrix    *matrix_C_diag;
   hypre_CSRBlockMatrix    *matrix_C_offd;
   HYPRE_Int       *matrix_C_row_starts;
   HYPRE_Int       *matrix_C_col_starts;
   HYPRE_Int       *matrix_C_col_map_offd = NULL;
   HYPRE_Int       *matrix_C_offd_i = NULL;
   HYPRE_Int       *matrix_C_offd_j = NULL;
   HYPRE_Complex   *matrix_C_offd_data = NULL;
   HYPRE_Int        matrix_C_num_cols_offd = 0;
   HYPRE_Int        matrix_C_num_nonzeros_offd = 0;

   HYPRE_Int        num_procs, num_rows, num_nodes;
   HYPRE_Int        p, c, node, r, nz, row, node_col;
   HYPRE_Int        filled, next_slot, row_first_slot, slot, pos;

   hypre_MPI_Comm_size(comm,&num_procs);

   /* Nodal partitionings: scalar partition boundaries divided by block size. */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for (p = 0; p < 2; p++)
   {
      matrix_C_row_starts[p] = row_starts[p]/matrix_C_block_size;
      matrix_C_col_starts[p] = col_starts[p]/matrix_C_block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for (p = 0; p < num_procs + 1; p++)
   {
      matrix_C_row_starts[p] = row_starts[p]/matrix_C_block_size;
      matrix_C_col_starts[p] = col_starts[p]/matrix_C_block_size;
   }
#endif

   /*---------------- diagonal part ----------------*/
   matrix_C_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag,
                                                            matrix_C_block_size);

   /*---------------- off-diagonal part ----------------*/
   /* The diag converter cannot be reused: offd is not square and its j
    * entries are local indices that must go through col_map_offd. */
   num_rows  = hypre_CSRMatrixNumRows(diag);
   num_nodes = num_rows/matrix_C_block_size;

   matrix_C_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes + 1);
   matrix_C_offd_i[0] = 0;

   if (num_cols_offd)
   {
      /* Pass A: nodal column of every scalar offd column, and the number
       * of distinct nodal columns. */
      node_of_col = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      node_of_col[0] = col_map_offd[0]/matrix_C_block_size;
      matrix_C_num_cols_offd = 1;
      for (c = 1; c < num_cols_offd; c++)
      {
         node_of_col[c] = col_map_offd[c]/matrix_C_block_size;
         if (node_of_col[c] > node_of_col[c-1])
         {
            matrix_C_num_cols_offd++;
         }
      }

      /* Pass B: nodal column map (global numbers) plus the map from scalar
       * offd column to local nodal column index. */
      matrix_C_col_map_offd = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      local_node = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      matrix_C_col_map_offd[0] = node_of_col[0];
      local_node[0] = 0;
      filled = 1;
      for (c = 1; c < num_cols_offd; c++)
      {
         if (node_of_col[c] > node_of_col[c-1])
         {
            matrix_C_col_map_offd[filled++] = node_of_col[c];
         }
         local_node[c] = filled - 1;
      }

      /* Pass C: count block nonzeros.  mark[] remembers the last block row
       * in which a nodal column was seen, so each (block row, nodal col)
       * pair is counted once. */
      mark = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      for (c = 0; c < matrix_C_num_cols_offd; c++)
      {
         mark[c] = -1;
      }

      for (node = 0; node < num_nodes; node++)
      {
         matrix_C_offd_i[node] = matrix_C_num_nonzeros_offd;
         for (r = 0; r < matrix_C_block_size; r++)
         {
            row = node*matrix_C_block_size+r;
            for (nz = offd_i[row]; nz < offd_i[row+1]; nz++)
            {
               node_col = local_node[offd_j[nz]];
               if (mark[node_col] < node)
               {
                  mark[node_col] = node;
                  matrix_C_num_nonzeros_offd++;
               }
            }
         }
      }
      matrix_C_offd_i[num_nodes] = matrix_C_num_nonzeros_offd;
   }

   /* Create the offd block matrix and hand it the row pointer. */
   matrix_C_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_nodes,
                                              matrix_C_num_cols_offd,
                                              matrix_C_num_nonzeros_offd);
   hypre_CSRBlockMatrixI(matrix_C_offd) = matrix_C_offd_i;

   if (matrix_C_num_nonzeros_offd)
   {
      matrix_C_offd_j = hypre_CTAlloc(HYPRE_Int, matrix_C_num_nonzeros_offd);
      matrix_C_offd_data =
         hypre_CTAlloc(HYPRE_Complex,
                       matrix_C_num_nonzeros_offd*matrix_C_block_size*
                       matrix_C_block_size);
      hypre_CSRBlockMatrixJ(matrix_C_offd) = matrix_C_offd_j;
      hypre_CSRMatrixData(matrix_C_offd) = matrix_C_offd_data;

      /* mark[] now stores the slot in matrix_C_offd_j assigned to a nodal
       * column; a value below row_first_slot means "not yet placed in the
       * current block row". */
      for (c = 0; c < matrix_C_num_cols_offd; c++)
      {
         mark[c] = -1;
      }

      next_slot = 0;
      row_first_slot = 0;
      for (node = 0; node < num_nodes; node++)
      {
         for (r = 0; r < matrix_C_block_size; r++)
         {
            row = node*matrix_C_block_size+r;
            for (nz = offd_i[row]; nz < offd_i[row+1]; nz++)
            {
               node_col = local_node[offd_j[nz]];

               if (mark[node_col] < row_first_slot)
               {
                  /* first entry of this block in the current block row */
                  mark[node_col] = next_slot;
                  matrix_C_offd_j[next_slot] = node_col;
                  next_slot++;
               }
               slot = mark[node_col];

               /* block slot + row inside block + column inside block */
               pos = (slot * matrix_C_block_size * matrix_C_block_size)
                  + (r * matrix_C_block_size)
                  + col_map_offd[offd_j[nz]]%matrix_C_block_size;
               matrix_C_offd_data[pos] = offd_data[nz];
            }
         }
         row_first_slot = next_slot;
      }
   }

   /*---------------- assemble the parallel block matrix ----------------*/
   matrix_C = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size,
                                            global_num_rows/matrix_C_block_size,
                                            global_num_cols/matrix_C_block_size,
                                            matrix_C_row_starts,
                                            matrix_C_col_starts,
                                            matrix_C_num_cols_offd,
                                            hypre_CSRBlockMatrixNumNonzeros(matrix_C_diag),
                                            matrix_C_num_nonzeros_offd);

   /* Swap in the diag/offd parts built above for the ones made by Create. */
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRBlockMatrixDiag(matrix_C) = matrix_C_diag;

   hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRBlockMatrixOffd(matrix_C) = matrix_C_offd;

   hypre_ParCSRMatrixColMapOffd(matrix_C) = matrix_C_col_map_offd;

   /* The comm package is not copied. */
   hypre_ParCSRBlockMatrixCommPkg(matrix_C) = NULL;

   hypre_TFree(node_of_col);
   hypre_TFree(local_node);
   hypre_TFree(mark);

   return matrix_C;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixExtractBExt
 *
 * Collects the rows of B that the communication package of A says this
 * process receives, and returns them as a new CSR block matrix.
 *
 *   B    - supplies the rows.  For every row in the send map the diag
 *          entries (column index + first_col_diag) are followed by the
 *          offd entries (column index mapped through col_map_offd).
 *   A    - supplies the communication package.  It is used as-is; none is
 *          created here, so it must already exist.
 *   data - if nonzero the block_size*block_size coefficients of every
 *          entry are exchanged as well, otherwise only the i/j structure
 *          of the result is filled in.
 *
 * The returned matrix has recv_vec_starts[num_recvs] rows.
 *--------------------------------------------------------------------------*/
hypre_CSRBlockMatrix *
hypre_ParCSRBlockMatrixExtractBExt(hypre_ParCSRBlockMatrix *B,
                                   hypre_ParCSRBlockMatrix *A,
                                   HYPRE_Int                data)
{
   MPI_Comm             comm = hypre_ParCSRBlockMatrixComm(B);
   HYPRE_Int            first_col_diag = hypre_ParCSRBlockMatrixFirstColDiag(B);
   HYPRE_Int           *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(B);
   HYPRE_Int            block_size = hypre_ParCSRBlockMatrixBlockSize(B);

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   HYPRE_Int            num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   HYPRE_Int           *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
   HYPRE_Int            num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int           *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
   HYPRE_Int           *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);

   hypre_ParCSRCommHandle *comm_handle;
   hypre_ParCSRCommPkg    *tmp_comm_pkg;

   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(B);
   HYPRE_Int            *diag_i = hypre_CSRBlockMatrixI(diag);
   HYPRE_Int            *diag_j = hypre_CSRBlockMatrixJ(diag);
   HYPRE_Complex        *diag_data = hypre_CSRBlockMatrixData(diag);

   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(B);
   HYPRE_Int            *offd_i = hypre_CSRBlockMatrixI(offd);
   HYPRE_Int            *offd_j = hypre_CSRBlockMatrixJ(offd);
   HYPRE_Complex        *offd_data = hypre_CSRBlockMatrixData(offd);

   /* send side ("int" = interior rows of B that go out) */
   HYPRE_Int     *B_int_i;
   HYPRE_Int     *B_int_j;
   HYPRE_Complex *B_int_data = NULL;   /* only allocated when data != 0 */

   /* receive side (becomes the arrays of the returned matrix) */
   hypre_CSRBlockMatrix *B_ext;
   HYPRE_Int     *B_ext_i;
   HYPRE_Int     *B_ext_j;
   HYPRE_Complex *B_ext_data = NULL;   /* only allocated when data != 0 */

   HYPRE_Int *jdata_recv_vec_starts;
   HYPRE_Int *jdata_send_map_starts;

   HYPRE_Int  num_cols_B, num_nonzeros, num_rows_B_ext;
   HYPRE_Int  i, j, k, l, counter, bnnz;
   HYPRE_Int  j_cnt, jrow;

   bnnz = block_size * block_size;
   num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B);
   num_rows_B_ext = recv_vec_starts[num_recvs];

   B_int_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]+1);
   B_ext_i = hypre_CTAlloc(HYPRE_Int, num_rows_B_ext+1);

   /*--------------------------------------------------------------------------
    * generate B_int_i through adding number of row-elements of offd and diag
    * for corresponding rows. B_int_i[j+1] contains the number of elements of
    * a row j (which is determined through send_map_elmts)
    *--------------------------------------------------------------------------*/
   B_int_i[0] = 0;
   j_cnt = 0;
   num_nonzeros = 0;
   for (i = 0; i < num_sends; i++)
   {
      for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
      {
         jrow = send_map_elmts[j];
         B_int_i[++j_cnt] = offd_i[jrow+1] - offd_i[jrow]
                            + diag_i[jrow+1] - diag_i[jrow];
         num_nonzeros += B_int_i[j_cnt];
      }
   }

   /*--------------------------------------------------------------------------
    * initialize communication of the row lengths; the handle is completed
    * (destroyed) only after the j/data send buffers have been filled
    *--------------------------------------------------------------------------*/
   comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg,
                                              &B_int_i[1], &B_ext_i[1]);

   B_int_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
   if (data)
   {
      B_int_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz);
   }

   jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1);
   jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1);

   /* Pack the entries of every outgoing row, diag part first.  After the
    * rows of send i have been packed, `counter` is the total number of
    * entries packed so far, i.e. the start of send i+1 in B_int_j. */
   jdata_send_map_starts[0] = B_int_i[0];
   counter = 0;
   for (i = 0; i < num_sends; i++)
   {
      for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++)
      {
         jrow = send_map_elmts[j];
         for (k = diag_i[jrow]; k < diag_i[jrow+1]; k++)
         {
            B_int_j[counter] = diag_j[k] + first_col_diag;
            if (data)
            {
               for (l = 0; l < bnnz; l++)
               {
                  B_int_data[counter*bnnz + l] = diag_data[k*bnnz + l];
               }
            }
            counter++;
         }
         for (k = offd_i[jrow]; k < offd_i[jrow+1]; k++)
         {
            B_int_j[counter] = col_map_offd[offd_j[k]];
            if (data)
            {
               for (l = 0; l < bnnz; l++)
               {
                  B_int_data[counter*bnnz + l] = offd_data[k*bnnz + l];
               }
            }
            counter++;
         }
      }
      jdata_send_map_starts[i+1] = counter;
   }

   /* Temporary package for the entry exchange: same neighbours as comm_pkg
    * (the proc arrays are borrowed, not copied) but entry-based offsets. */
   tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);
   hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm;
   hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends;
   hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs;
   hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) =
      hypre_ParCSRCommPkgSendProcs(comm_pkg);
   hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) =
      hypre_ParCSRCommPkgRecvProcs(comm_pkg);
   hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts;

   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   /*--------------------------------------------------------------------------
    * after communication exchange B_ext_i[j+1] contains the number of elements
    * of a row j !
    * evaluate B_ext_i and compute num_nonzeros for B_ext
    *--------------------------------------------------------------------------*/
   for (i = 0; i < num_recvs; i++)
   {
      for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
      {
         B_ext_i[j+1] += B_ext_i[j];
      }
   }

   num_nonzeros = B_ext_i[num_rows_B_ext];

   B_ext = hypre_CSRBlockMatrixCreate(block_size, num_rows_B_ext, num_cols_B,
                                      num_nonzeros);
   B_ext_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
   if (data)
   {
      B_ext_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz);
   }

   /* entry offsets of each receive; entry 0 is left as allocated by
    * hypre_CTAlloc, which this code relies on to be zero */
   for (i = 0; i < num_recvs; i++)
   {
      jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]];
   }

   hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts;

   comm_handle = hypre_ParCSRCommHandleCreate(11, tmp_comm_pkg,
                                              B_int_j, B_ext_j);
   hypre_ParCSRCommHandleDestroy(comm_handle);
   comm_handle = NULL;

   if (data)
   {
      comm_handle = hypre_ParCSRBlockCommHandleCreate(1, bnnz, tmp_comm_pkg,
                                                      B_int_data, B_ext_data);
      hypre_ParCSRBlockCommHandleDestroy(comm_handle);
      comm_handle = NULL;
   }

   hypre_CSRBlockMatrixI(B_ext) = B_ext_i;
   hypre_CSRBlockMatrixJ(B_ext) = B_ext_j;
   if (data)
   {
      hypre_CSRBlockMatrixData(B_ext) = B_ext_data;
   }

   hypre_TFree(B_int_i);
   hypre_TFree(B_int_j);
   if (data)
   {
      hypre_TFree(B_int_data);
   }
   hypre_TFree(jdata_send_map_starts);
   hypre_TFree(jdata_recv_vec_starts);
   /* plain free: the send/recv proc arrays belong to comm_pkg */
   hypre_TFree(tmp_comm_pkg);

   return B_ext;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixSplit
 *
 * Splits A into an nr x nc array of ParCSR matrices stored row-major in
 * blocks[] (block (bi,bj) is blocks[bi*nc + bj]).
 *
 * Local row i goes to block row (i % nr) when interleaved_rows is nonzero
 * and to (i / (local_rows/nr)) otherwise; local columns are assigned the
 * same way using nc and interleaved_cols.  The local and global sizes of A
 * must be divisible by nr and nc (checked with hypre_assert).
 *
 * A gets a Matvec communication package if it has none.  All blocks share
 * one pair of row/column partitioning arrays, which blocks[0] owns.
 *--------------------------------------------------------------------------*/
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix  *A,
                             HYPRE_Int            nr,
                             HYPRE_Int            nc,
                             hypre_ParCSRMatrix **blocks,
                             int                  interleaved_rows,
                             int                  interleaved_cols)
{
   HYPRE_Int i, j, k;

   MPI_Comm         comm  = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A);

   HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int local_rows  = hypre_CSRMatrixNumRows(Adiag);
   HYPRE_Int local_cols  = hypre_CSRMatrixNumCols(Adiag);
   HYPRE_Int offd_cols   = hypre_CSRMatrixNumCols(Aoffd);

   hypre_assert(local_rows % nr == 0 && local_cols % nc == 0);
   hypre_assert(global_rows % nr == 0 && global_cols % nc == 0);

   HYPRE_Int rows_per_block = local_rows / nr;
   HYPRE_Int cols_per_block = local_cols / nc;
   HYPRE_Int num_blocks     = nr * nc;

   /* ---- block number of every local row and local (diag) column ---- */
   HYPRE_Int *row_blk = hypre_TAlloc(HYPRE_Int, local_rows);
   HYPRE_Int *col_blk = hypre_TAlloc(HYPRE_Int, local_cols);

   if (interleaved_rows)
   {
      for (i = 0; i < local_rows; i++) { row_blk[i] = i % nr; }
   }
   else
   {
      for (i = 0; i < local_rows; i++) { row_blk[i] = i / rows_per_block; }
   }

   if (interleaved_cols)
   {
      for (i = 0; i < local_cols; i++) { col_blk[i] = i % nc; }
   }
   else
   {
      for (i = 0; i < local_cols; i++) { col_blk[i] = i / cols_per_block; }
   }

   /* ---- block number of every offd column: ask the owning process ---- */
   /* Holds the received code (block + nc * new global column) first and is
    * reduced to the plain block number once the exchange has completed. */
   HYPRE_Int *offd_blk = hypre_TAlloc(HYPRE_Int, offd_cols);

   /* the exchange below needs A's Matvec communication package */
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* new global column number of each local column inside its block:
    * columns of one block are numbered consecutively from first_col */
   HYPRE_Int *next_in_block = hypre_CTAlloc(HYPRE_Int, nc);
   HYPRE_Int *new_gcol      = hypre_TAlloc(HYPRE_Int, local_cols);
   HYPRE_Int  first_col     = hypre_ParCSRMatrixFirstColDiag(A) / nc;
   for (i = 0; i < local_cols; i++)
   {
      k = col_blk[i];
      new_gcol[i] = first_col + next_in_block[k];
      next_in_block[k]++;
   }
   hypre_TFree(next_in_block);

   /* pack one code per send-map entry and start the exchange */
   HYPRE_Int  num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *send_buf  =
      hypre_CTAlloc(HYPRE_Int,
                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   HYPRE_Int  fill = 0;
   for (i = 0; i < num_sends; i++)
   {
      HYPRE_Int seg_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1);
      for (j = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); j < seg_end; j++)
      {
         k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j);
         send_buf[fill++] = col_blk[k] + nc*new_gcol[k];
      }
   }
   hypre_TFree(new_gcol);

   hypre_ParCSRCommHandle *comm_handle =
      hypre_ParCSRCommHandleCreate(11, comm_pkg, send_buf, offd_blk);

   /* ---- create the (still empty) block matrices ---- */
   /* With an assumed partition only two partition entries are scaled,
    * otherwise one per process plus one. */
   HYPRE_Int num_procs = 1;
   if (!hypre_ParCSRMatrixAssumedPartition(A))
   {
      hypre_MPI_Comm_size(comm, &num_procs);
   }

   HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
   HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1);
   for (i = 0; i <= num_procs; i++)
   {
      row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr;
      col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc;
   }

   for (i = 0; i < num_blocks; i++)
   {
      blocks[i] = hypre_ParCSRMatrixCreate(comm,
                                           global_rows/nr, global_cols/nc,
                                           row_starts, col_starts, 0, 0, 0);
   }

   /* ---- split the diag part while the exchange is in flight ---- */
   hypre_CSRMatrix **parts = hypre_TAlloc(hypre_CSRMatrix*, num_blocks);
   hypre_CSRMatrixSplit(Adiag, nr, nc, row_blk, col_blk, parts);

   for (i = 0; i < num_blocks; i++)
   {
      /* replace the placeholder diag created above */
      hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i]));
      hypre_ParCSRMatrixDiag(blocks[i]) = parts[i];
   }

   /* ---- complete the exchange: offd_blk now holds the codes ---- */
   hypre_ParCSRCommHandleDestroy(comm_handle);
   hypre_TFree(send_buf);

   /* decode: quotient = new global column, remainder = block number */
   HYPRE_Int *offd_gcol = hypre_TAlloc(HYPRE_Int, offd_cols);
   for (i = 0; i < offd_cols; i++)
   {
      offd_gcol[i] = offd_blk[i] / nc;
      offd_blk[i] %= nc;
   }

   /* ---- split the offd part ---- */
   hypre_CSRMatrixSplit(Aoffd, nr, nc, row_blk, offd_blk, parts);

   for (i = 0; i < num_blocks; i++)
   {
      /* replace the placeholder offd created above */
      hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i]));
      hypre_ParCSRMatrixOffd(blocks[i]) = parts[i];
   }

   hypre_TFree(parts);
   hypre_TFree(col_blk);
   hypre_TFree(row_blk);

   /* ---- column map of every block: the offd columns of A that fall in
    *      the block's column group, in their original order ---- */
   for (k = 0; k < num_blocks; k++)
   {
      HYPRE_Int           bj   = k % nc;   /* block column of blocks[k] */
      hypre_ParCSRMatrix *block = blocks[k];
      hypre_CSRMatrix    *block_offd = hypre_ParCSRMatrixOffd(block);
      HYPRE_Int           block_offd_cols = hypre_CSRMatrixNumCols(block_offd);
      HYPRE_Int          *block_col_map =
         hypre_TAlloc(HYPRE_Int, block_offd_cols);

      j = 0;
      for (i = 0; i < offd_cols; i++)
      {
         if (offd_blk[i] == bj)
         {
            block_col_map[j++] = offd_gcol[i];
         }
      }
      hypre_assert(j == block_offd_cols);

      hypre_ParCSRMatrixColMapOffd(block) = block_col_map;
   }

   hypre_TFree(offd_gcol);
   hypre_TFree(offd_blk);

   /* ---- finish the new matrices, make them own all the stuff ---- */
   for (i = 0; i < num_blocks; i++)
   {
      hypre_ParCSRMatrixSetNumNonzeros(blocks[i]);
      hypre_MatvecCommPkgCreate(blocks[i]);

      hypre_ParCSRMatrixOwnsData(blocks[i]) = 1;

      /* row_starts/col_starts are shared; only the first block owns them */
      hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = (i == 0);
      hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = (i == 0);
   }
}
/*
  Function:  hypre_ParCSRMatrixEliminateAAe

                    (input)                  (output)

                / A_ii | A_ib \          / A_ii |  0   \
            A = | -----+----- |   --->   | -----+----- |
                \ A_bi | A_bb /          \   0  |  I   /

                                         /   0  |   A_ib   \
                                    Ae = | -----+--------- |
                                         \ A_bi | A_bb - I /

  The rows/columns to eliminate ("b") are given as local indices in
  rowscols_to_elim[0 .. num_rowscols_to_elim-1].  *Ae is created here; it
  shares A's row/column partitioning without owning it.  A gets a Matvec
  communication package if it has none.
*/
void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix  *A,
                                    hypre_ParCSRMatrix **Ae,
                                    HYPRE_Int            num_rowscols_to_elim,
                                    HYPRE_Int           *rowscols_to_elim)
{
   HYPRE_Int idx, pos, row;

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int        n_local_rows = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        n_offd_cols  = hypre_CSRMatrixNumCols(A_offd);

   *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A),
                                  hypre_ParCSRMatrixGlobalNumRows(A),
                                  hypre_ParCSRMatrixGlobalNumCols(A),
                                  hypre_ParCSRMatrixRowStarts(A),
                                  hypre_ParCSRMatrixColStarts(A),
                                  0, 0, 0);

   /* the partitioning arrays stay with A */
   hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0);
   hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0);

   hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae);
   hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae);

   HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A);

   /* ------------------------------------------------------------------
    * Step 1: find out which offd columns are eliminated on their owner
    * ------------------------------------------------------------------ */
   HYPRE_Int *row_flag  = hypre_CTAlloc(HYPRE_Int, n_local_rows);
   HYPRE_Int *offd_flag = hypre_CTAlloc(HYPRE_Int, n_offd_cols);

   /* the exchange needs A's Matvec communication package */
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* row_flag[r] = 1 for every local row/column r to eliminate */
   for (idx = 0; idx < n_local_rows; idx++)
   {
      row_flag[idx] = 0;
   }
   for (idx = 0; idx < num_rowscols_to_elim; idx++)
   {
      row_flag[rowscols_to_elim[idx]] = 1;
   }

   /* send the flag of every send-map row; the flags received land in
    * offd_flag, one per offd column of A */
   HYPRE_Int  num_sends  = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *send_flags =
      hypre_CTAlloc(HYPRE_Int,
                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
   HYPRE_Int  fill = 0;
   for (idx = 0; idx < num_sends; idx++)
   {
      HYPRE_Int seg_end = hypre_ParCSRCommPkgSendMapStart(comm_pkg, idx+1);
      for (pos = hypre_ParCSRCommPkgSendMapStart(comm_pkg, idx);
           pos < seg_end; pos++)
      {
         row = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, pos);
         send_flags[fill++] = row_flag[row];
      }
   }
   hypre_ParCSRCommHandle *comm_handle =
      hypre_ParCSRCommHandleCreate(11, comm_pkg, send_flags, offd_flag);

   /* ------------------------------------------------------------------
    * Step 2: eliminate in the diagonal part while the exchange runs
    * ------------------------------------------------------------------ */
   hypre_CSRMatrixElimCreate(A_diag, Ae_diag,
                             num_rowscols_to_elim, rowscols_to_elim,
                             num_rowscols_to_elim, rowscols_to_elim,
                             NULL);

   hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    1, NULL);

   hypre_CSRMatrixReorder(Ae_diag);

   /* complete the exchange: offd_flag is valid from here on */
   hypre_ParCSRCommHandleDestroy(comm_handle);

   /* ------------------------------------------------------------------
    * Step 3: list the flagged offd columns (count, then fill)
    * ------------------------------------------------------------------ */
   HYPRE_Int num_offd_cols_to_elim = 0;
   for (idx = 0; idx < n_offd_cols; idx++)
   {
      if (offd_flag[idx])
      {
         num_offd_cols_to_elim++;
      }
   }

   HYPRE_Int *offd_cols_to_elim =
      hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim);

   HYPRE_Int n_listed = 0;
   for (idx = 0; idx < n_offd_cols; idx++)
   {
      if (offd_flag[idx])
      {
         offd_cols_to_elim[n_listed++] = idx;
      }
   }

   hypre_TFree(send_flags);
   hypre_TFree(row_flag);
   hypre_TFree(offd_flag);

   /* ------------------------------------------------------------------
    * Step 4: eliminate in the off-diagonal part
    * ------------------------------------------------------------------ */
   /* kept_col is filled by hypre_CSRMatrixElimCreate; the columns with a
    * nonzero mark are the ones that get a column in Ae_offd */
   HYPRE_Int *kept_col  = hypre_CTAlloc(HYPRE_Int, n_offd_cols);
   HYPRE_Int *new_index = hypre_CTAlloc(HYPRE_Int, n_offd_cols);

   hypre_CSRMatrixElimCreate(A_offd, Ae_offd,
                             num_rowscols_to_elim, rowscols_to_elim,
                             num_offd_cols_to_elim, offd_cols_to_elim,
                             kept_col);

   /* number the marked columns consecutively */
   HYPRE_Int n_marked = 0;
   for (idx = 0; idx < n_offd_cols; idx++)
   {
      if (kept_col[idx])
      {
         new_index[idx] = n_marked++;
      }
   }

   hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd,
                                    num_rowscols_to_elim, rowscols_to_elim,
                                    num_offd_cols_to_elim, offd_cols_to_elim,
                                    0, new_index);

   /* ------------------------------------------------------------------
    * Step 5: column map of Ae = global numbers of the marked columns
    * ------------------------------------------------------------------ */
   HYPRE_Int Ae_offd_ncols = 0;
   for (idx = 0; idx < n_offd_cols; idx++)
   {
      if (kept_col[idx])
      {
         Ae_offd_ncols++;
      }
   }

   HYPRE_Int *Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols);

   Ae_offd_ncols = 0;
   for (idx = 0; idx < n_offd_cols; idx++)
   {
      if (kept_col[idx])
      {
         Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[idx];
      }
   }

   hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd;
   hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols;

   hypre_TFree(new_index);
   hypre_TFree(kept_col);
   hypre_TFree(offd_cols_to_elim);

   /* finish Ae */
   hypre_ParCSRMatrixSetNumNonzeros(*Ae);
   hypre_MatvecCommPkgCreate(*Ae);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixExtractAExt
 *
 * Returns, as a CSR matrix, the rows of A that this process receives
 * according to hypre_ParCSRMatrixCommPkgT(A).
 *
 *   A              - matrix whose rows are exchanged; its CommPkgT is used
 *                    as-is and is not created here
 *   data           - if nonzero the coefficient array is attached to the
 *                    result, otherwise only i and j are
 *   pA_ext_row_map - passed through to
 *                    hypre_ParCSRMatrixExtractBExt_Arrays, which fills it
 *
 * A appears in two roles in A*A^T.  As the first factor it only contributes
 * CommPkgT(A), which identifies the rows of A (columns of A^T) that are
 * needed; that is generally a bigger set than the rows CommPkg(A) would
 * select for a product A*B.  Everywhere else A stands for A^T, the matrix
 * whose data is passed between processors.
 *--------------------------------------------------------------------------*/
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A,
                               HYPRE_Int           data,
                               HYPRE_Int         **pA_ext_row_map )
{
   /* communication pattern */
   MPI_Comm             comm     = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkgT(A);
   HYPRE_Int  num_recvs       = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
   HYPRE_Int  num_sends       = hypre_ParCSRCommPkgNumSends(comm_pkg);
   HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
   HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
   HYPRE_Int *send_map_elmts  = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);

   /* local pieces of A */
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int     *diag_i    = hypre_CSRMatrixI(diag);
   HYPRE_Int     *diag_j    = hypre_CSRMatrixJ(diag);
   HYPRE_Complex *diag_data = hypre_CSRMatrixData(diag);
   HYPRE_Int     *offd_i    = hypre_CSRMatrixI(offd);
   HYPRE_Int     *offd_j    = hypre_CSRMatrixJ(offd);
   HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);

   HYPRE_Int  first_col_diag  = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_Int  first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
   HYPRE_Int *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);

   /* shape of the result: one row per received row, global column count */
   HYPRE_Int n_global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int n_ext_rows    = recv_vec_starts[num_recvs];

   /* arrays produced by the exchange */
   HYPRE_Int     *ext_i;
   HYPRE_Int     *ext_j;
   HYPRE_Complex *ext_vals;
   HYPRE_Int      ext_nnz;

   hypre_CSRMatrix *A_ext;

   hypre_ParCSRMatrixExtractBExt_Arrays(
      &ext_i, &ext_j, &ext_vals, pA_ext_row_map,
      &ext_nnz,
      data, 1, comm, comm_pkg,
      n_global_cols, num_recvs, num_sends,
      first_col_diag, first_row_index,
      recv_vec_starts, send_map_starts, send_map_elmts,
      diag_i, diag_j, offd_i, offd_j, col_map_offd,
      diag_data, offd_data );

   /* wrap the arrays in a CSR matrix */
   A_ext = hypre_CSRMatrixCreate(n_ext_rows, n_global_cols, ext_nnz);
   hypre_CSRMatrixI(A_ext) = ext_i;
   hypre_CSRMatrixJ(A_ext) = ext_j;
   if (data)
   {
      hypre_CSRMatrixData(A_ext) = ext_vals;
   }

   return A_ext;
}
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_ext_row_map; HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; hypre_CSRMatrix *C_diag; HYPRE_Complex *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; HYPRE_Complex *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int *new_C_offd_j; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int last_col_diag_C; HYPRE_Int num_cols_offd_C; hypre_CSRMatrix *A_ext; HYPRE_Complex *A_ext_data; HYPRE_Int *A_ext_i; HYPRE_Int *A_ext_j; HYPRE_Int num_rows_A_ext=0; HYPRE_Int first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A); HYPRE_Int first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_Int *B_marker; HYPRE_Int i; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int count; HYPRE_Int n_rows_A, n_cols_A; HYPRE_Complex a_entry; HYPRE_Complex a_b_product; HYPRE_Complex zero = 0.0; n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); if (n_cols_A != n_rows_A) { hypre_printf(" Error! 
Incompatible matrix dimensions!\n"); return NULL; } /*----------------------------------------------------------------------- * Extract A_ext, i.e. portion of A that is stored on neighbor procs * and needed locally for A^T in the matrix matrix product A*A^T *-----------------------------------------------------------------------*/ if (num_rows_diag_A != n_rows_A) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!hypre_ParCSRMatrixCommPkg(A)) { hypre_MatTCommPkgCreate(A); } A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map ); A_ext_data = hypre_CSRMatrixData(A_ext); A_ext_i = hypre_CSRMatrixI(A_ext); A_ext_j = hypre_CSRMatrixJ(A_ext); num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext); } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext ); /*----------------------------------------------------------------------- * Initialize some stuff. 
*-----------------------------------------------------------------------*/ for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } hypre_ParAat_RowSizes( &C_diag_i, &C_offd_i, B_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, A_col_map_offd, A_ext_i, A_ext_j, A_ext_row_map, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, num_rows_A_ext, first_col_diag_A, first_row_index_A ); #if 0 /* debugging output: */ hypre_printf("A_ext_row_map (%i):",num_rows_A_ext); for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] ); hypre_printf("\nC_diag_i (%i):",C_diag_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] ); hypre_printf("\nC_offd_i (%i):",C_offd_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] ); hypre_printf("\n"); #endif /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_C = first_row_index_A + num_rows_diag_A - 1; C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. 
*-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ B_marker[i1] = jj_count_diag; jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; C_diag_data[jj_count_diag] = zero; C_diag_j[jj_count_diag] = i1; jj_count_diag++; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ /* There are 3 CSRMatrix or CSRBooleanMatrix objects here: ext*ext, ext*diag, and ext*offd belong to another processor. diag*offd and offd*diag don't count - never share a column by definition. So we have to do 4 cases: diag*ext, offd*ext, diag*diag, and offd*offd. */ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /* diag*ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. 
*-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==i2+first_col_diag_A ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_col_diag_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; /* offd * ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. 
*-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==A_col_map_offd[i2] ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_row_index_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } } /* diag * diag */ /*----------------------------------------------------------------- * Loop over entries (columns) i2 in row i1 of A_diag. * For each such column we will find the contributions of the * corresponding rows i2 of A^T to C=A*A^T . Now we only look * at the local part of A^T - with columns (rows of A) living * on this processor. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). 
* For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the diagonal block of A. * It contributes to the diagonal block of C. * For each entry (i2,i3) of A^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_diag_A; i3++ ) { for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) { if ( A_diag_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_diag_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, mark it and increment * counter. *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of i3 loop */ } /* end of third i2 loop */ /* offd * offd */ /*----------------------------------------------------------- * Loop over offd columns i2 of A in A*A^T. Then * loop over offd entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). * For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the off-diagonal block of A. * It contributes to the diag block of C. * For each entry (i2,i3) of A^T, add A*A^T to C *-----------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; for ( i3=0; i3<num_rows_diag_A; i3++ ) { /* ... 
note that num_rows_diag_A == num_rows_offd_A */ for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) { if ( A_offd_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_offd_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of last i3 loop */ } /* end of if (num_cols_offd_A) */ } /* end of fourth and last i2 loop */ #if 0 /* debugging printout */ hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag ); hypre_printf(" C_diag_j="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]); hypre_printf(" C_diag_data="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]); hypre_printf("\n"); hypre_printf(" C_offd_j="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]); hypre_printf(" C_offd_data="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]); hypre_printf("\n"); hypre_printf( " B_marker =" ); for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it ) hypre_printf(" %i", B_marker[it] ); hypre_printf( "\n" ); #endif } /* end of i1 loop */ /*----------------------------------------------------------------------- * Delete 0-columns in C_offd, i.e. generate col_map_offd and reset * C_offd_j. Note that (with the indexing we have coming into this * block) col_map_offd_C[i3]==A_ext_row_map[i3]. 
*-----------------------------------------------------------------------*/ for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i ) B_marker[i] = -1; for ( i=0; i<C_offd_size; i++ ) B_marker[ C_offd_j[i] ] = -2; count = 0; for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) { if (B_marker[i] == -2) { B_marker[i] = count; count++; } } num_cols_offd_C = count; if (num_cols_offd_C) { col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size); /* ... a bit big, but num_cols_offd_C is too small. It might be worth computing the correct size, which is sum( no. columns in row i, over all rows i ) */ for (i=0; i < C_offd_size; i++) { new_C_offd_j[i] = B_marker[C_offd_j[i]]; col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ]; } hypre_TFree(C_offd_j); C_offd_j = new_C_offd_j; } /*---------------------------------------------------------------- * Create C *----------------------------------------------------------------*/ C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A, row_starts_A, num_cols_offd_C, C_diag_size, C_offd_size); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; if (num_cols_offd_C) { C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixOffd(C) = C_offd; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } else hypre_TFree(C_offd_i); /*----------------------------------------------------------------------- * Free B_ext and marker array. 
*-----------------------------------------------------------------------*/ if (num_cols_offd_A) { hypre_CSRMatrixDestroy(A_ext); A_ext = NULL; } hypre_TFree(B_marker); if ( num_rows_diag_A != n_rows_A ) hypre_TFree(A_ext_row_map); return C; }
/*--------------------------------------------------------------------------
 * Test driver for ParCSR matrix / multivector routines.
 *
 * Rank 0 reads a serial CSR matrix from the file "input"; the matrix is
 * converted to a ParCSR matrix, gathered back into a CSR matrix, printed,
 * and read back from the files written under the name "matrix".  Multivectors
 * with num_vectors columns are then built on the column and row partitionings
 * of the re-read matrix, and one Matvec and one MatvecT are applied and
 * printed ("result" and "transp").
 *
 * Returns 0 unconditionally; the MatvecT error code is only printed.
 *--------------------------------------------------------------------------*/
HYPRE_Int
main( HYPRE_Int argc, char *argv[] )
{
   /* Only rank 0 reads the input file, so this must start out as NULL:
      every rank passes it (by value) to hypre_CSRMatrixToParCSRMatrix. */
   hypre_CSRMatrix     *matrix = NULL;
   hypre_CSRMatrix     *matrix1;
   hypre_ParCSRMatrix  *par_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y2_local;
   hypre_ParVector     *x;
   hypre_ParVector     *x2;
   hypre_ParVector     *y;
   hypre_ParVector     *y2;
   HYPRE_Int            vecstride_x, idxstride_x, vecstride_y, idxstride_y;
   HYPRE_Int            num_procs, my_id;
   HYPRE_Int            local_size;
   HYPRE_Int            num_vectors;
   HYPRE_Int            global_num_rows, global_num_cols;
   HYPRE_Int            first_index;
   HYPRE_Int            i, j, ierr = 0;
   double              *data, *data2;
   HYPRE_Int           *row_starts, *col_starts;
   char                 file_name[80];

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id);

   hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs);

   if (my_id == 0)
   {
      matrix = hypre_CSRMatrixRead("input");
      hypre_printf(" read input\n");
   }

   /* NULL partitionings are handed to the conversion routine
      (presumably it then picks the distribution itself -- confirm). */
   row_starts = NULL;
   col_starts = NULL;
   par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix,
                                              row_starts, col_starts);
   hypre_printf(" converted\n");

   /* Gather back into a CSR matrix; the result may be NULL, hence the
      checks before printing and destroying it. */
   matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix);

   hypre_sprintf(file_name, "matrix1.%d", my_id);

   if (matrix1)
   {
      hypre_CSRMatrixPrint(matrix1, file_name);
   }

   hypre_ParCSRMatrixPrint(par_matrix, "matrix");
   hypre_ParCSRMatrixPrintIJ(par_matrix, 0, 0, "matrixIJ");

   /* The handle is reassigned by the read-back just below; destroy the
      converted matrix first so it is not leaked. */
   hypre_ParCSRMatrixDestroy(par_matrix);

   par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD, "matrix");

   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   hypre_printf(" global_num_cols %d\n", global_num_cols);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);

   /*-----------------------------------------------------------------------
    * x and x2 live on the column partitioning of the matrix.
    *-----------------------------------------------------------------------*/
   col_starts  = hypre_ParCSRMatrixColStarts(par_matrix);
   first_index = col_starts[my_id];
   local_size  = col_starts[my_id + 1] - first_index;

   num_vectors = 3;

   x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                   col_starts, num_vectors );
   /* col_starts comes from par_matrix; partitioning owner flag is set to 0. */
   hypre_ParVectorSetPartitioningOwner(x, 0);
   hypre_ParVectorInitialize(x);
   x_local     = hypre_ParVectorLocalVector(x);
   data        = hypre_VectorData(x_local);
   vecstride_x = hypre_VectorVectorStride(x_local);
   idxstride_x = hypre_VectorIndexStride(x_local);

   /* Entry i of vector j gets (1-based global index) + 100*j. */
   for ( j = 0; j < num_vectors; ++j )
   {
      for ( i = 0; i < local_size; i++ )
      {
         data[i * idxstride_x + j * vecstride_x] = first_index + i + 1 + 100 * j;
      }
   }

   x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols,
                                    col_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(x2, 0);
   hypre_ParVectorInitialize(x2);
   hypre_ParVectorSetConstantValues(x2, 2.0);

   /*-----------------------------------------------------------------------
    * y and y2 live on the row partitioning of the matrix.
    *-----------------------------------------------------------------------*/
   row_starts  = hypre_ParCSRMatrixRowStarts(par_matrix);
   first_index = row_starts[my_id];
   local_size  = row_starts[my_id + 1] - first_index;

   y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                   row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y, 0);
   hypre_ParVectorInitialize(y);

   y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows,
                                    row_starts, num_vectors );
   hypre_ParVectorSetPartitioningOwner(y2, 0);
   hypre_ParVectorInitialize(y2);
   y2_local    = hypre_ParVectorLocalVector(y2);
   data2       = hypre_VectorData(y2_local);
   vecstride_y = hypre_VectorVectorStride(y2_local);
   idxstride_y = hypre_VectorIndexStride(y2_local);

   /* Same fill pattern as x, using the row partitioning. */
   for ( j = 0; j < num_vectors; ++j )
   {
      for ( i = 0; i < local_size; i++ )
      {
         data2[i * idxstride_y + j * vecstride_y] = first_index + i + 1 + 100 * j;
      }
   }

   hypre_ParVectorSetConstantValues(y, 1.0);
   hypre_printf(" initialized vectors, first_index=%i\n", first_index);

   hypre_ParVectorPrint(x, "vectorx");
   hypre_ParVectorPrint(y, "vectory");

   /*-----------------------------------------------------------------------
    * Matvec into y, then MatvecT of y2 into x2.
    *-----------------------------------------------------------------------*/
   hypre_MatvecCommPkgCreate(par_matrix);
   hypre_ParCSRMatrixMatvec( 1.0, par_matrix, x, 1.0, y );
   hypre_printf(" did matvec\n");

   hypre_ParVectorPrint(y, "result");

   ierr = hypre_ParCSRMatrixMatvecT( 1.0, par_matrix, y2, 1.0, x2 );
   hypre_printf(" did matvecT %d\n", ierr);

   hypre_ParVectorPrint(x2, "transp");

   /*-----------------------------------------------------------------------
    * Clean up.
    *-----------------------------------------------------------------------*/
   hypre_ParCSRMatrixDestroy(par_matrix);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(x2);
   hypre_ParVectorDestroy(y);
   hypre_ParVectorDestroy(y2);
   if (my_id == 0)
   {
      hypre_CSRMatrixDestroy(matrix);
   }
   if (matrix1)
   {
      hypre_CSRMatrixDestroy(matrix1);
   }

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return 0;
}