HYPRE_Int hypre_MatTCommPkgCreate ( hypre_ParCSRMatrix *A) { hypre_ParCSRCommPkg *comm_pkg; MPI_Comm comm = hypre_ParCSRMatrixComm(A); /* hypre_MPI_Datatype *recv_mpi_types; hypre_MPI_Datatype *send_mpi_types; */ HYPRE_Int num_sends; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int num_recvs; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(A); HYPRE_Int ierr = 0; HYPRE_Int num_rows_diag = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)); HYPRE_Int num_cols_diag = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(A)); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)); HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(A); hypre_MatTCommPkgCreate_core ( comm, col_map_offd, first_col_diag, col_starts, num_rows_diag, num_cols_diag, num_cols_offd, row_starts, hypre_ParCSRMatrixFirstColDiag(A), hypre_ParCSRMatrixColMapOffd(A), hypre_CSRMatrixI( hypre_ParCSRMatrixDiag(A) ), hypre_CSRMatrixJ( hypre_ParCSRMatrixDiag(A) ), hypre_CSRMatrixI( hypre_ParCSRMatrixOffd(A) ), hypre_CSRMatrixJ( hypre_ParCSRMatrixOffd(A) ), 1, &num_recvs, &recv_procs, &recv_vec_starts, &num_sends, &send_procs, &send_map_starts, &send_map_elmts ); comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1); hypre_ParCSRCommPkgComm(comm_pkg) = comm; hypre_ParCSRCommPkgNumRecvs(comm_pkg) = num_recvs; hypre_ParCSRCommPkgRecvProcs(comm_pkg) = recv_procs; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg) = recv_vec_starts; hypre_ParCSRCommPkgNumSends(comm_pkg) = num_sends; hypre_ParCSRCommPkgSendProcs(comm_pkg) = send_procs; hypre_ParCSRCommPkgSendMapStarts(comm_pkg) = send_map_starts; hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = send_map_elmts; hypre_ParCSRMatrixCommPkgT(A) = comm_pkg; return ierr; }
HYPRE_Int hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix *SN, HYPRE_Int *CFN_marker, HYPRE_Int *col_offd_SN_to_AN, HYPRE_Int num_functions, HYPRE_Int nodal, HYPRE_Int data, HYPRE_Int **dof_func_ptr, HYPRE_Int **CF_marker_ptr, HYPRE_Int **col_offd_S_to_A_ptr, hypre_ParCSRMatrix **S_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(SN); hypre_ParCSRMatrix *S; hypre_CSRMatrix *S_diag; HYPRE_Int *S_diag_i; HYPRE_Int *S_diag_j; double *S_diag_data; hypre_CSRMatrix *S_offd; HYPRE_Int *S_offd_i; HYPRE_Int *S_offd_j; double *S_offd_data; HYPRE_Int *row_starts_S; HYPRE_Int *col_starts_S; HYPRE_Int *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN); HYPRE_Int *col_starts_SN = hypre_ParCSRMatrixColStarts(SN); hypre_CSRMatrix *SN_diag = hypre_ParCSRMatrixDiag(SN); HYPRE_Int *SN_diag_i = hypre_CSRMatrixI(SN_diag); HYPRE_Int *SN_diag_j = hypre_CSRMatrixJ(SN_diag); double *SN_diag_data; hypre_CSRMatrix *SN_offd = hypre_ParCSRMatrixOffd(SN); HYPRE_Int *SN_offd_i = hypre_CSRMatrixI(SN_offd); HYPRE_Int *SN_offd_j = hypre_CSRMatrixJ(SN_offd); double *SN_offd_data; HYPRE_Int *CF_marker; HYPRE_Int *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN); HYPRE_Int *col_map_offd_S; HYPRE_Int *dof_func; HYPRE_Int num_nodes = hypre_CSRMatrixNumRows(SN_diag); HYPRE_Int num_variables; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_S; HYPRE_Int *send_procs_S; HYPRE_Int *send_map_starts_S; HYPRE_Int *send_map_elmts_S; HYPRE_Int *recv_procs_S; HYPRE_Int *recv_vec_starts_S; HYPRE_Int *col_offd_S_to_A = NULL; HYPRE_Int num_coarse_nodes; HYPRE_Int i,j,k,k1,jj,cnt; HYPRE_Int row, start, end; HYPRE_Int num_procs; HYPRE_Int num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd); HYPRE_Int num_cols_offd_S; HYPRE_Int SN_num_nonzeros_diag; HYPRE_Int SN_num_nonzeros_offd; HYPRE_Int S_num_nonzeros_diag; HYPRE_Int S_num_nonzeros_offd; HYPRE_Int global_num_vars; HYPRE_Int global_num_cols; HYPRE_Int global_num_nodes; HYPRE_Int ierr = 0; hypre_MPI_Comm_size(comm, &num_procs); num_variables = num_functions*num_nodes; CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); if (nodal < 0) { cnt = 0; num_coarse_nodes = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) num_coarse_nodes++; for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions); cnt = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) { for (k=0; k < num_functions; k++) dof_func[cnt++] = k; } } *dof_func_ptr = dof_func; } else { cnt = 0; for (i=0; i < num_nodes; i++) for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } *CF_marker_ptr = CF_marker; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #else row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #endif SN_num_nonzeros_diag = SN_diag_i[num_nodes]; SN_num_nonzeros_offd = SN_offd_i[num_nodes]; global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions; global_num_vars = global_num_nodes*num_functions; S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag; S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd; num_cols_offd_S = num_functions*num_cols_offd_SN; S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols, row_starts_S, col_starts_S, num_cols_offd_S, S_num_nonzeros_diag, S_num_nonzeros_offd); S_diag = hypre_ParCSRMatrixDiag(S); S_offd = hypre_ParCSRMatrixOffd(S); S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag); hypre_CSRMatrixI(S_diag) = S_diag_i; hypre_CSRMatrixJ(S_diag) = S_diag_j; if (data) { SN_diag_data = hypre_CSRMatrixData(SN_diag); S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag); hypre_CSRMatrixData(S_diag) = S_diag_data; if (num_cols_offd_S) { SN_offd_data = hypre_CSRMatrixData(SN_offd); S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd); hypre_CSRMatrixData(S_offd) = S_offd_data; } } hypre_CSRMatrixI(S_offd) = S_offd_i; if (comm_pkg) { comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_S) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_S = NULL; send_map_elmts_S = NULL; if (num_sends) { send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_S = hypre_CTAlloc(HYPRE_Int, num_functions*send_map_starts[num_sends]); } send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_S = NULL; if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs); send_map_starts_S[0] = 0; for (i=0; i < num_sends; i++) { send_procs_S[i] = send_procs[i]; send_map_starts_S[i+1] = num_functions*send_map_starts[i+1]; } recv_vec_starts_S[0] = 0; for (i=0; i < num_recvs; i++) { recv_procs_S[i] = recv_procs[i]; recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1]; } cnt = 0; for (i=0; i < send_map_starts[num_sends]; i++) { k1 = num_functions*send_map_elmts[i]; for (j=0; j < num_functions; j++) { send_map_elmts_S[cnt++] = k1+j; } } hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S; hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S; hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S; } if (num_cols_offd_S) { S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd); hypre_CSRMatrixJ(S_offd) = S_offd_j; col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_map_offd_SN[i]*num_functions; for (j=0; j < num_functions; j++) col_map_offd_S[cnt++] = k1+j; } hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S; } if (col_offd_SN_to_AN) { col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_offd_SN_to_AN[i]*num_functions; for (j=0; j < num_functions; j++) col_offd_S_to_A[cnt++] = k1+j; } *col_offd_S_to_A_ptr = col_offd_S_to_A; } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++) { jj = SN_diag_j[j]; if (data) S_diag_data[cnt] = SN_diag_data[j]; S_diag_j[cnt++] = jj*num_functions; } end = cnt; S_diag_i[row] = cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_diag_data[cnt] = S_diag_data[k]; S_diag_j[cnt++] = S_diag_j[k]+k1; } S_diag_i[row] = cnt; } } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++) { jj = SN_offd_j[j]; if (data) S_offd_data[cnt] = SN_offd_data[j]; S_offd_j[cnt++] = jj*num_functions; } end = cnt; S_offd_i[row] = cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_offd_data[cnt] = S_offd_data[k]; S_offd_j[cnt++] = S_offd_j[k]+k1; } S_offd_i[row] = cnt; } } *S_ptr = S; return (ierr); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
hypre_ParCSRMatrix * hypre_ParMatmul_FC( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd ) /* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the matrix product A*P. A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC where nC is the number of coarse rows/columns, nF the number of fine rows/columns. The size of C=A*P is (nC+nF)*nC, even though not all rows of C are actually computed. If we were to construct a matrix consisting only of the computed rows of C, its size would be nF*nC. "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. */ { /* To compute a submatrix of C containing only the computed data, i.e. only "Fine" rows, we would have to do a lot of computational work, with a lot of communication. The communication is because such a matrix would need global information that depends on which rows are "Fine". */ MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); double *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P); double *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P); HYPRE_Int *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P); double *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd); HYPRE_Int first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P); HYPRE_Int last_col_diag_P; HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P); HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag); HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; HYPRE_Int *map_P_to_C; hypre_CSRMatrix *C_diag; double *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; double *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int num_cols_offd_C = 0; hypre_CSRMatrix *Ps_ext; double *Ps_ext_data; HYPRE_Int *Ps_ext_i; HYPRE_Int *Ps_ext_j; double *P_ext_diag_data; HYPRE_Int *P_ext_diag_i; HYPRE_Int *P_ext_diag_j; HYPRE_Int P_ext_diag_size; double *P_ext_offd_data; HYPRE_Int *P_ext_offd_i; HYPRE_Int *P_ext_offd_j; HYPRE_Int P_ext_offd_size; HYPRE_Int *P_marker; HYPRE_Int *temp; HYPRE_Int i, j; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int n_rows_A_global, n_cols_A_global; HYPRE_Int n_rows_P_global, n_cols_P_global; HYPRE_Int allsquare = 0; HYPRE_Int cnt, cnt_offd, cnt_diag; HYPRE_Int num_procs; HYPRE_Int value; double a_entry; double a_b_product; n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A); n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P); n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P); if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P) { hypre_printf(" Error! Incompatible matrix dimensions!\n"); return NULL; } /* if (num_rows_A==num_cols_P) allsquare = 1; */ /*----------------------------------------------------------------------- * Extract P_ext, i.e. portion of P that is stored on neighbor procs * and needed locally for matrix matrix product *-----------------------------------------------------------------------*/ hypre_MPI_Comm_size(comm, &num_procs); if (num_procs > 1) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings within * hypre_ParCSRMatrixExtractBExt *--------------------------------------------------------------------*/ Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1); Ps_ext_data = hypre_CSRMatrixData(Ps_ext); Ps_ext_i = hypre_CSRMatrixI(Ps_ext); Ps_ext_j = hypre_CSRMatrixJ(Ps_ext); } P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_diag_size = 0; P_ext_offd_size = 0; last_col_diag_P = first_col_diag_P + num_cols_diag_P -1; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) P_ext_offd_size++; else P_ext_diag_size++; P_ext_diag_i[i+1] = P_ext_diag_size; P_ext_offd_i[i+1] = P_ext_offd_size; } if (P_ext_diag_size) { P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size); P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size); } if (P_ext_offd_size) { P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size); P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size); } cnt_offd = 0; cnt_diag = 0; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) { P_ext_offd_j[cnt_offd] = Ps_ext_j[j]; P_ext_offd_data[cnt_offd++] = Ps_ext_data[j]; } else { P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P; P_ext_diag_data[cnt_diag++] = Ps_ext_data[j]; } } if (num_procs > 1) { hypre_CSRMatrixDestroy(Ps_ext); Ps_ext = NULL; } cnt = 0; if (P_ext_offd_size || num_cols_offd_P) { temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P); for (i=0; i < P_ext_offd_size; i++) temp[i] = P_ext_offd_j[i]; cnt = P_ext_offd_size; for (i=0; i < num_cols_offd_P; i++) temp[cnt++] = col_map_offd_P[i]; } if (cnt) { qsort0(temp, 0, cnt-1); num_cols_offd_C = 1; value = temp[0]; for (i=1; i < cnt; i++) { if (temp[i] > value) { value = temp[i]; temp[num_cols_offd_C++] = value; } } } if (num_cols_offd_C) col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); for (i=0; i < num_cols_offd_C; i++) col_map_offd_C[i] = temp[i]; if (P_ext_offd_size || num_cols_offd_P) hypre_TFree(temp); for (i=0 ; i < P_ext_offd_size; i++) P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C, P_ext_offd_j[i], num_cols_offd_C); if (num_cols_offd_P) { map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P); cnt = 0; for (i=0; i < num_cols_offd_C; i++) if (col_map_offd_C[i] == col_map_offd_P[cnt]) { map_P_to_C[cnt++] = i; if (cnt == num_cols_offd_P) break; } } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C); /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /* no changes for the marked version above this point */ /* This function call is the first pass: */ hypre_ParMatmul_RowSizes_Marked( &C_diag_i, &C_offd_i, &P_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, P_diag_i, P_diag_j, P_offd_i, P_offd_j, P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j, map_P_to_C, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, allsquare, num_cols_diag_P, num_cols_offd_P, num_cols_offd_C, CF_marker, dof_func, dof_func_offd ); /* The above call of hypre_ParMatmul_RowSizes_Marked computed two scalars: C_diag_size, C_offd_size, and two arrays: C_diag_i, C_offd_i ( P_marker is also computed, but only used internally ) */ /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1; C_diag_data = hypre_CTAlloc(double, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(double, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { if ( CF_marker[i1] < 0 ) /* i1 is a fine row */ /* ... This and the coarse row code are the only parts between first pass and near the end where hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */ { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] ) { /* interpolate only like "functions" */ a_entry = A_offd_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_ext. *-----------------------------------------------------------*/ for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+P_ext_offd_j[jj3]; a_b_product = a_entry * P_ext_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else C_offd_data[P_marker[i3]] += a_b_product; } for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++) { i3 = P_ext_diag_j[jj3]; a_b_product = a_entry * P_ext_diag_data[jj3]; if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else C_diag_data[P_marker[i3]] += a_b_product; } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } /*----------------------------------------------------------------- * Loop over entries in row i1 of A_diag. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func[i2] ) { /* interpolate only like "functions" */ a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_diag. *-----------------------------------------------------------*/ for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++) { i3 = P_diag_j[jj3]; a_b_product = a_entry * P_diag_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[P_marker[i3]] += a_b_product; } } if (num_cols_offd_P) { for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]]; a_b_product = a_entry * P_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else { C_offd_data[P_marker[i3]] += a_b_product; } } } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } else /* i1 is a coarse row.*/ /* Copy P coarse-row values to C. This is useful if C is meant to become a replacement for P */ { if (num_cols_offd_P) { for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++) { C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd]; C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd]; ++jj_count_offd; } } for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++) { C_diag_j[jj_count_diag] = P_diag_j[jj2]; C_diag_data[jj_count_diag] = P_diag_data[jj2]; ++jj_count_diag; } } } C = hypre_ParCSRMatrixCreate( comm, n_rows_A_global, n_cols_P_global, row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size ); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_ParCSRMatrixOffd(C) = C_offd; if (num_cols_offd_C) { hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } /*----------------------------------------------------------------------- * Free various arrays *-----------------------------------------------------------------------*/ hypre_TFree(P_marker); hypre_TFree(P_ext_diag_i); if (P_ext_diag_size) { hypre_TFree(P_ext_diag_j); hypre_TFree(P_ext_diag_data); } hypre_TFree(P_ext_offd_i); if (P_ext_offd_size) { hypre_TFree(P_ext_offd_j); hypre_TFree(P_ext_offd_data); } if (num_cols_offd_P) hypre_TFree(map_P_to_C); return C; }
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix ** P, hypre_ParCSRMatrix * S, HYPRE_Int * CF_marker, HYPRE_Int level, HYPRE_Real truncation_threshold, HYPRE_Real truncation_threshold_minus, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd, HYPRE_Real weight_AF) /* One step of Jacobi interpolation: A is the linear system. P is an interpolation matrix, input and output CF_marker identifies coarse and fine points If we imagine P and A as split into coarse and fine submatrices, [ AFF AFC ] [ AF ] [ IFC ] A = [ ] = [ ] , P = [ ] [ ACF ACC ] [ AC ] [ ICC ] (note that ICC is an identity matrix, applied to coarse points only) then this function computes IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC ) = IFCold - DFF(-1) * AF * Pold) where DFF is the diagonal of AFF, (-1) represents the inverse, and where "old" denotes a value on entry to this function, "new" a returned value. */ { hypre_ParCSRMatrix * Pnew; hypre_ParCSRMatrix * C; hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P); HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); hypre_CSRMatrix *C_diag; hypre_CSRMatrix *C_offd; hypre_CSRMatrix *Pnew_diag; hypre_CSRMatrix *Pnew_offd; HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int i; HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros; HYPRE_Int CF_coarse=0; HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P ); HYPRE_Int nc, ncmax, ncmin, nc1; HYPRE_Int num_procs, my_id; MPI_Comm comm = hypre_ParCSRMatrixComm( A ); #ifdef HYPRE_JACINT_PRINT_ROW_SUMS HYPRE_Int m, nmav, npav; HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh; HYPRE_Real eps = 1.0e-17; #endif #ifdef HYPRE_JACINT_PRINT_MATRICES char filename[80]; HYPRE_Int i_dummy, j_dummy; HYPRE_Int *base_i_ptr = &i_dummy; HYPRE_Int *base_j_ptr = &j_dummy; #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS HYPRE_Int sample_rows[50], n_sample_rows=0, isamp; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); for ( i=0; i<num_rows_diag_P; ++i ) { J_marker[i] = CF_marker[i]; if (CF_marker[i]>=0) ++CF_coarse; } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(P_diag), hypre_CSRMatrixNumNonzeros(P_offd), hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd), hypre_ParCSRMatrixLocalSumElts(*P) ); #endif /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P in max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) { ++nc1; } ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #if 0 /* a very agressive reduction in how much the Jacobi step does: */ for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc>ncmin+1) /*if ( nc > ncmin + 0.5*(ncmax-ncmin) )*/ { J_marker[i] = 1; ++Jnochanges; } } #endif Jchanges = num_rows_diag_P - Jnochanges - CF_coarse; #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some rows to be changed: "); randthresh = 15/(HYPRE_Real)Jchanges; for ( i=0; i<num_rows_diag_P; ++i ) { if ( J_marker[i]<0 ) { if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh ) { hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); sample_rows[n_sample_rows] = i; ++n_sample_rows; } } } hypre_printf("\n"); #endif #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n", my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); #endif #ifdef HYPRE_JACINT_PRINT_MATRICES if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) { hypre_sprintf( filename, "Ain%i", level ); hypre_ParCSRMatrixPrintIJ( A,0,0,filename); hypre_sprintf( filename, "Sin%i", level ); hypre_ParCSRMatrixPrintIJ( S,0,0,filename); hypre_sprintf( filename, "Pin%i", level ); hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); } #endif C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd ); /* hypre_parMatmul_FC creates and returns C, a variation of the matrix product A*P in which only the "Fine"-designated rows have been computed. (all columns are Coarse because all columns of P are). "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. As a matrix, C is the size of A*P. But only the marked rows have been computed. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "C%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif C_diag = hypre_ParCSRMatrixDiag(C); C_offd = hypre_ParCSRMatrixOffd(C); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(C_diag), hypre_CSRMatrixNumNonzeros(C_offd), hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd), hypre_ParCSRMatrixLocalSumElts(C) ); #endif hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker ); /* hypre_ParMatScaleDiagInv scales of its first argument by premultiplying with a submatrix of the inverse of the diagonal of its second argument. The marker array determines which diagonal elements are used. The marker array should select exactly the right number of diagonal elements (the number of rows of AP_FC). */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Cout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( C,0,0,filename); #endif Pnew = hypre_ParMatMinus_F( *P, C, J_marker ); /* hypre_ParMatMinus_F subtracts rows of its second argument from selected rows of its first argument. The marker array determines which rows of the first argument are affected, and they should exactly correspond to all the rows of the second argument. */ Pnew_diag = hypre_ParCSRMatrixDiag(Pnew); Pnew_offd = hypre_ParCSRMatrixOffd(Pnew); Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd); #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros, hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Transfer ownership of col_starts from P to Pnew ... */ if ( hypre_ParCSRMatrixColStarts(*P) && hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) ) { if ( hypre_ParCSRMatrixOwnsColStarts(*P) && !hypre_ParCSRMatrixOwnsColStarts(Pnew) ) { hypre_ParCSRMatrixSetColStartsOwner(*P,0); hypre_ParCSRMatrixSetColStartsOwner(Pnew,1); } } hypre_ParCSRMatrixDestroy( C ); hypre_ParCSRMatrixDestroy( *P ); /* Note that I'm truncating all the fine rows, not just the J-marked ones. */ #if 0 if ( Pnew_num_nonzeros < 10000 ) /* a fixed number like this makes it no.procs.-depdendent */ { /* ad-hoc attempt to reduce zero-matrix problems seen in testing..*/ truncation_threshold = 1.0e-6 * truncation_threshold; truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus; } #endif hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold, truncation_threshold_minus, CF_marker ); hypre_MatvecCommPkgCreate ( Pnew ); *P = Pnew; P_diag = hypre_ParCSRMatrixDiag(*P); P_offd = hypre_ParCSRMatrixOffd(*P); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_diag_j = hypre_CSRMatrixJ(P_diag); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); /* row sum computations, for output */ #ifdef HYPRE_JACINT_PRINT_ROW_SUMS PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0; nmav=0, npav=0; for ( i=0; i<num_rows_diag_P; ++i ) { PIi = 0; /* i-th value of P*1, i.e. sum of row i of P */ for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) PIi += P_diag_data[m]; for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m ) PIi += P_offd_data[m]; if (CF_marker[i]<0) { PIimax = hypre_max( PIimax, PIi ); PIimin = hypre_min( PIimin, PIi ); if (PIi<=1-eps) { PIimav+=PIi; ++nmav; }; if (PIi>=1+eps) { PIipav+=PIi; ++npav; }; } } if ( nmav>0 ) PIimav = PIimav/nmav; if ( npav>0 ) PIipav = PIipav/npav; hypre_printf("%i %i P out max,min row sums %e %e\n", my_id, level, PIimax, PIimin ); #endif #ifdef HYPRE_JACINT_PRINT_SOME_ROWS hypre_printf("some changed rows: "); for ( isamp=0; isamp<n_sample_rows; ++isamp ) { i = sample_rows[isamp]; hypre_printf( "%i: ", i ); for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m ) hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] ); hypre_printf("; "); } hypre_printf("\n"); #endif ncmax=0; ncmin=num_rows_diag_P; nc1=0; for ( i=0; i<num_rows_diag_P; ++i ) if (CF_marker[i]<0) { nc = P_diag_i[i+1] - P_diag_i[i]; if (nc<=1) ++nc1; ncmax = hypre_max( nc, ncmax ); ncmin = hypre_min( nc, ncmin ); } #ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n", my_id, level, num_rows_diag_P, num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse ); hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n", my_id, level, ncmin, ncmax, nc1 ); hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n", my_id, level, truncation_threshold, hypre_CSRMatrixNumNonzeros(Pnew_diag), hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd), hypre_ParCSRMatrixLocalSumElts(Pnew) ); #endif /* Programming Notes: 1. Judging by around line 299 of par_interp.c, they typical use of CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. */ #ifdef HYPRE_JACINT_PRINT_MATRICES hypre_sprintf( filename, "Pout%i", level ); if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX ) hypre_ParCSRMatrixPrintIJ( *P,0,0,filename); #endif hypre_TFree( J_marker ); }
/****************************************************************************** * * hypre_IJMatrixInsertRowPETSc * * inserts a row into an IJMatrix, * if diag_i and offd_i are known, those values are inserted directly * into the ParCSRMatrix, * if they are not known, an auxiliary structure, AuxParCSRMatrix is used * *****************************************************************************/ HYPRE_Int hypre_IJMatrixInsertRowPETSc(hypre_IJMatrix *matrix, HYPRE_Int n, HYPRE_Int row, HYPRE_Int *indices, double *coeffs) { HYPRE_Int ierr = 0; hypre_ParCSRMatrix *par_matrix; hypre_AuxParCSRMatrix *aux_matrix; HYPRE_Int *row_starts; HYPRE_Int *col_starts; MPI_Comm comm = hypre_IJMatrixContext(matrix); HYPRE_Int num_procs, my_id; HYPRE_Int row_local; HYPRE_Int col_0, col_n; HYPRE_Int i, temp; HYPRE_Int *indx_diag, *indx_offd; HYPRE_Int **aux_j; HYPRE_Int *local_j; double **aux_data; double *local_data; HYPRE_Int diag_space, offd_space; HYPRE_Int *row_length, *row_space; HYPRE_Int need_aux; HYPRE_Int indx_0; HYPRE_Int diag_indx, offd_indx; hypre_CSRMatrix *diag; HYPRE_Int *diag_i; HYPRE_Int *diag_j; double *diag_data; hypre_CSRMatrix *offd; HYPRE_Int *offd_i; HYPRE_Int *offd_j; double *offd_data; hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); par_matrix = hypre_IJMatrixLocalStorage( matrix ); aux_matrix = hypre_IJMatrixTranslator(matrix); row_space = hypre_AuxParCSRMatrixRowSpace(aux_matrix); row_length = hypre_AuxParCSRMatrixRowLength(aux_matrix); col_n = hypre_ParCSRMatrixFirstColDiag(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); col_0 = col_starts[my_id]; col_n = col_starts[my_id+1]-1; need_aux = hypre_AuxParCSRMatrixNeedAux(aux_matrix); if (row >= row_starts[my_id] && row < row_starts[my_id+1]) { if (need_aux) { row_local = row - row_starts[my_id]; /* compute local row number */ aux_j = hypre_AuxParCSRMatrixAuxJ(aux_matrix); aux_data = hypre_AuxParCSRMatrixAuxData(aux_matrix); local_j = aux_j[row_local]; local_data = aux_data[row_local]; row_length[row_local] = n; if ( row_space[row_local] < n) { hypre_TFree(local_j); hypre_TFree(local_data); local_j = hypre_CTAlloc(HYPRE_Int,n); local_data = hypre_CTAlloc(double,n); row_space[row_local] = n; } for (i=0; i < n; i++) { local_j[i] = indices[i]; local_data[i] = coeffs[i]; } /* make sure first element is diagonal element, if not, find it and exchange it with first element */ if (local_j[0] != row_local) { for (i=1; i < n; i++) { if (local_j[i] == row_local) { local_j[i] = local_j[0]; local_j[0] = row_local; temp = local_data[0]; local_data[0] = local_data[i]; local_data[i] = temp; break; } } } /* sort data according to column indices, except for first element */ qsort1(local_j,local_data,1,n-1); } else /* insert immediately into data into ParCSRMatrix structure */ {
hypre_ParCSRBlockMatrix * hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix, HYPRE_Int matrix_C_block_size ) { MPI_Comm comm = hypre_ParCSRMatrixComm(matrix); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix); HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix); HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(matrix); HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(matrix); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(matrix); HYPRE_Int *map_to_node=NULL, *counter=NULL, *col_in_j_map=NULL; HYPRE_Int *matrix_C_col_map_offd = NULL; HYPRE_Int matrix_C_num_cols_offd; HYPRE_Int matrix_C_num_nonzeros_offd; HYPRE_Int num_rows, num_nodes; HYPRE_Int *offd_i = hypre_CSRMatrixI(offd); HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd); HYPRE_Complex * offd_data = hypre_CSRMatrixData(offd); hypre_ParCSRBlockMatrix *matrix_C; HYPRE_Int *matrix_C_row_starts; HYPRE_Int *matrix_C_col_starts; hypre_CSRBlockMatrix *matrix_C_diag; hypre_CSRBlockMatrix *matrix_C_offd; HYPRE_Int *matrix_C_offd_i=NULL, *matrix_C_offd_j = NULL; HYPRE_Complex *matrix_C_offd_data = NULL; HYPRE_Int num_procs, i, j, k, k_map, count, index, start_index, pos, row; hypre_MPI_Comm_size(comm,&num_procs); #ifdef HYPRE_NO_GLOBAL_PARTITION matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2); matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2); for(i = 0; i < 2; i++) { matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size; matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size; } #else matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1); matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1); for(i = 0; i < num_procs + 1; i++) { matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size; matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size; } #endif /************* create the diagonal part ************/ matrix_C_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag, matrix_C_block_size); /******* the offd part *******************/ /* can't use the same function for the offd part - because this isn't square and the offd j entries aren't global numbering (have to consider the offd map) - need to look at col_map_offd first */ /* figure out the new number of offd columns (num rows is same as diag) */ num_cols_offd = hypre_CSRMatrixNumCols(offd); num_rows = hypre_CSRMatrixNumRows(diag); num_nodes = num_rows/matrix_C_block_size; matrix_C_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes + 1); matrix_C_num_cols_offd = 0; matrix_C_offd_i[0] = 0; matrix_C_num_nonzeros_offd = 0; if (num_cols_offd) { map_to_node = hypre_CTAlloc(HYPRE_Int, num_cols_offd); matrix_C_num_cols_offd = 1; map_to_node[0] = col_map_offd[0]/matrix_C_block_size; for (i=1; i < num_cols_offd; i++) { map_to_node[i] = col_map_offd[i]/matrix_C_block_size; if (map_to_node[i] > map_to_node[i-1]) matrix_C_num_cols_offd++; } matrix_C_col_map_offd = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd); col_in_j_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); matrix_C_col_map_offd[0] = map_to_node[0]; col_in_j_map[0] = 0; count = 1; j = 1; /* fill in the col_map_off_d - these are global numbers. Then we need to map these to j entries (these have local numbers) */ for (i=1; i < num_cols_offd; i++) { if (map_to_node[i] > map_to_node[i-1]) { matrix_C_col_map_offd[count++] = map_to_node[i]; } col_in_j_map[j++] = count - 1; } /* now figure the nonzeros */ matrix_C_num_nonzeros_offd = 0; counter = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd); for (i=0; i < matrix_C_num_cols_offd; i++) counter[i] = -1; for (i=0; i < num_nodes; i++) /* for each block row */ { matrix_C_offd_i[i] = matrix_C_num_nonzeros_offd; for (j=0; j < matrix_C_block_size; j++) { row = i*matrix_C_block_size+j; for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row */ { k_map = col_in_j_map[offd_j[k]]; /*nodal col - see if this has been in this block row (i) already*/ if (counter[k_map] < i) /* not yet counted for this nodal row */ { counter[k_map] = i; matrix_C_num_nonzeros_offd++; } } } } /* fill in final i entry */ matrix_C_offd_i[num_nodes] = matrix_C_num_nonzeros_offd; } /* create offd matrix */ matrix_C_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_nodes, matrix_C_num_cols_offd, matrix_C_num_nonzeros_offd); /* assign i */ hypre_CSRBlockMatrixI(matrix_C_offd) = matrix_C_offd_i; /* create (and allocate j and data) */ if (matrix_C_num_nonzeros_offd) { matrix_C_offd_j = hypre_CTAlloc(HYPRE_Int, matrix_C_num_nonzeros_offd); matrix_C_offd_data = hypre_CTAlloc(HYPRE_Complex, matrix_C_num_nonzeros_offd*matrix_C_block_size* matrix_C_block_size); hypre_CSRBlockMatrixJ(matrix_C_offd) = matrix_C_offd_j; hypre_CSRMatrixData(matrix_C_offd) = matrix_C_offd_data; for (i=0; i < matrix_C_num_cols_offd; i++) counter[i] = -1; index = 0; /*keep track of entry in matrix_C_offd_j*/ start_index = 0; for (i=0; i < num_nodes; i++) /* for each block row */ { for (j=0; j < matrix_C_block_size; j++) /* for each row in block */ { row = i*matrix_C_block_size+j; for (k=offd_i[row]; k < offd_i[row+1]; k++) /* go through single row's cols */ { k_map = col_in_j_map[offd_j[k]]; /*nodal col for off_d */ if (counter[k_map] < start_index) /* not yet counted for this nodal row */ { counter[k_map] = index; matrix_C_offd_j[index] = k_map; /*copy the data: which position (corresponds to j array) + which row + which col */ pos = (index * matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + col_map_offd[offd_j[k]]%matrix_C_block_size; matrix_C_offd_data[pos] = offd_data[k]; index ++; } else /* this col has already been listed for this row */ { /*copy the data: which position (corresponds to j array) + which row + which col */ pos = (counter[k_map]* matrix_C_block_size * matrix_C_block_size) + (j * matrix_C_block_size) + col_map_offd[offd_j[k]]%matrix_C_block_size; matrix_C_offd_data[pos] = offd_data[k]; } } } start_index = index; /* first index for current nodal row */ } } /* *********create the new matrix *************/ matrix_C = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size, global_num_rows/matrix_C_block_size, global_num_cols/matrix_C_block_size, matrix_C_row_starts, matrix_C_col_starts, matrix_C_num_cols_offd, hypre_CSRBlockMatrixNumNonzeros(matrix_C_diag), matrix_C_num_nonzeros_offd); /* use the diag and off diag matrices we have already created */ hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C)); hypre_ParCSRBlockMatrixDiag(matrix_C) = matrix_C_diag; hypre_CSRBlockMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C)); hypre_ParCSRBlockMatrixOffd(matrix_C) = matrix_C_offd; hypre_ParCSRMatrixColMapOffd(matrix_C) = matrix_C_col_map_offd; /* *********don't bother to copy the comm_pkg *************/ hypre_ParCSRBlockMatrixCommPkg(matrix_C) = NULL; /* CLEAN UP !!!! */ hypre_TFree(map_to_node); hypre_TFree(col_in_j_map); hypre_TFree(counter); return matrix_C; }
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A, HYPRE_Int nr, HYPRE_Int nc, hypre_ParCSRMatrix **blocks, int interleaved_rows, int interleaved_cols) { HYPRE_Int i, j, k; MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A); HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag); HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag); HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd); hypre_assert(local_rows % nr == 0 && local_cols % nc == 0); hypre_assert(global_rows % nr == 0 && global_cols % nc == 0); HYPRE_Int block_rows = local_rows / nr; HYPRE_Int block_cols = local_cols / nc; HYPRE_Int num_blocks = nr * nc; /* mark local rows and columns with block number */ HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows); HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols); for (i = 0; i < local_rows; i++) { row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows); } for (i = 0; i < local_cols; i++) { col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols); } /* determine the block numbers for offd columns */ HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols); hypre_ParCSRCommHandle *comm_handle; HYPRE_Int *int_buf_data; { /* make sure A has a communication package */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* calculate the final global column numbers for each block */ HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc); HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols); HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc; for (i = 0; i < local_cols; i++) { block_global_col[i] = first_col + count[col_block_num[i]]++; } hypre_TFree(count); /* use a Matvec communication pattern to determine offd_col_block_num */ HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); HYPRE_Int start, index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k]; } } hypre_TFree(block_global_col); comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_col_block_num); } /* create the block matrices */ HYPRE_Int num_procs = 1; if (!hypre_ParCSRMatrixAssumedPartition(A)) { hypre_MPI_Comm_size(comm, &num_procs); } HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); for (i = 0; i <= num_procs; i++) { row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr; col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc; } for (i = 0; i < num_blocks; i++) { blocks[i] = hypre_ParCSRMatrixCreate(comm, global_rows/nr, global_cols/nc, row_starts, col_starts, 0, 0, 0); } /* split diag part */ hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc); hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i])); hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i]; } /* finish communication, receive offd_col_block_num */ hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); /* decode global offd column numbers */ HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols); for (i = 0; i < offd_cols; i++) { offd_global_col[i] = offd_col_block_num[i] / nc; offd_col_block_num[i] %= nc; } /* split offd part */ hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i])); hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i]; } hypre_TFree(csr_blocks); hypre_TFree(col_block_num); hypre_TFree(row_block_num); /* update block col-maps */ for (int bi = 0; bi < nr; bi++) { for (int bj = 0; bj < nc; bj++) { hypre_ParCSRMatrix *block = blocks[bi*nc + bj]; hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block); HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd); HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols); for (i = j = 0; i < offd_cols; i++) { HYPRE_Int bn = offd_col_block_num[i]; if (bn == bj) { block_col_map[j++] = offd_global_col[i]; } } hypre_assert(j == block_offd_cols); hypre_ParCSRMatrixColMapOffd(block) = block_col_map; } } hypre_TFree(offd_global_col); hypre_TFree(offd_col_block_num); /* finish the new matrices, make them own all the stuff */ for (i = 0; i < num_blocks; i++) { hypre_ParCSRMatrixSetNumNonzeros(blocks[i]); hypre_MatvecCommPkgCreate(blocks[i]); hypre_ParCSRMatrixOwnsData(blocks[i]) = 1; /* only the first block will own the row/col_starts */ hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i; hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i; } }
/* Function: hypre_ParCSRMatrixEliminateAAe (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / 0 | A_ib \ Ae = | -----+--------- | \ A_bi | A_bb - I / */ void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **Ae, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim) { HYPRE_Int i, j, k; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int A_diag_nrows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int A_offd_ncols = hypre_CSRMatrixNumCols(A_offd); *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A), hypre_ParCSRMatrixGlobalNumRows(A), hypre_ParCSRMatrixGlobalNumCols(A), hypre_ParCSRMatrixRowStarts(A), hypre_ParCSRMatrixColStarts(A), 0, 0, 0); hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0); hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0); hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae); hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae); HYPRE_Int Ae_offd_ncols; HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *Ae_col_map_offd; HYPRE_Int *col_mark; HYPRE_Int *col_remap; /* figure out which offd cols should be eliminated */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows); HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* which of the local rows are to be eliminated */ for (i = 0; i < A_diag_nrows; i++) { eliminate_row[i] = 0; } for (i = 0; i < num_rowscols_to_elim; i++) { eliminate_row[rowscols_to_elim[i]] = 1; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); /* eliminate diagonal part, overlapping it with communication */ hypre_CSRMatrixElimCreate(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, NULL); hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, 1, NULL); hypre_CSRMatrixReorder(Ae_diag); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { offd_cols_to_elim[num_offd_cols_to_elim++] = i; } } hypre_TFree(int_buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); } /* eliminate the off-diagonal part */ col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); hypre_CSRMatrixElimCreate(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, col_mark); for (i = k = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { col_remap[i] = k++; } } hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, 0, col_remap); /* create col_map_offd for Ae */ Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_offd_ncols++; } } Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols); Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i]; } } hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd; hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols; hypre_TFree(col_remap); hypre_TFree(col_mark); hypre_TFree(offd_cols_to_elim); hypre_ParCSRMatrixSetNumNonzeros(*Ae); hypre_MatvecCommPkgCreate(*Ae); }
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_CSRMatrix *matrix; hypre_CSRMatrix *matrix1; hypre_ParCSRMatrix *par_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_Vector *y2_local; hypre_ParVector *x; hypre_ParVector *x2; hypre_ParVector *y; hypre_ParVector *y2; HYPRE_Int vecstride_x, idxstride_x, vecstride_y, idxstride_y; HYPRE_Int num_procs, my_id; HYPRE_Int local_size; HYPRE_Int num_vectors; HYPRE_Int global_num_rows, global_num_cols; HYPRE_Int first_index; HYPRE_Int i, j, ierr=0; double *data, *data2; HYPRE_Int *row_starts, *col_starts; char file_name[80]; /* Initialize MPI */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &my_id); hypre_printf(" my_id: %d num_procs: %d\n", my_id, num_procs); if (my_id == 0) { matrix = hypre_CSRMatrixRead("input"); hypre_printf(" read input\n"); } row_starts = NULL; col_starts = NULL; par_matrix = hypre_CSRMatrixToParCSRMatrix(hypre_MPI_COMM_WORLD, matrix, row_starts, col_starts); hypre_printf(" converted\n"); matrix1 = hypre_ParCSRMatrixToCSRMatrixAll(par_matrix); hypre_sprintf(file_name,"matrix1.%d",my_id); if (matrix1) hypre_CSRMatrixPrint(matrix1, file_name); hypre_ParCSRMatrixPrint(par_matrix,"matrix"); hypre_ParCSRMatrixPrintIJ(par_matrix,0,0,"matrixIJ"); par_matrix = hypre_ParCSRMatrixRead(hypre_MPI_COMM_WORLD,"matrix"); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); hypre_printf(" global_num_cols %d\n", global_num_cols); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); first_index = col_starts[my_id]; local_size = col_starts[my_id+1] - first_index; num_vectors = 3; x = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols, col_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); vecstride_x = hypre_VectorVectorStride(x_local); idxstride_x = hypre_VectorIndexStride(x_local); for ( j=0; j<num_vectors; ++j ) for (i=0; i < local_size; i++) data[i*idxstride_x + j*vecstride_x] = first_index+i+1 + 100*j; x2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_cols, col_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(x2,0); hypre_ParVectorInitialize(x2); hypre_ParVectorSetConstantValues(x2,2.0); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); first_index = row_starts[my_id]; local_size = row_starts[my_id+1] - first_index; y = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows, row_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); y_local = hypre_ParVectorLocalVector(y); y2 = hypre_ParMultiVectorCreate( hypre_MPI_COMM_WORLD, global_num_rows, row_starts, num_vectors ); hypre_ParVectorSetPartitioningOwner(y2,0); hypre_ParVectorInitialize(y2); y2_local = hypre_ParVectorLocalVector(y2); data2 = hypre_VectorData(y2_local); vecstride_y = hypre_VectorVectorStride(y2_local); idxstride_y = hypre_VectorIndexStride(y2_local); for ( j=0; j<num_vectors; ++j ) for (i=0; i < local_size; i++) data2[i*idxstride_y+j*vecstride_y] = first_index+i+1 + 100*j; hypre_ParVectorSetConstantValues(y,1.0); hypre_printf(" initialized vectors, first_index=%i\n", first_index); hypre_ParVectorPrint(x, "vectorx"); hypre_ParVectorPrint(y, "vectory"); hypre_MatvecCommPkgCreate(par_matrix); hypre_ParCSRMatrixMatvec ( 1.0, par_matrix, x, 1.0, y); hypre_printf(" did matvec\n"); hypre_ParVectorPrint(y, "result"); ierr = hypre_ParCSRMatrixMatvecT ( 1.0, par_matrix, y2, 1.0, x2); hypre_printf(" did matvecT %d\n", ierr); hypre_ParVectorPrint(x2, "transp"); hypre_ParCSRMatrixDestroy(par_matrix); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(x2); hypre_ParVectorDestroy(y); hypre_ParVectorDestroy(y2); if (my_id == 0) hypre_CSRMatrixDestroy(matrix); if (matrix1) hypre_CSRMatrixDestroy(matrix1); /* Finalize MPI */ hypre_MPI_Finalize(); return 0; }
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix( hypre_ParCSRMatrix *Ap, MPI_Comm comm, hypre_ParChordMatrix **pAc ) { HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap); HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap); hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap); hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap); HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd); HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag); HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap); HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap); hypre_ParChordMatrix * Ac; hypre_NumbersNode * rdofs, * offd_cols_me; hypre_NumbersNode ** offd_cols; HYPRE_Int ** offd_col_array; HYPRE_Int * len_offd_col_array, * offd_col_array_me; HYPRE_Int len_offd_col_array_me; HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global; HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq; HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor; HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto; HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor; HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor; double **inchord_data; double data; HYPRE_Int *first_index_idof, *first_index_rdof; hypre_MPI_Request * request; hypre_MPI_Status * status; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); num_idofs = row_starts[my_id+1] - row_starts[my_id]; num_rdofs = col_starts[my_id+1] - col_starts[my_id]; hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs ); Ac = *pAc; /* The following block sets Inprocessor: On each proc. my_id, we find the columns in the offd and diag blocks (global no.s). The columns are rdofs (contrary to what I wrote in ChordMatrix.txt). For each such col/rdof r, find the proc. p which owns row/idof r. We set the temporary array pcr[p]=1 for such p. An MPI all-to-all will exchange such arrays so my_id's array qcr has qcr[q]=1 iff, on proc. q, pcr[my_id]=1. In other words, qcr[q]=1 if my_id owns a row/idof i which is the same as a col/rdof owned by q. Collect all such q's into in the array Inprocessor. While constructing pcr, we also construct pj such that for any index jj into offd_j,offd_data, pj[jj] is the processor which owns jj as a row (idof) (the number jj is local to this processor). */ pcr = hypre_CTAlloc( HYPRE_Int, num_procs ); qcr = hypre_CTAlloc( HYPRE_Int, num_procs ); for ( p=0; p<num_procs; ++p ) pcr[p]=0; for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj ) { j_local = offd_j[jj]; j_global = col_map_offd[j_local]; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj] = p;*/ break; } } } /* jjd = jj; ...not used yet */ /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this one line would do the job of the following nested loop. For non-square matrices, the data distribution is too arbitrary. */ for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj ) { j_local = diag_j[jj]; j_global = j_local + first_col_diag; for ( p=0; p<num_procs; ++p ) { if ( j_global >= row_starts[p] && j_global<row_starts[p+1] ) { pcr[p]=1; /* not used yet... pj[jj+jjd] = p;*/ break; } } } /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a row/idof */ hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm ); /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q The array of such q's is the array Inprocessor. */ num_inprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors; inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q; num_toprocessors = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors; toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); p = 0; for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q; hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors; hypre_ParChordMatrixInprocessor(Ac) = inprocessor; hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors; hypre_ParChordMatrixToprocessor(Ac) = toprocessor; hypre_TFree( qcr ); /* FirstIndexIdof[p] is the global index of proc. p's row 0 */ /* FirstIndexRdof[p] is the global index of proc. p's col 0 */ /* Fir FirstIndexIdof, we copy the array row_starts rather than its pointers, because the chord matrix will think it's free to delete FirstIndexIdof */ /* col_starts[p] contains the global index of the first column in the diag block of p. But for first_index_rdof we want the global index of the first column in p (whether that's in the diag or offd block). So it's more involved than row/idof: we also check the offd block, and have to do a gather to get first_index_rdof for every proc. on every proc. */ first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 ); for ( p=0; p<=num_procs; ++p ) { first_index_idof[p] = row_starts[p]; first_index_rdof[p] = col_starts[p]; }; if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 ) first_index_rdof[my_id] = col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0]; hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT, first_index_rdof, 1, HYPRE_MPI_INT, comm ); /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p. Set each chord (idof,jdof,data). We go through each matrix element in the diag block, find what processor owns its column no. as a row, then update num_inchords[p], inchord_idof[p], inchord_rdof[p], inchord_data[p]. */ inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); inchord_data = hypre_CTAlloc( double*, num_inprocessors ); num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors ); num_rdofs = 0; for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0; my_q = -1; for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q; hypre_assert( my_q>=0 ); /* diag block: first count chords (from my_id to my_id), then set them from diag block's CSR data structure */ num_idofs = hypre_CSRMatrixNumRows(diag); rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; hypre_NumbersEnter( rdofs, j_local ); ++num_inchords[my_q]; } }; num_rdofs = hypre_NumbersNEntered( rdofs ); inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] ); inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] ); chord[0] = 0; for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row ) { for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(diag)[i]; data = hypre_CSRMatrixData(diag)[i]; inchord_idof[my_q][chord[0]] = row; /* Here We need to convert from j_local - a column local to the diag of this proc., to a j which is local only to this processor - a column (rdof) numbering scheme to be shared by the diag and offd blocks... */ j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q]; j = j_global - first_index_rdof[my_q]; inchord_rdof[my_q][chord[0]] = j; inchord_data[my_q][chord[0]] = data; hypre_assert( chord[0] < num_inchords[my_q] ); ++chord[0]; } }; hypre_NumbersDeleteNode(rdofs); /* offd block: */ /* >>> offd_cols_me duplicates rdofs */ offd_cols_me = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( offd_cols_me, j_global ); } } offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors ); len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors ); offd_col_array_me = hypre_NumbersArray( offd_cols_me ); len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me ); request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs ); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(status); ireq = 0; for ( q=0; q<num_inprocessors; ++q ) offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] ); for ( q=0; q<num_inprocessors; ++q ) hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT, inprocessor[q], 0, comm, &request[ireq++] ); for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 ) { hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT, p, 0, comm, &request[ireq++] ); } status = hypre_CTAlloc(hypre_MPI_Status, ireq ); hypre_MPI_Waitall( ireq, request, status ); hypre_TFree(request); hypre_TFree(status); offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors ); for ( q=0; q<num_inprocessors; ++q ) { offd_cols[q] = hypre_NumbersNewNode(); for ( i=0; i<len_offd_col_array[q]; ++i ) hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] ); } len_num_rdofs_toprocessor = 1 + hypre_CSRMatrixI(offd) [hypre_CSRMatrixNumRows(offd)]; inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor ); for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto ) { inproc[qto] = -1; toproc[qto] = -1; num_rdofs_toprocessor[qto] = 0; }; rdofs = hypre_NumbersNewNode(); for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row ) { for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i ) { j_local = hypre_CSRMatrixJ(offd)[i]; j_global = col_map_offd[j_local]; hypre_NumbersEnter( rdofs, j_local ); /* TO DO: find faster ways to do the two processor lookups below.*/ /* Find a processor p (local index q) from the inprocessor list, which owns the column(rdof) whichis the same as this processor's row(idof) row. Update num_inchords for p. Save q as inproc[i] for quick recall later. It represents an inprocessor (not unique) connected to a chord i. */ inproc[i] = -1; for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; if ( hypre_NumbersQuery( offd_cols[q], row+hypre_ParCSRMatrixFirstRowIndex(Ap) ) == 1 ) { /* row is one of the offd columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } if ( inproc[i]<0 ) { /* For square matrices, we would have found the column in some other processor's offd. But for non-square matrices it could exist only in some other processor's diag...*/ /* Note that all data in a diag block is stored. We don't check whether the value of a data entry is zero. */ for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { p = inprocessor[q]; row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap); if ( row_global>=col_starts[p] && row_global< col_starts[p+1] ) { /* row is one of the diag columns of p */ ++num_inchords[q]; inproc[i] = q; break; } } } hypre_assert( inproc[i]>=0 ); /* Find the processor pto (local index qto) from the toprocessor list, which owns the row(idof) which is the same as this processor's column(rdof) j_global. Update num_rdofs_toprocessor for pto. Save pto as toproc[i] for quick recall later. It represents the toprocessor connected to a chord i. */ for ( qto=0; qto<num_toprocessors; ++qto ) { pto = toprocessor[qto]; if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] ) { hypre_assert( qto < len_num_rdofs_toprocessor ); ++num_rdofs_toprocessor[qto]; /* ... an overestimate, as if two chords share an rdof, that rdof will be counted twice in num_rdofs_toprocessor. It can be fixed up later.*/ toproc[i] = qto; break; } } } }; num_rdofs += hypre_NumbersNEntered(rdofs); hypre_NumbersDeleteNode(rdofs); for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q) { inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] ); inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] ); chord[q] = 0; };
HYPRE_Int hypre_ParChordMatrixToParCSRMatrix( hypre_ParChordMatrix *Ac, MPI_Comm comm, hypre_ParCSRMatrix **pAp ) { /* Some parts of this function are copied from hypre_CSRMatrixToParCSRMatrix. */ hypre_ParCSRMatrix *Ap; HYPRE_Int *row_starts, *col_starts; HYPRE_Int global_num_rows, global_num_cols, my_id, num_procs; HYPRE_Int num_cols_offd, num_nonzeros_diag, num_nonzeros_offd; HYPRE_Int *local_num_rows; /* not computed HYPRE_Int *local_num_nonzeros; */ HYPRE_Int num_nonzeros, first_col_diag, last_col_diag; HYPRE_Int i,ic,ij,ir,ilocal,p,r,r_p,r_global,r_local, jlen; HYPRE_Int *a_i, *a_j, *ilen; HYPRE_Int **rdofs, **ps; double data; double *a_data; double **datas; hypre_CSRMatrix *local_A; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); hypre_ParChordMatrix_RowStarts ( Ac, comm, &row_starts, &global_num_cols ); /* ... this function works correctly only under some assumptions; see the function definition for details */ global_num_rows = row_starts[num_procs] - row_starts[0]; col_starts = NULL; /* The offd and diag blocks aren't defined until we have both row and column partitions... */ num_cols_offd = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; Ap = hypre_ParCSRMatrixCreate( comm, global_num_rows, global_num_cols, row_starts, col_starts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); *pAp = Ap; row_starts = hypre_ParCSRMatrixRowStarts(Ap); col_starts = hypre_ParCSRMatrixColStarts(Ap); local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs); for (i=0; i < num_procs; i++) local_num_rows[i] = row_starts[i+1] - row_starts[i]; num_nonzeros = 0; for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) { num_nonzeros += hypre_ParChordMatrixNumInchords(Ac)[p]; }; local_A = hypre_CSRMatrixCreate( local_num_rows[my_id], global_num_cols, num_nonzeros ); /* Compute local CSRMatrix-like i,j arrays for this processor. */ ps = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) ); rdofs = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) ); datas = hypre_CTAlloc( double*, hypre_ParChordMatrixNumIdofs(Ac) ); ilen = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac) ); jlen = 0; for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) { ilen[i] = 0; ps[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) ); rdofs[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) ); datas[i] = hypre_CTAlloc( double, hypre_ParChordMatrixNumRdofs(Ac) ); /* ... rdofs[i], datas[i] will generally, not always, be much too big */ } for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) { for ( ic=0; ic<hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic ) { ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic]; r = hypre_ParChordMatrixInchordRdof(Ac)[p][ic]; data = hypre_ParChordMatrixInchordData(Ac)[p][ic]; ps[ilocal][ ilen[ilocal] ] = p; rdofs[ilocal][ ilen[ilocal] ] = r; datas[ilocal][ ilen[ilocal] ] = data; ++ilen[ilocal]; ++jlen; } }; a_i = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac)+1 ); a_j = hypre_CTAlloc( HYPRE_Int, jlen ); a_data = hypre_CTAlloc( double, jlen ); a_i[0] = 0; for ( ilocal=0; ilocal<hypre_ParChordMatrixNumIdofs(Ac); ++ilocal ) { a_i[ilocal+1] = a_i[ilocal] + ilen[ilocal]; ir = 0; for ( ij=a_i[ilocal]; ij<a_i[ilocal+1]; ++ij ) { p = ps[ilocal][ir]; r_p = rdofs[ilocal][ir]; /* local in proc. p */ r_global = r_p + hypre_ParChordMatrixFirstindexRdof(Ac)[p]; r_local = r_global - hypre_ParChordMatrixFirstindexRdof(Ac)[my_id]; a_j[ij] = r_local; a_data[ij] = datas[ilocal][ir]; ir++; }; }; for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) { hypre_TFree( ps[i] ); hypre_TFree( rdofs[i] ); hypre_TFree( datas[i] ); }; hypre_TFree( ps ); hypre_TFree( rdofs ); hypre_TFree( datas ); hypre_TFree( ilen ); first_col_diag = col_starts[my_id]; last_col_diag = col_starts[my_id+1]-1; hypre_CSRMatrixData(local_A) = a_data; hypre_CSRMatrixI(local_A) = a_i; hypre_CSRMatrixJ(local_A) = a_j; hypre_CSRMatrixOwnsData(local_A) = 0; GenerateDiagAndOffd(local_A, Ap, first_col_diag, last_col_diag); /* set pointers back to NULL before destroying */ if (my_id == 0) { hypre_TFree(a_data); /* ... the data has been copied into different diag & offd arrays of Ap */ hypre_TFree(a_j); hypre_TFree(a_i); hypre_CSRMatrixData(local_A) = NULL; hypre_CSRMatrixI(local_A) = NULL; hypre_CSRMatrixJ(local_A) = NULL; } hypre_CSRMatrixDestroy(local_A); hypre_TFree(local_num_rows); /* hypre_TFree(csr_matrix_datatypes);*/ return 0; }