hypre_ParCSRMatrix * hypre_ParCSRBlockMatrixCompress( hypre_ParCSRBlockMatrix *matrix ) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix); HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix); HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix); HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix); HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd); HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag); HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd); hypre_ParCSRMatrix *matrix_C; HYPRE_Int i; matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows, global_num_cols, row_starts,col_starts,num_cols_offd,num_nonzeros_diag,num_nonzeros_offd); hypre_ParCSRMatrixInitialize(matrix_C); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C)); hypre_ParCSRMatrixDiag(matrix_C) = hypre_CSRBlockMatrixCompress(diag); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C)); hypre_ParCSRMatrixOffd(matrix_C) = hypre_CSRBlockMatrixCompress(offd); for(i = 0; i < num_cols_offd; i++) hypre_ParCSRMatrixColMapOffd(matrix_C)[i] = hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]; return matrix_C; }
HYPRE_Int hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix *SN, HYPRE_Int *CFN_marker, HYPRE_Int *col_offd_SN_to_AN, HYPRE_Int num_functions, HYPRE_Int nodal, HYPRE_Int data, HYPRE_Int **dof_func_ptr, HYPRE_Int **CF_marker_ptr, HYPRE_Int **col_offd_S_to_A_ptr, hypre_ParCSRMatrix **S_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(SN); hypre_ParCSRMatrix *S; hypre_CSRMatrix *S_diag; HYPRE_Int *S_diag_i; HYPRE_Int *S_diag_j; double *S_diag_data; hypre_CSRMatrix *S_offd; HYPRE_Int *S_offd_i; HYPRE_Int *S_offd_j; double *S_offd_data; HYPRE_Int *row_starts_S; HYPRE_Int *col_starts_S; HYPRE_Int *row_starts_SN = hypre_ParCSRMatrixRowStarts(SN); HYPRE_Int *col_starts_SN = hypre_ParCSRMatrixColStarts(SN); hypre_CSRMatrix *SN_diag = hypre_ParCSRMatrixDiag(SN); HYPRE_Int *SN_diag_i = hypre_CSRMatrixI(SN_diag); HYPRE_Int *SN_diag_j = hypre_CSRMatrixJ(SN_diag); double *SN_diag_data; hypre_CSRMatrix *SN_offd = hypre_ParCSRMatrixOffd(SN); HYPRE_Int *SN_offd_i = hypre_CSRMatrixI(SN_offd); HYPRE_Int *SN_offd_j = hypre_CSRMatrixJ(SN_offd); double *SN_offd_data; HYPRE_Int *CF_marker; HYPRE_Int *col_map_offd_SN = hypre_ParCSRMatrixColMapOffd(SN); HYPRE_Int *col_map_offd_S; HYPRE_Int *dof_func; HYPRE_Int num_nodes = hypre_CSRMatrixNumRows(SN_diag); HYPRE_Int num_variables; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(SN); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_S; HYPRE_Int *send_procs_S; HYPRE_Int *send_map_starts_S; HYPRE_Int *send_map_elmts_S; HYPRE_Int *recv_procs_S; HYPRE_Int *recv_vec_starts_S; HYPRE_Int *col_offd_S_to_A = NULL; HYPRE_Int num_coarse_nodes; HYPRE_Int i,j,k,k1,jj,cnt; HYPRE_Int row, start, end; HYPRE_Int num_procs; HYPRE_Int num_cols_offd_SN = hypre_CSRMatrixNumCols(SN_offd); HYPRE_Int num_cols_offd_S; HYPRE_Int SN_num_nonzeros_diag; HYPRE_Int SN_num_nonzeros_offd; HYPRE_Int S_num_nonzeros_diag; HYPRE_Int S_num_nonzeros_offd; HYPRE_Int global_num_vars; HYPRE_Int global_num_cols; HYPRE_Int global_num_nodes; HYPRE_Int ierr = 0; hypre_MPI_Comm_size(comm, &num_procs); num_variables = num_functions*num_nodes; CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); if (nodal < 0) { cnt = 0; num_coarse_nodes = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) num_coarse_nodes++; for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } dof_func = hypre_CTAlloc(HYPRE_Int,num_coarse_nodes*num_functions); cnt = 0; for (i=0; i < num_nodes; i++) { if (CFN_marker[i] == 1) { for (k=0; k < num_functions; k++) dof_func[cnt++] = k; } } *dof_func_ptr = dof_func; } else { cnt = 0; for (i=0; i < num_nodes; i++) for (j=0; j < num_functions; j++) CF_marker[cnt++] = CFN_marker[i]; } *CF_marker_ptr = CF_marker; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,2); for (i=0; i < 2; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #else row_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) row_starts_S[i] = num_functions*row_starts_SN[i]; if (row_starts_SN != col_starts_SN) { col_starts_S = hypre_CTAlloc(HYPRE_Int,num_procs+1); for (i=0; i < num_procs+1; i++) col_starts_S[i] = num_functions*col_starts_SN[i]; } else { col_starts_S = row_starts_S; } #endif SN_num_nonzeros_diag = SN_diag_i[num_nodes]; SN_num_nonzeros_offd = SN_offd_i[num_nodes]; global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(SN); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions; global_num_vars = global_num_nodes*num_functions; S_num_nonzeros_diag = num_functions*SN_num_nonzeros_diag; S_num_nonzeros_offd = num_functions*SN_num_nonzeros_offd; num_cols_offd_S = num_functions*num_cols_offd_SN; S = hypre_ParCSRMatrixCreate(comm, global_num_vars, global_num_cols, row_starts_S, col_starts_S, num_cols_offd_S, S_num_nonzeros_diag, S_num_nonzeros_offd); S_diag = hypre_ParCSRMatrixDiag(S); S_offd = hypre_ParCSRMatrixOffd(S); S_diag_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_offd_i = hypre_CTAlloc(HYPRE_Int, num_variables+1); S_diag_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_diag); hypre_CSRMatrixI(S_diag) = S_diag_i; hypre_CSRMatrixJ(S_diag) = S_diag_j; if (data) { SN_diag_data = hypre_CSRMatrixData(SN_diag); S_diag_data = hypre_CTAlloc(double, S_num_nonzeros_diag); hypre_CSRMatrixData(S_diag) = S_diag_data; if (num_cols_offd_S) { SN_offd_data = hypre_CSRMatrixData(SN_offd); S_offd_data = hypre_CTAlloc(double, S_num_nonzeros_offd); hypre_CSRMatrixData(S_offd) = S_offd_data; } } hypre_CSRMatrixI(S_offd) = S_offd_i; if (comm_pkg) { comm_pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_S) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_S) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_S) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_S = NULL; send_map_elmts_S = NULL; if (num_sends) { send_procs_S = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_S = hypre_CTAlloc(HYPRE_Int, num_functions*send_map_starts[num_sends]); } send_map_starts_S = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_S = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_S = NULL; if (num_recvs) recv_procs_S = hypre_CTAlloc(HYPRE_Int,num_recvs); send_map_starts_S[0] = 0; for (i=0; i < num_sends; i++) { send_procs_S[i] = send_procs[i]; send_map_starts_S[i+1] = num_functions*send_map_starts[i+1]; } recv_vec_starts_S[0] = 0; for (i=0; i < num_recvs; i++) { recv_procs_S[i] = recv_procs[i]; recv_vec_starts_S[i+1] = num_functions*recv_vec_starts[i+1]; } cnt = 0; for (i=0; i < send_map_starts[num_sends]; i++) { k1 = num_functions*send_map_elmts[i]; for (j=0; j < num_functions; j++) { send_map_elmts_S[cnt++] = k1+j; } } hypre_ParCSRCommPkgSendProcs(comm_pkg_S) = send_procs_S; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_S) = send_map_starts_S; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_S) = send_map_elmts_S; hypre_ParCSRCommPkgRecvProcs(comm_pkg_S) = recv_procs_S; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_S) = recv_vec_starts_S; hypre_ParCSRMatrixCommPkg(S) = comm_pkg_S; } if (num_cols_offd_S) { S_offd_j = hypre_CTAlloc(HYPRE_Int, S_num_nonzeros_offd); hypre_CSRMatrixJ(S_offd) = S_offd_j; col_map_offd_S = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_map_offd_SN[i]*num_functions; for (j=0; j < num_functions; j++) col_map_offd_S[cnt++] = k1+j; } hypre_ParCSRMatrixColMapOffd(S) = col_map_offd_S; } if (col_offd_SN_to_AN) { col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, num_cols_offd_S); cnt = 0; for (i=0; i < num_cols_offd_SN; i++) { k1 = col_offd_SN_to_AN[i]*num_functions; for (j=0; j < num_functions; j++) col_offd_S_to_A[cnt++] = k1+j; } *col_offd_S_to_A_ptr = col_offd_S_to_A; } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_diag_i[i]; j < SN_diag_i[i+1]; j++) { jj = SN_diag_j[j]; if (data) S_diag_data[cnt] = SN_diag_data[j]; S_diag_j[cnt++] = jj*num_functions; } end = cnt; S_diag_i[row] = cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_diag_data[cnt] = S_diag_data[k]; S_diag_j[cnt++] = S_diag_j[k]+k1; } S_diag_i[row] = cnt; } } cnt = 0; row = 0; for (i=0; i < num_nodes; i++) { row++; start = cnt; for (j=SN_offd_i[i]; j < SN_offd_i[i+1]; j++) { jj = SN_offd_j[j]; if (data) S_offd_data[cnt] = SN_offd_data[j]; S_offd_j[cnt++] = jj*num_functions; } end = cnt; S_offd_i[row] = cnt; for (k1=1; k1 < num_functions; k1++) { row++; for (k=start; k < end; k++) { if (data) S_offd_data[cnt] = S_offd_data[k]; S_offd_j[cnt++] = S_offd_j[k]+k1; } S_offd_i[row] = cnt; } } *S_ptr = S; return (ierr); }
HYPRE_Int hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); double *A_diag_data = hypre_CSRMatrixData(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int num_variables = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j; double *AN_diag_data; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j; double *AN_offd_data; HYPRE_Int *col_map_offd_AN; HYPRE_Int *new_col_map_offd; HYPRE_Int *row_starts_AN; HYPRE_Int AN_num_nonzeros_diag = 0; HYPRE_Int AN_num_nonzeros_offd = 0; HYPRE_Int num_cols_offd_AN; HYPRE_Int new_num_cols_offd; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *new_send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN; HYPRE_Int *send_procs_AN; HYPRE_Int *send_map_starts_AN; HYPRE_Int *send_map_elmts_AN; HYPRE_Int *recv_procs_AN; HYPRE_Int *recv_vec_starts_AN; HYPRE_Int i, j, k, k_map; HYPRE_Int ierr = 0; HYPRE_Int index, row; HYPRE_Int start_index; HYPRE_Int num_procs; HYPRE_Int node, cnt; HYPRE_Int mode; HYPRE_Int new_send_elmts_size; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int num_fun2; HYPRE_Int *map_to_node; HYPRE_Int *map_to_map; HYPRE_Int *counter; double sum; double *data; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } mode = fabs(option); comm_pkg_AN = NULL; col_map_offd_AN = NULL; #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions; #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]/num_functions; if (row_starts_AN[i]*num_functions < row_starts[i]) { hypre_printf("nodes not properly aligned or incomplete info!\n"); return (87); } } global_num_nodes = row_starts_AN[num_procs]; #endif num_nodes = num_variables/num_functions; num_fun2 = num_functions*num_functions; map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables); AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); counter = hypre_CTAlloc(HYPRE_Int, num_nodes); for (i=0; i < num_variables; i++) map_to_node[i] = i/num_functions; for (i=0; i < num_nodes; i++) counter[i] = -1; AN_num_nonzeros_diag = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_diag_i[i] = AN_num_nonzeros_diag; for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_diag++; } } row++; } } AN_diag_i[num_nodes] = AN_num_nonzeros_diag; AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i < num_nodes; i++) counter[i] = -1; index = 0; start_index = 0; row = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]*A_diag_data[k]; index++; } else { AN_diag_data[counter[k_map]] += A_diag_data[k]*A_diag_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] = sqrt(AN_diag_data[i]); } break; case 2: /* sum of abs. value of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = fabs(A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) AN_diag_data[i] /= num_fun2; } break; case 3: /* largest element of each block (sets true value - not abs. value) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = A_diag_data[k]; index++; } else { if (fabs(A_diag_data[k]) > fabs(AN_diag_data[counter[k_map]])) AN_diag_data[counter[k_map]] = A_diag_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_diag*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; data[index*num_functions + j] = fabs(A_diag_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_diag_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_diag; i++) { AN_diag_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_diag_data[i] = hypre_max( AN_diag_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of all elements in each block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++) { k_map = map_to_node[A_diag_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_diag_j[index] = k_map; AN_diag_data[index] = (A_diag_data[k]); index++; } else { AN_diag_data[counter[k_map]] += (A_diag_data[k]); } } row++; } start_index = index; } } break; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (DO MORE BELOW) */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = - AN_diag_data[index]; } } num_nonzeros_offd = A_offd_i[num_variables]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); num_cols_offd_AN = 0; if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); send_procs_AN = NULL; send_map_elmts_AN = NULL; if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1); recv_procs_AN = NULL; if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs); for (i=0; i < num_sends; i++) send_procs_AN[i] = send_procs[i]; for (i=0; i < num_recvs; i++) recv_procs_AN[i] = recv_procs[i]; send_map_starts_AN[0] = 0; cnt = 0; for (i=0; i < num_sends; i++) { k_map = send_map_starts[i]; if (send_map_starts[i+1]-k_map) send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions; for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++) { node = send_map_elmts[j]/num_functions; if (node > send_map_elmts_AN[cnt-1]) send_map_elmts_AN[cnt++] = node; } send_map_starts_AN[i+1] = cnt; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } num_cols_offd = hypre_CSRMatrixNumCols(A_offd); if (num_cols_offd) { if (num_cols_offd > num_variables) { hypre_TFree(map_to_node); map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd); } num_cols_offd_AN = 1; map_to_node[0] = col_map_offd[0]/num_functions; for (i=1; i < num_cols_offd; i++) { map_to_node[i] = col_map_offd[i]/num_functions; if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++; } if (num_cols_offd_AN > num_nodes) { hypre_TFree(counter); counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); } map_to_map = NULL; col_map_offd_AN = NULL; map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN); col_map_offd_AN[0] = map_to_node[0]; recv_vec_starts_AN[0] = 0; cnt = 1; for (i=0; i < num_recvs; i++) { for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { node = map_to_node[j]; if (node > col_map_offd_AN[cnt-1]) { col_map_offd_AN[cnt++] = node; } map_to_map[j] = cnt-1; } recv_vec_starts_AN[i+1] = cnt; } for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; AN_num_nonzeros_offd = 0; row = 0; for (i=0; i < num_nodes; i++) { AN_offd_i[i] = AN_num_nonzeros_offd; for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < i) { counter[k_map] = i; AN_num_nonzeros_offd++; } } row++; } } AN_offd_i[num_nodes] = AN_num_nonzeros_offd; } AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN, AN_num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; if (AN_num_nonzeros_offd) { AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd); hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; for (i=0; i < num_cols_offd_AN; i++) counter[i] = -1; index = 0; row = 0; AN_offd_i[0] = 0; start_index = 0; switch (mode) { case 1: /* frobenius norm */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]*A_offd_data[k]; index++; } else { AN_offd_data[counter[k_map]] += A_offd_data[k]*A_offd_data[k]; } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] = sqrt(AN_offd_data[i]); } break; case 2: /* sum of abs. value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = fabs(A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) AN_offd_data[i] /= num_fun2; } break; case 3: /* largest element in each block (not abs. value ) */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = A_offd_data[k]; index++; } else { if (fabs(A_offd_data[k]) > fabs(AN_offd_data[counter[k_map]])) AN_offd_data[counter[k_map]] = A_offd_data[k]; } } row++; } start_index = index; } } break; case 4: /* inf. norm (row-sum) */ { data = hypre_CTAlloc(double, AN_num_nonzeros_offd*num_functions); for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; data[index*num_functions + j] = fabs(A_offd_data[k]); index++; } else { data[(counter[k_map])*num_functions + j] += fabs(A_offd_data[k]); } } row++; } start_index = index; } for (i=0; i < AN_num_nonzeros_offd; i++) { AN_offd_data[i] = data[i*num_functions]; for (j=1; j< num_functions; j++) { AN_offd_data[i] = hypre_max( AN_offd_data[i],data[i*num_functions+j]); } } hypre_TFree(data); } break; case 6: /* sum of value of all elements in block */ { for (i=0; i < num_nodes; i++) { for (j=0; j < num_functions; j++) { for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++) { k_map = map_to_map[A_offd_j[k]]; if (counter[k_map] < start_index) { counter[k_map] = index; AN_offd_j[index] = k_map; AN_offd_data[index] = (A_offd_data[k]); index++; } else { AN_offd_data[counter[k_map]] += (A_offd_data[k]); } } row++; } start_index = index; } } break; } hypre_TFree(map_to_map); } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd_AN, AN_num_nonzeros_diag, AN_num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; new_num_cols_offd = num_functions*num_cols_offd_AN; if (new_num_cols_offd > num_cols_offd) { new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd); cnt = 0; for (i=0; i < num_cols_offd_AN; i++) { for (j=0; j < num_functions; j++) { new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j; } } cnt = 0; for (i=0; i < num_cols_offd; i++) { while (col_map_offd[i] > new_col_map_offd[cnt]) cnt++; col_map_offd[i] = cnt++; } for (i=0; i < num_recvs+1; i++) { recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i]; } for (i=0; i < num_nonzeros_offd; i++) { j = A_offd_j[i]; A_offd_j[i] = col_map_offd[j]; } hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd; hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd; hypre_TFree(col_map_offd); } hypre_TFree(map_to_node); new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions; if (new_send_elmts_size > send_map_starts[num_sends]) { new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size); cnt = 0; send_map_starts[0] = 0; for (i=0; i < num_sends; i++) { send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions; for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++) { for (k=0; k < num_functions; k++) new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k; } } hypre_TFree(send_map_elmts); hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts; } *AN_ptr = AN; hypre_TFree(counter); return (ierr); }
HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, HYPRE_Real alpha, HYPRE_Real eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Real *value; HYPRE_Real ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; value = hypre_CTAlloc(HYPRE_Real,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iy > ny_part[q]) { if (ix > nx_part[p]) { diag_j[cnt] = row_index-nx_local-1 ; diag_data[cnt++] = value[3]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { if (ix > nx_part[p]) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; if (ix < nx_part[p+1]-1) { diag_j[cnt] = row_index+nx_local+1 ; diag_data[cnt++] = value[3]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; if (ix < nx_part[p+1]-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix < nx-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } row_index++; } } if (num_procs > 1) { work = hypre_CTAlloc(HYPRE_Int,o_cnt); for (i=0; i < o_cnt; i++) work[i] = offd_j[i]; qsort0(work, 0, o_cnt-1); col_map_offd[0] = work[0]; cnt = 0; for (i=0; i < o_cnt; i++) { if (work[i] > col_map_offd[cnt]) { cnt++; col_map_offd[cnt] = work[i]; } } for (i=0; i < o_cnt; i++) { for (j=0; j < num_cols_offd; j++) { if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } } hypre_TFree(work); } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(value); return (HYPRE_ParCSRMatrix) A; }
hypre_ParCSRMatrix * hypre_ParMatmul_FC( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd ) /* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the matrix product A*P. A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC where nC is the number of coarse rows/columns, nF the number of fine rows/columns. The size of C=A*P is (nC+nF)*nC, even though not all rows of C are actually computed. If we were to construct a matrix consisting only of the computed rows of C, its size would be nF*nC. "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. */ { /* To compute a submatrix of C containing only the computed data, i.e. only "Fine" rows, we would have to do a lot of computational work, with a lot of communication. The communication is because such a matrix would need global information that depends on which rows are "Fine". */ MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); double *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P); double *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P); HYPRE_Int *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P); double *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd); HYPRE_Int first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P); HYPRE_Int last_col_diag_P; HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P); HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag); HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; HYPRE_Int *map_P_to_C; hypre_CSRMatrix *C_diag; double *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; double *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int num_cols_offd_C = 0; hypre_CSRMatrix *Ps_ext; double *Ps_ext_data; HYPRE_Int *Ps_ext_i; HYPRE_Int *Ps_ext_j; double *P_ext_diag_data; HYPRE_Int *P_ext_diag_i; HYPRE_Int *P_ext_diag_j; HYPRE_Int P_ext_diag_size; double *P_ext_offd_data; HYPRE_Int *P_ext_offd_i; HYPRE_Int *P_ext_offd_j; HYPRE_Int P_ext_offd_size; HYPRE_Int *P_marker; HYPRE_Int *temp; HYPRE_Int i, j; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int n_rows_A_global, n_cols_A_global; HYPRE_Int n_rows_P_global, n_cols_P_global; HYPRE_Int allsquare = 0; HYPRE_Int cnt, cnt_offd, cnt_diag; HYPRE_Int num_procs; HYPRE_Int value; double a_entry; double a_b_product; n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A); n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P); n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P); if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P) { hypre_printf(" Error! Incompatible matrix dimensions!\n"); return NULL; } /* if (num_rows_A==num_cols_P) allsquare = 1; */ /*----------------------------------------------------------------------- * Extract P_ext, i.e. portion of P that is stored on neighbor procs * and needed locally for matrix matrix product *-----------------------------------------------------------------------*/ hypre_MPI_Comm_size(comm, &num_procs); if (num_procs > 1) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings within * hypre_ParCSRMatrixExtractBExt *--------------------------------------------------------------------*/ Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1); Ps_ext_data = hypre_CSRMatrixData(Ps_ext); Ps_ext_i = hypre_CSRMatrixI(Ps_ext); Ps_ext_j = hypre_CSRMatrixJ(Ps_ext); } P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_diag_size = 0; P_ext_offd_size = 0; last_col_diag_P = first_col_diag_P + num_cols_diag_P -1; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) P_ext_offd_size++; else P_ext_diag_size++; P_ext_diag_i[i+1] = P_ext_diag_size; P_ext_offd_i[i+1] = P_ext_offd_size; } if (P_ext_diag_size) { P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size); P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size); } if (P_ext_offd_size) { P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size); P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size); } cnt_offd = 0; cnt_diag = 0; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) { P_ext_offd_j[cnt_offd] = Ps_ext_j[j]; P_ext_offd_data[cnt_offd++] = Ps_ext_data[j]; } else { P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P; P_ext_diag_data[cnt_diag++] = Ps_ext_data[j]; } } if (num_procs > 1) { hypre_CSRMatrixDestroy(Ps_ext); Ps_ext = NULL; } cnt = 0; if (P_ext_offd_size || num_cols_offd_P) { temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P); for (i=0; i < P_ext_offd_size; i++) temp[i] = P_ext_offd_j[i]; cnt = P_ext_offd_size; for (i=0; i < num_cols_offd_P; i++) temp[cnt++] = col_map_offd_P[i]; } if (cnt) { qsort0(temp, 0, cnt-1); num_cols_offd_C = 1; value = temp[0]; for (i=1; i < cnt; i++) { if (temp[i] > value) { value = temp[i]; temp[num_cols_offd_C++] = value; } } } if (num_cols_offd_C) col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); for (i=0; i < num_cols_offd_C; i++) col_map_offd_C[i] = temp[i]; if (P_ext_offd_size || num_cols_offd_P) hypre_TFree(temp); for (i=0 ; i < P_ext_offd_size; i++) P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C, P_ext_offd_j[i], num_cols_offd_C); if (num_cols_offd_P) { map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P); cnt = 0; for (i=0; i < num_cols_offd_C; i++) if (col_map_offd_C[i] == col_map_offd_P[cnt]) { map_P_to_C[cnt++] = i; if (cnt == num_cols_offd_P) break; } } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C); /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /* no changes for the marked version above this point */ /* This function call is the first pass: */ hypre_ParMatmul_RowSizes_Marked( &C_diag_i, &C_offd_i, &P_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, P_diag_i, P_diag_j, P_offd_i, P_offd_j, P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j, map_P_to_C, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, allsquare, num_cols_diag_P, num_cols_offd_P, num_cols_offd_C, CF_marker, dof_func, dof_func_offd ); /* The above call of hypre_ParMatmul_RowSizes_Marked computed two scalars: C_diag_size, C_offd_size, and two arrays: C_diag_i, C_offd_i ( P_marker is also computed, but only used internally ) */ /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1; C_diag_data = hypre_CTAlloc(double, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(double, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { if ( CF_marker[i1] < 0 ) /* i1 is a fine row */ /* ... This and the coarse row code are the only parts between first pass and near the end where hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */ { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] ) { /* interpolate only like "functions" */ a_entry = A_offd_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_ext. *-----------------------------------------------------------*/ for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+P_ext_offd_j[jj3]; a_b_product = a_entry * P_ext_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else C_offd_data[P_marker[i3]] += a_b_product; } for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++) { i3 = P_ext_diag_j[jj3]; a_b_product = a_entry * P_ext_diag_data[jj3]; if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else C_diag_data[P_marker[i3]] += a_b_product; } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } /*----------------------------------------------------------------- * Loop over entries in row i1 of A_diag. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func[i2] ) { /* interpolate only like "functions" */ a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_diag. *-----------------------------------------------------------*/ for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++) { i3 = P_diag_j[jj3]; a_b_product = a_entry * P_diag_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[P_marker[i3]] += a_b_product; } } if (num_cols_offd_P) { for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]]; a_b_product = a_entry * P_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else { C_offd_data[P_marker[i3]] += a_b_product; } } } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } else /* i1 is a coarse row.*/ /* Copy P coarse-row values to C. This is useful if C is meant to become a replacement for P */ { if (num_cols_offd_P) { for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++) { C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd]; C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd]; ++jj_count_offd; } } for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++) { C_diag_j[jj_count_diag] = P_diag_j[jj2]; C_diag_data[jj_count_diag] = P_diag_data[jj2]; ++jj_count_diag; } } } C = hypre_ParCSRMatrixCreate( comm, n_rows_A_global, n_cols_P_global, row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size ); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_ParCSRMatrixOffd(C) = C_offd; if (num_cols_offd_C) { hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } /*----------------------------------------------------------------------- * Free various arrays *-----------------------------------------------------------------------*/ hypre_TFree(P_marker); hypre_TFree(P_ext_diag_i); if (P_ext_diag_size) { hypre_TFree(P_ext_diag_j); hypre_TFree(P_ext_diag_data); } hypre_TFree(P_ext_offd_i); if (P_ext_offd_size) { hypre_TFree(P_ext_offd_j); hypre_TFree(P_ext_offd_data); } if (num_cols_offd_P) hypre_TFree(map_P_to_C); return C; }
HYPRE_Int hypre_seqAMGSetup( hypre_ParAMGData *amg_data, HYPRE_Int p_level, HYPRE_Int coarse_threshold) { /* Par Data Structure variables */ hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data); MPI_Comm comm = hypre_ParCSRMatrixComm(Par_A_array[0]); MPI_Comm new_comm, seq_comm; hypre_ParCSRMatrix *A_seq = NULL; hypre_CSRMatrix *A_seq_diag; hypre_CSRMatrix *A_seq_offd; hypre_ParVector *F_seq = NULL; hypre_ParVector *U_seq = NULL; hypre_ParCSRMatrix *A; HYPRE_Int **dof_func_array; HYPRE_Int num_procs, my_id; HYPRE_Int not_finished_coarsening; HYPRE_Int level; HYPRE_Solver coarse_solver; /* misc */ dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data); /*MPI Stuff */ hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm,&my_id); /*initial */ level = p_level; not_finished_coarsening = 1; /* convert A at this level to sequential */ A = Par_A_array[level]; { double *A_seq_data = NULL; HYPRE_Int *A_seq_i = NULL; HYPRE_Int *A_seq_offd_i = NULL; HYPRE_Int *A_seq_j = NULL; double *A_tmp_data = NULL; HYPRE_Int *A_tmp_i = NULL; HYPRE_Int *A_tmp_j = NULL; HYPRE_Int *info, *displs, *displs2; HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); double *A_diag_data = hypre_CSRMatrixData(A_diag); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A); hypre_MPI_Group orig_group, new_group; HYPRE_Int *ranks, new_num_procs, *row_starts; info = hypre_CTAlloc(HYPRE_Int, num_procs); hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm); ranks = hypre_CTAlloc(HYPRE_Int, num_procs); new_num_procs = 0; for (i=0; i < num_procs; i++) if (info[i]) { ranks[new_num_procs] = i; info[new_num_procs++] = info[i]; } MPI_Comm_group(comm, &orig_group); hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group); MPI_Comm_create(comm, new_group, &new_comm); hypre_MPI_Group_free(&new_group); hypre_MPI_Group_free(&orig_group); if (num_rows) { /* alloc space in seq data structure only for participating procs*/ HYPRE_BoomerAMGCreate(&coarse_solver); HYPRE_BoomerAMGSetMaxRowSum(coarse_solver, hypre_ParAMGDataMaxRowSum(amg_data)); HYPRE_BoomerAMGSetStrongThreshold(coarse_solver, hypre_ParAMGDataStrongThreshold(amg_data)); HYPRE_BoomerAMGSetCoarsenType(coarse_solver, hypre_ParAMGDataCoarsenType(amg_data)); HYPRE_BoomerAMGSetInterpType(coarse_solver, hypre_ParAMGDataInterpType(amg_data)); HYPRE_BoomerAMGSetTruncFactor(coarse_solver, hypre_ParAMGDataTruncFactor(amg_data)); HYPRE_BoomerAMGSetPMaxElmts(coarse_solver, hypre_ParAMGDataPMaxElmts(amg_data)); if (hypre_ParAMGDataUserRelaxType(amg_data) > -1) HYPRE_BoomerAMGSetRelaxType(coarse_solver, hypre_ParAMGDataUserRelaxType(amg_data)); HYPRE_BoomerAMGSetRelaxOrder(coarse_solver, hypre_ParAMGDataRelaxOrder(amg_data)); HYPRE_BoomerAMGSetRelaxWt(coarse_solver, hypre_ParAMGDataUserRelaxWeight(amg_data)); if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1) HYPRE_BoomerAMGSetNumSweeps(coarse_solver, hypre_ParAMGDataUserNumSweeps(amg_data)); HYPRE_BoomerAMGSetNumFunctions(coarse_solver, hypre_ParAMGDataNumFunctions(amg_data)); HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1); HYPRE_BoomerAMGSetTol(coarse_solver, 0); /* Create CSR Matrix, will be Diag part of new matrix */ A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1); A_tmp_i[0] = 0; for (i=1; i < num_rows+1; i++) A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1]; num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows]; A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros); A_tmp_data = hypre_CTAlloc(double, num_nonzeros); cnt = 0; for (i=0; i < num_rows; i++) { for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++) { A_tmp_j[cnt] = A_diag_j[j]+first_row_index; A_tmp_data[cnt++] = A_diag_data[j]; } for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++) { A_tmp_j[cnt] = col_map_offd[A_offd_j[j]]; A_tmp_data[cnt++] = A_offd_data[j]; } } displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); displs[0] = 0; for (i=1; i < new_num_procs+1; i++) displs[i] = displs[i-1]+info[i-1]; size = displs[new_num_procs]; A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1); A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1); hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info, displs, HYPRE_MPI_INT, new_comm ); displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1); A_seq_i[0] = 0; displs2[0] = 0; for (j=1; j < displs[1]; j++) A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; for (i=1; i < new_num_procs; i++) { for (j=displs[i]; j < displs[i+1]; j++) { A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1]; } } A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1]; displs2[new_num_procs] = A_seq_i[size]; for (i=1; i < new_num_procs+1; i++) { displs2[i] = A_seq_i[displs[i]]; info[i-1] = displs2[i] - displs2[i-1]; } total_nnz = displs2[new_num_procs]; A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz); A_seq_data = hypre_CTAlloc(double, total_nnz); hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT, A_seq_j, info, displs2, HYPRE_MPI_INT, new_comm ); hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE, A_seq_data, info, displs2, hypre_MPI_DOUBLE, new_comm ); hypre_TFree(displs); hypre_TFree(displs2); hypre_TFree(A_tmp_i); hypre_TFree(A_tmp_j); hypre_TFree(A_tmp_data); row_starts = hypre_CTAlloc(HYPRE_Int,2); row_starts[0] = 0; row_starts[1] = size; /* Create 1 proc communicator */ seq_comm = hypre_MPI_COMM_SELF; A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size, row_starts, row_starts, 0,total_nnz,0); A_seq_diag = hypre_ParCSRMatrixDiag(A_seq); A_seq_offd = hypre_ParCSRMatrixOffd(A_seq); hypre_CSRMatrixData(A_seq_diag) = A_seq_data; hypre_CSRMatrixI(A_seq_diag) = A_seq_i; hypre_CSRMatrixJ(A_seq_diag) = A_seq_j; hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i; F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts); hypre_ParVectorOwnsPartitioning(F_seq) = 0; hypre_ParVectorOwnsPartitioning(U_seq) = 0; hypre_ParVectorInitialize(F_seq); hypre_ParVectorInitialize(U_seq); hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq); hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver; hypre_ParAMGDataACoarse(amg_data) = A_seq; hypre_ParAMGDataFCoarse(amg_data) = F_seq; hypre_ParAMGDataUCoarse(amg_data) = U_seq; hypre_ParAMGDataNewComm(amg_data) = new_comm; } hypre_TFree(info); hypre_TFree(ranks); } return 0; }
hypre_ParCSRMatrix * hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix); hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix); HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(matrix); HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix); HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix); HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix); HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix); HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd); HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag); HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd); hypre_ParCSRMatrix *matrix_C; HYPRE_Int *matrix_C_row_starts; HYPRE_Int *matrix_C_col_starts; HYPRE_Int *counter, *new_j_map; HYPRE_Int size_j, size_map, index, new_num_cols, removed = 0; HYPRE_Int *offd_j, *col_map_offd, *new_col_map_offd; HYPRE_Int num_procs, i, j; hypre_CSRMatrix *diag_nozeros, *offd_nozeros; hypre_MPI_Comm_size(comm,&num_procs); #ifdef HYPRE_NO_GLOBAL_PARTITION matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2); matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2); for(i = 0; i < 2; i++) { matrix_C_row_starts[i] = row_starts[i]*block_size; matrix_C_col_starts[i] = col_starts[i]*block_size; } #else matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1); matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1); for(i = 0; i < num_procs + 1; i++) { matrix_C_row_starts[i] = row_starts[i]*block_size; matrix_C_col_starts[i] = col_starts[i]*block_size; } #endif matrix_C = hypre_ParCSRMatrixCreate(comm, global_num_rows*block_size, global_num_cols*block_size, matrix_C_row_starts, matrix_C_col_starts, num_cols_offd*block_size, num_nonzeros_diag*block_size*block_size, num_nonzeros_offd*block_size*block_size); hypre_ParCSRMatrixInitialize(matrix_C); /* DIAG */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C)); hypre_ParCSRMatrixDiag(matrix_C) = hypre_CSRBlockMatrixConvertToCSRMatrix(diag); /* AB - added to delete zeros */ diag_nozeros = hypre_CSRMatrixDeleteZeros( hypre_ParCSRMatrixDiag(matrix_C), 1e-14); if(diag_nozeros) { hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C)); hypre_ParCSRMatrixDiag(matrix_C) = diag_nozeros; } /* OFF-DIAG */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C)); hypre_ParCSRMatrixOffd(matrix_C) = hypre_CSRBlockMatrixConvertToCSRMatrix(offd); /* AB - added to delete zeros - this just deletes from data and j arrays */ offd_nozeros = hypre_CSRMatrixDeleteZeros( hypre_ParCSRMatrixOffd(matrix_C), 1e-14); if(offd_nozeros) { hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C)); hypre_ParCSRMatrixOffd(matrix_C) = offd_nozeros; removed = 1; } /* now convert the col_map_offd */ for (i = 0; i < num_cols_offd; i++) for (j = 0; j < block_size; j++) hypre_ParCSRMatrixColMapOffd(matrix_C)[i*block_size + j] = hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]*block_size + j; /* if we deleted zeros, then it is possible that col_map_offd can be compressed as well - this requires some amount of work that could be skipped... */ if (removed) { size_map = num_cols_offd*block_size; counter = hypre_CTAlloc(HYPRE_Int, size_map); new_j_map = hypre_CTAlloc(HYPRE_Int, size_map); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(matrix_C)); col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix_C); size_j = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matrix_C)); /* mark which off_d entries are found in j */ for (i=0; i < size_j; i++) { counter[offd_j[i]] = 1; } /*now find new numbering for columns (we will delete the cols where counter = 0*/ index = 0; for (i=0; i < size_map; i++) { if (counter[i]) new_j_map[i] = index++; } new_num_cols = index; /* if there are some col entries to remove: */ if (!(index == size_map)) { /* go thru j and adjust entries */ for (i=0; i < size_j; i++) { offd_j[i] = new_j_map[offd_j[i]]; } /*now go thru col map and get rid of non-needed entries */ new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols); index = 0; for (i=0; i < size_map; i++) { if (counter[i]) { new_col_map_offd[index++] = col_map_offd[i]; } } /* set the new col map */ hypre_TFree(col_map_offd); hypre_ParCSRMatrixColMapOffd(matrix_C) = new_col_map_offd; /* modify the number of cols */ hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix_C)) = new_num_cols; } hypre_TFree(new_j_map); hypre_TFree(counter); } hypre_ParCSRMatrixSetNumNonzeros( matrix_C ); hypre_ParCSRMatrixSetDNumNonzeros( matrix_C ); /* we will not copy the comm package */ hypre_ParCSRMatrixCommPkg(matrix_C) = NULL; return matrix_C; }
HYPRE_Int hypre_BoomerAMGBlockCreateNodalA(hypre_ParCSRBlockMatrix *A, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A); hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag); HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag); HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag); HYPRE_Int bnnz = block_size*block_size; hypre_CSRBlockMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd); HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A); HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j=NULL; HYPRE_Real *AN_diag_data = NULL; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j = NULL; HYPRE_Real *AN_offd_data = NULL; HYPRE_Int *col_map_offd_AN = NULL; HYPRE_Int *row_starts_AN; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN = NULL; HYPRE_Int *send_procs_AN = NULL; HYPRE_Int *send_map_starts_AN = NULL; HYPRE_Int *send_map_elmts_AN = NULL; HYPRE_Int *recv_procs_AN = NULL; HYPRE_Int *recv_vec_starts_AN = NULL; HYPRE_Int i; HYPRE_Int ierr = 0; HYPRE_Int num_procs; HYPRE_Int cnt; HYPRE_Int norm_type; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int index, k; HYPRE_Real tmp; HYPRE_Real sum; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { hypre_BlockMatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A); } norm_type = fabs(option); /* Set up the new matrix AN */ #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]; } #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]; } #endif global_num_nodes = hypre_ParCSRBlockMatrixGlobalNumRows(A); num_nodes = hypre_CSRBlockMatrixNumRows(A_diag); /* the diag part */ num_nonzeros_diag = A_diag_i[num_nodes]; AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); for (i=0; i <= num_nodes; i++) { AN_diag_i[i] = A_diag_i[i]; } AN_diag_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes, num_nodes, num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i< num_nonzeros_diag; i++) { AN_diag_j[i] = A_diag_j[i]; hypre_CSRBlockMatrixBlockNorm(norm_type, &A_diag_data[i*bnnz], &tmp, block_size); AN_diag_data[i] = tmp; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (NEED * to get more below!)*/ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = -AN_diag_data[index]; } } /* copy the commpkg */ if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int, num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int, num_sends+1); send_map_starts_AN[0] = 0; for (i=0; i < num_sends; i++) { send_procs_AN[i] = send_procs[i]; send_map_starts_AN[i+1] = send_map_starts[i+1]; } cnt = send_map_starts_AN[num_sends]; for (i=0; i< cnt; i++) { send_map_elmts_AN[i] = send_map_elmts[i]; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int, num_recvs+1); if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int, num_recvs); recv_vec_starts_AN[0] = recv_vec_starts[0]; for (i=0; i < num_recvs; i++) { recv_procs_AN[i] = recv_procs[i]; recv_vec_starts_AN[i+1] = recv_vec_starts[i+1]; } hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } /* the off-diag part */ num_cols_offd = hypre_CSRBlockMatrixNumCols(A_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int, num_cols_offd); for (i=0; i < num_cols_offd; i++) { col_map_offd_AN[i] = col_map_offd[i]; } num_nonzeros_offd = A_offd_i[num_nodes]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); for (i=0; i <= num_nodes; i++) { AN_offd_i[i] = A_offd_i[i]; } AN_offd_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_offd); for (i=0; i< num_nonzeros_offd; i++) { AN_offd_j[i] = A_offd_j[i]; hypre_CSRBlockMatrixBlockNorm(norm_type, &A_offd_data[i*bnnz], &tmp, block_size); AN_offd_data[i] = tmp; } AN_offd = hypre_CSRMatrixCreate(num_nodes, num_cols_offd, num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } /* now create AN */ AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; *AN_ptr = AN; return (ierr); }
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A, HYPRE_Int nr, HYPRE_Int nc, hypre_ParCSRMatrix **blocks, int interleaved_rows, int interleaved_cols) { HYPRE_Int i, j, k; MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A); HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag); HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag); HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd); hypre_assert(local_rows % nr == 0 && local_cols % nc == 0); hypre_assert(global_rows % nr == 0 && global_cols % nc == 0); HYPRE_Int block_rows = local_rows / nr; HYPRE_Int block_cols = local_cols / nc; HYPRE_Int num_blocks = nr * nc; /* mark local rows and columns with block number */ HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows); HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols); for (i = 0; i < local_rows; i++) { row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows); } for (i = 0; i < local_cols; i++) { col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols); } /* determine the block numbers for offd columns */ HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols); hypre_ParCSRCommHandle *comm_handle; HYPRE_Int *int_buf_data; { /* make sure A has a communication package */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* calculate the final global column numbers for each block */ HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc); HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols); HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc; for (i = 0; i < local_cols; i++) { block_global_col[i] = first_col + count[col_block_num[i]]++; } hypre_TFree(count); /* use a Matvec communication pattern to determine offd_col_block_num */ HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); HYPRE_Int start, index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k]; } } hypre_TFree(block_global_col); comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_col_block_num); } /* create the block matrices */ HYPRE_Int num_procs = 1; if (!hypre_ParCSRMatrixAssumedPartition(A)) { hypre_MPI_Comm_size(comm, &num_procs); } HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); for (i = 0; i <= num_procs; i++) { row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr; col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc; } for (i = 0; i < num_blocks; i++) { blocks[i] = hypre_ParCSRMatrixCreate(comm, global_rows/nr, global_cols/nc, row_starts, col_starts, 0, 0, 0); } /* split diag part */ hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc); hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i])); hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i]; } /* finish communication, receive offd_col_block_num */ hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); /* decode global offd column numbers */ HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols); for (i = 0; i < offd_cols; i++) { offd_global_col[i] = offd_col_block_num[i] / nc; offd_col_block_num[i] %= nc; } /* split offd part */ hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i])); hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i]; } hypre_TFree(csr_blocks); hypre_TFree(col_block_num); hypre_TFree(row_block_num); /* update block col-maps */ for (int bi = 0; bi < nr; bi++) { for (int bj = 0; bj < nc; bj++) { hypre_ParCSRMatrix *block = blocks[bi*nc + bj]; hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block); HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd); HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols); for (i = j = 0; i < offd_cols; i++) { HYPRE_Int bn = offd_col_block_num[i]; if (bn == bj) { block_col_map[j++] = offd_global_col[i]; } } hypre_assert(j == block_offd_cols); hypre_ParCSRMatrixColMapOffd(block) = block_col_map; } } hypre_TFree(offd_global_col); hypre_TFree(offd_col_block_num); /* finish the new matrices, make them own all the stuff */ for (i = 0; i < num_blocks; i++) { hypre_ParCSRMatrixSetNumNonzeros(blocks[i]); hypre_MatvecCommPkgCreate(blocks[i]); hypre_ParCSRMatrixOwnsData(blocks[i]) = 1; /* only the first block will own the row/col_starts */ hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i; hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i; } }
/****************************************************************************** * * hypre_IJMatrixCreatePETSc * * creates AuxParCSRMatrix and ParCSRMatrix if necessary, * generates arrays row_starts and col_starts using either previously * set data local_m and local_n (user defined) or generates them evenly * distributed if not previously defined by user. * *****************************************************************************/ HYPRE_Int hypre_IJMatrixCreatePETSc(hypre_IJMatrix *matrix) { MPI_Comm comm = hypre_IJMatrixContext(matrix); HYPRE_Int global_m = hypre_IJMatrixM(matrix); HYPRE_Int global_n = hypre_IJMatrixN(matrix); hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix); HYPRE_Int local_m; HYPRE_Int local_n; HYPRE_Int ierr = 0; HYPRE_Int *row_starts; HYPRE_Int *col_starts; HYPRE_Int num_cols_offd = 0; HYPRE_Int num_nonzeros_diag = 0; HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_procs, my_id; HYPRE_Int equal; HYPRE_Int i; hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); if (aux_matrix) { local_m = hypre_AuxParCSRMatrixLocalNumRows(aux_matrix); local_n = hypre_AuxParCSRMatrixLocalNumCols(aux_matrix); } else { aux_matrix = hypre_AuxParCSRMatrixCreate(-1,-1,NULL); local_m = -1; local_n = -1; hypre_IJMatrixTranslator(matrix) = aux_matrix; } if (local_m < 0) { row_starts = NULL; } else { row_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1); if (my_id == 0 && local_m == global_m) { row_starts[1] = local_m; } else { hypre_MPI_Allgather(&local_m,1,HYPRE_MPI_INT,&row_starts[1],1,HYPRE_MPI_INT,comm); } } if (local_n < 0) { col_starts = NULL; } else { col_starts = hypre_CTAlloc(HYPRE_Int,num_procs+1); if (my_id == 0 && local_n == global_n) { col_starts[1] = local_n; } else { hypre_MPI_Allgather(&local_n,1,HYPRE_MPI_INT,&col_starts[1],1,HYPRE_MPI_INT,comm); } } if (row_starts && col_starts) { equal = 1; for (i=0; i < num_procs; i++) { row_starts[i+1] += row_starts[i]; col_starts[i+1] += col_starts[i]; if (row_starts[i+1] != col_starts[i+1]) equal = 0; } if (equal) { hypre_TFree(col_starts); col_starts = row_starts; } } hypre_IJMatrixLocalStorage(matrix) = hypre_ParCSRMatrixCreate(comm,global_m, global_n,row_starts, col_starts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); return ierr; }
/* Function: hypre_ParCSRMatrixEliminateAAe (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / 0 | A_ib \ Ae = | -----+--------- | \ A_bi | A_bb - I / */ void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **Ae, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim) { HYPRE_Int i, j, k; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int A_diag_nrows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int A_offd_ncols = hypre_CSRMatrixNumCols(A_offd); *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A), hypre_ParCSRMatrixGlobalNumRows(A), hypre_ParCSRMatrixGlobalNumCols(A), hypre_ParCSRMatrixRowStarts(A), hypre_ParCSRMatrixColStarts(A), 0, 0, 0); hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0); hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0); hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae); hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae); HYPRE_Int Ae_offd_ncols; HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *Ae_col_map_offd; HYPRE_Int *col_mark; HYPRE_Int *col_remap; /* figure out which offd cols should be eliminated */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows); HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* which of the local rows are to be eliminated */ for (i = 0; i < A_diag_nrows; i++) { eliminate_row[i] = 0; } for (i = 0; i < num_rowscols_to_elim; i++) { eliminate_row[rowscols_to_elim[i]] = 1; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); /* eliminate diagonal part, overlapping it with communication */ hypre_CSRMatrixElimCreate(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, NULL); hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, 1, NULL); hypre_CSRMatrixReorder(Ae_diag); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { offd_cols_to_elim[num_offd_cols_to_elim++] = i; } } hypre_TFree(int_buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); } /* eliminate the off-diagonal part */ col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); hypre_CSRMatrixElimCreate(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, col_mark); for (i = k = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { col_remap[i] = k++; } } hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, 0, col_remap); /* create col_map_offd for Ae */ Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_offd_ncols++; } } Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols); Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i]; } } hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd; hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols; hypre_TFree(col_remap); hypre_TFree(col_mark); hypre_TFree(offd_cols_to_elim); hypre_ParCSRMatrixSetNumNonzeros(*Ae); hypre_MatvecCommPkgCreate(*Ae); }
hypre_ParCSRMatrix *hypre_ParCSRAAt( hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); HYPRE_Complex *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Complex *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *A_ext_row_map; HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; hypre_CSRMatrix *C_diag; HYPRE_Complex *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; HYPRE_Complex *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int *new_C_offd_j; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int last_col_diag_C; HYPRE_Int num_cols_offd_C; hypre_CSRMatrix *A_ext; HYPRE_Complex *A_ext_data; HYPRE_Int *A_ext_i; HYPRE_Int *A_ext_j; HYPRE_Int num_rows_A_ext=0; HYPRE_Int first_row_index_A = hypre_ParCSRMatrixFirstRowIndex(A); HYPRE_Int first_col_diag_A = hypre_ParCSRMatrixFirstColDiag(A); HYPRE_Int *B_marker; HYPRE_Int i; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int count; HYPRE_Int n_rows_A, n_cols_A; HYPRE_Complex a_entry; HYPRE_Complex a_b_product; HYPRE_Complex zero = 0.0; n_rows_A = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A = hypre_ParCSRMatrixGlobalNumCols(A); if (n_cols_A != n_rows_A) { hypre_printf(" Error! Incompatible matrix dimensions!\n"); return NULL; } /*----------------------------------------------------------------------- * Extract A_ext, i.e. portion of A that is stored on neighbor procs * and needed locally for A^T in the matrix matrix product A*A^T *-----------------------------------------------------------------------*/ if (num_rows_diag_A != n_rows_A) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!hypre_ParCSRMatrixCommPkg(A)) { hypre_MatTCommPkgCreate(A); } A_ext = hypre_ParCSRMatrixExtractAExt( A, 1, &A_ext_row_map ); A_ext_data = hypre_CSRMatrixData(A_ext); A_ext_i = hypre_CSRMatrixI(A_ext); A_ext_j = hypre_CSRMatrixJ(A_ext); num_rows_A_ext = hypre_CSRMatrixNumRows(A_ext); } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ B_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag_A+num_rows_A_ext ); /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } hypre_ParAat_RowSizes( &C_diag_i, &C_offd_i, B_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, A_col_map_offd, A_ext_i, A_ext_j, A_ext_row_map, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, num_rows_A_ext, first_col_diag_A, first_row_index_A ); #if 0 /* debugging output: */ hypre_printf("A_ext_row_map (%i):",num_rows_A_ext); for ( i1=0; i1<num_rows_A_ext; ++i1 ) hypre_printf(" %i",A_ext_row_map[i1] ); hypre_printf("\nC_diag_i (%i):",C_diag_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_diag_i[i1] ); hypre_printf("\nC_offd_i (%i):",C_offd_size); for ( i1=0; i1<=num_rows_diag_A; ++i1 ) hypre_printf(" %i",C_offd_i[i1] ); hypre_printf("\n"); #endif /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_C = first_row_index_A + num_rows_diag_A - 1; C_diag_data = hypre_CTAlloc(HYPRE_Complex, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(HYPRE_Complex, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for ( i1=0; i1<num_rows_diag_A+num_rows_A_ext; ++i1 ) { B_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ B_marker[i1] = jj_count_diag; jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; C_diag_data[jj_count_diag] = zero; C_diag_j[jj_count_diag] = i1; jj_count_diag++; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ /* There are 3 CSRMatrix or CSRBooleanMatrix objects here: ext*ext, ext*diag, and ext*offd belong to another processor. diag*offd and offd*diag don't count - never share a column by definition. So we have to do 4 cases: diag*ext, offd*ext, diag*diag, and offd*offd. */ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /* diag*ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==i2+first_col_diag_A ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_col_diag_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; /* offd * ext */ /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of (A_ext)^T * That is, rows i3 having a column i2 of A_ext. * For now, for each row i3 of A_ext we crudely check _all_ * columns to see whether one matches i2. * For each entry (i2,i3) of (A_ext)^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) . This contributes to both the diag and offd * blocks of C. *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_A_ext; i3++ ) { for ( jj3=A_ext_i[i3]; jj3<A_ext_i[i3+1]; jj3++ ) { if ( A_ext_j[jj3]==A_col_map_offd[i2] ) { /* row i3, column i2 of A_ext; or, row i2, column i3 of (A_ext)^T */ a_b_product = a_entry * A_ext_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if ( A_ext_row_map[i3] < first_row_index_A || A_ext_row_map[i3] > last_col_diag_C ) { /* offd */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_offd) { B_marker[i3+num_rows_diag_A] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3; jj_count_offd++; } else C_offd_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } else { /* diag */ if (B_marker[i3+num_rows_diag_A] < jj_row_begin_diag) { B_marker[i3+num_rows_diag_A] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3-first_row_index_A; jj_count_diag++; } else C_diag_data[B_marker[i3+num_rows_diag_A]] += a_b_product; } } } } } } /* diag * diag */ /*----------------------------------------------------------------- * Loop over entries (columns) i2 in row i1 of A_diag. * For each such column we will find the contributions of the * corresponding rows i2 of A^T to C=A*A^T . Now we only look * at the local part of A^T - with columns (rows of A) living * on this processor. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). * For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the diagonal block of A. * It contributes to the diagonal block of C. * For each entry (i2,i3) of A^T, add A(i1,i2)*A(i3,i2) * to C(i1,i3) *-----------------------------------------------------------*/ for ( i3=0; i3<num_rows_diag_A; i3++ ) { for ( jj3=A_diag_i[i3]; jj3<A_diag_i[i3+1]; jj3++ ) { if ( A_diag_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_diag_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, mark it and increment * counter. *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of i3 loop */ } /* end of third i2 loop */ /* offd * offd */ /*----------------------------------------------------------- * Loop over offd columns i2 of A in A*A^T. Then * loop over offd entries (columns) i3 in row i2 of A^T * That is, rows i3 having a column i2 of A (local part). * For now, for each row i3 of A we crudely check _all_ * columns to see whether one matches i2. * This i3-loop is for the off-diagonal block of A. * It contributes to the diag block of C. * For each entry (i2,i3) of A^T, add A*A^T to C *-----------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; a_entry = A_offd_data[jj2]; for ( i3=0; i3<num_rows_diag_A; i3++ ) { /* ... note that num_rows_diag_A == num_rows_offd_A */ for ( jj3=A_offd_i[i3]; jj3<A_offd_i[i3+1]; jj3++ ) { if ( A_offd_j[jj3]==i2 ) { /* row i3, column i2 of A; or, row i2, column i3 of A^T */ a_b_product = a_entry * A_offd_data[jj3]; /*-------------------------------------------------------- * Check B_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution *--------------------------------------------------------*/ if (B_marker[i3] < jj_row_begin_diag) { B_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[B_marker[i3]] += a_b_product; } } } } /* end of last i3 loop */ } /* end of if (num_cols_offd_A) */ } /* end of fourth and last i2 loop */ #if 0 /* debugging printout */ hypre_printf("end of i1 loop: i1=%i jj_count_diag=%i\n", i1, jj_count_diag ); hypre_printf(" C_diag_j="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%i ",C_diag_j[jj3]); hypre_printf(" C_diag_data="); for ( jj3=0; jj3<jj_count_diag; ++jj3) hypre_printf("%f ",C_diag_data[jj3]); hypre_printf("\n"); hypre_printf(" C_offd_j="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%i ",C_offd_j[jj3]); hypre_printf(" C_offd_data="); for ( jj3=0; jj3<jj_count_offd; ++jj3) hypre_printf("%f ",C_offd_data[jj3]); hypre_printf("\n"); hypre_printf( " B_marker =" ); for ( it=0; it<num_rows_diag_A+num_rows_A_ext; ++it ) hypre_printf(" %i", B_marker[it] ); hypre_printf( "\n" ); #endif } /* end of i1 loop */ /*----------------------------------------------------------------------- * Delete 0-columns in C_offd, i.e. generate col_map_offd and reset * C_offd_j. Note that (with the indexing we have coming into this * block) col_map_offd_C[i3]==A_ext_row_map[i3]. *-----------------------------------------------------------------------*/ for ( i=0; i<num_rows_diag_A+num_rows_A_ext; ++i ) B_marker[i] = -1; for ( i=0; i<C_offd_size; i++ ) B_marker[ C_offd_j[i] ] = -2; count = 0; for (i=0; i < num_rows_diag_A + num_rows_A_ext; i++) { if (B_marker[i] == -2) { B_marker[i] = count; count++; } } num_cols_offd_C = count; if (num_cols_offd_C) { col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); new_C_offd_j = hypre_CTAlloc(HYPRE_Int,C_offd_size); /* ... a bit big, but num_cols_offd_C is too small. It might be worth computing the correct size, which is sum( no. columns in row i, over all rows i ) */ for (i=0; i < C_offd_size; i++) { new_C_offd_j[i] = B_marker[C_offd_j[i]]; col_map_offd_C[ new_C_offd_j[i] ] = A_ext_row_map[ C_offd_j[i] ]; } hypre_TFree(C_offd_j); C_offd_j = new_C_offd_j; } /*---------------------------------------------------------------- * Create C *----------------------------------------------------------------*/ C = hypre_ParCSRMatrixCreate(comm, n_rows_A, n_rows_A, row_starts_A, row_starts_A, num_cols_offd_C, C_diag_size, C_offd_size); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; if (num_cols_offd_C) { C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixOffd(C) = C_offd; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } else hypre_TFree(C_offd_i); /*----------------------------------------------------------------------- * Free B_ext and marker array. *-----------------------------------------------------------------------*/ if (num_cols_offd_A) { hypre_CSRMatrixDestroy(A_ext); A_ext = NULL; } hypre_TFree(B_marker); if ( num_rows_diag_A != n_rows_A ) hypre_TFree(A_ext_row_map); return C; }
HYPRE_ParCSRMatrix GenerateSysLaplacianVCoef( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Int num_fun, HYPRE_Real *mtrx, HYPRE_Real *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int row_index, row, col; HYPRE_Int index, diag_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local, nz_local; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int local_grid_size; HYPRE_Int first_j, j_ind; HYPRE_Int num_coeffs, num_offd_coeffs; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy, R_busy; HYPRE_Real val; /* for indexing in values */ HYPRE_Int sz = num_fun*num_fun; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; nz_local = nz_part[r+1] - nz_part[r]; my_id = r*(P*Q) + q*P + p; num_procs = P*Q*R; local_grid_size = nx_local*ny_local*nz_local; local_num_rows = num_fun*local_grid_size; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); R_busy = hypre_min(nz,R); num_cols_offd = 0; if (p) num_cols_offd += ny_local*nz_local; if (p < P_busy-1) num_cols_offd += ny_local*nz_local; if (q) num_cols_offd += nx_local*nz_local; if (q < Q_busy-1) num_cols_offd += nx_local*nz_local; if (r) num_cols_offd += nx_local*ny_local; if (r < R_busy-1) num_cols_offd += nx_local*ny_local; num_cols_offd *= num_fun; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 1; diag_i[0] = 0; offd_i[0] = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_i[cnt] = diag_i[cnt-1]; offd_i[cnt] = offd_i[cnt-1]; diag_i[cnt] += num_fun; if (iz > nz_part[r]) diag_i[cnt] += num_fun; else { if (iz) { offd_i[cnt] += num_fun; } } if (iy > ny_part[q]) diag_i[cnt] += num_fun; else { if (iy) { offd_i[cnt] += num_fun; } } if (ix > nx_part[p]) diag_i[cnt] += num_fun; else { if (ix) { offd_i[cnt] += num_fun; } } if (ix+1 < nx_part[p+1]) diag_i[cnt] += num_fun; else { if (ix+1 < nx) { offd_i[cnt] += num_fun; } } if (iy+1 < ny_part[q+1]) diag_i[cnt] += num_fun; else { if (iy+1 < ny) { offd_i[cnt] += num_fun; } } if (iz+1 < nz_part[r+1]) diag_i[cnt] += num_fun; else { if (iz+1 < nz) { offd_i[cnt] += num_fun; } } num_coeffs = diag_i[cnt]-diag_i[cnt-1]; num_offd_coeffs = offd_i[cnt]-offd_i[cnt-1]; cnt++; for (i=1; i < num_fun; i++) { diag_i[cnt] = diag_i[cnt-1]+num_coeffs; offd_i[cnt] = offd_i[cnt-1]+num_offd_coeffs; cnt++; } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt = diag_i[row_index];; o_cnt = offd_i[row_index];; num_coeffs = diag_i[row_index+1]-diag_i[row_index]; num_offd_coeffs = offd_i[row_index+1]-offd_i[row_index]; first_j = row_index; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[0*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; if (iz > nz_part[r]) { first_j = row_index-nx_local*ny_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iz) { first_j = num_fun*hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iy > ny_part[q]) { first_j = row_index-nx_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iy) { first_j = num_fun*hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (ix > nx_part[p]) { first_j = row_index-num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (ix) { first_j = num_fun*hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (ix+1 < nx_part[p+1]) { first_j = row_index+num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (ix+1 < nx) { first_j = num_fun*hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[1*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iy+1 < ny_part[q+1]) { first_j = row_index+nx_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iy+1 < ny) { first_j = num_fun*hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[2*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } if (iz+1 < nz_part[r+1]) { first_j = row_index+nx_local*ny_local*num_fun; for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = cnt+i*num_coeffs+j; diag_j[j_ind] = first_j+j; diag_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } cnt += num_fun; } else { if (iz+1 < nz) { first_j = num_fun*hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); for (i=0; i < num_fun; i++) { for (j=0; j < num_fun; j++) { j_ind = o_cnt+i*num_offd_coeffs+j; offd_j[j_ind] = first_j+j; offd_data[j_ind] = value[3*sz + i*num_fun+j]*mtrx[i*num_fun+j]; } } o_cnt += num_fun; } } row_index += num_fun; } } } if (num_procs > 1) { cnt = 0; for (i=0; i < local_num_rows; i+=num_fun) { for (j=offd_i[i]; j < offd_i[i+1]; j++) { col_map_offd[cnt++] = offd_j[j]; } } hypre_qsort0(col_map_offd, 0, num_cols_offd-1); for (i=0; i < num_fun*num_cols_offd; i++) for (j=hypre_min(0,abs(i-num_fun)); j < num_cols_offd; j++) if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } for (i=0; i < num_procs+1; i++) global_part[i] *= num_fun; for (j=1; j< num_fun; j++) { for (i=0; i<local_grid_size; i++) { row = i*num_fun+j; diag_index = diag_i[row]; index = diag_index+j; val = diag_data[diag_index]; col = diag_j[diag_index]; diag_data[diag_index] = diag_data[index]; diag_j[diag_index] = diag_j[index]; diag_data[index] = val; diag_j[index] = col; } } #ifdef HYPRE_NO_GLOBAL_PARTITION /* ideally we would use less storage earlier in this function, but this is fine for testing */ { HYPRE_Int tmp1, tmp2; tmp1 = global_part[my_id]; tmp2 = global_part[my_id + 1]; hypre_TFree(global_part); global_part = hypre_CTAlloc(HYPRE_Int, 2); global_part[0] = tmp1; global_part[1] = tmp2; } #endif A = hypre_ParCSRMatrixCreate(comm, num_fun*grid_size, num_fun*grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); return (HYPRE_ParCSRMatrix) A; }
HYPRE_ParCSRMatrix GenerateLaplacian( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Real *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j = NULL; HYPRE_Real *offd_data = NULL; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local, nz_local; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy, R_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; nz_local = nz_part[r+1] - nz_part[r]; my_id = r*(P*Q) + q*P + p; num_procs = P*Q*R; local_num_rows = nx_local*ny_local*nz_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); R_busy = hypre_min(nz,R); num_cols_offd = 0; if (p) num_cols_offd += ny_local*nz_local; if (p < P_busy-1) num_cols_offd += ny_local*nz_local; if (q) num_cols_offd += nx_local*nz_local; if (q < Q_busy-1) num_cols_offd += nx_local*nz_local; if (r) num_cols_offd += nx_local*ny_local; if (r < R_busy-1) num_cols_offd += nx_local*ny_local; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 1; o_cnt = 1; diag_i[0] = 0; offd_i[0] = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iz > nz_part[r]) diag_i[cnt]++; else { if (iz) { offd_i[o_cnt]++; } } if (iy > ny_part[q]) diag_i[cnt]++; else { if (iy) { offd_i[o_cnt]++; } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) diag_i[cnt]++; else { if (iy+1 < ny) { offd_i[o_cnt]++; } } if (iz+1 < nz_part[r+1]) diag_i[cnt]++; else { if (iz+1 < nz) { offd_i[o_cnt]++; } } cnt++; o_cnt++; } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iz > nz_part[r]) { diag_j[cnt] = row_index-nx_local*ny_local; diag_data[cnt++] = value[3]; } else { if (iz) { offd_j[o_cnt] = hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[3]; } } if (iy > ny_part[q]) { diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { offd_j[o_cnt] = hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[2]; } } if (iz+1 < nz_part[r+1]) { diag_j[cnt] = row_index+nx_local*ny_local; diag_data[cnt++] = value[3]; } else { if (iz+1 < nz) { offd_j[o_cnt] = hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[3]; } } row_index++; } } } if (num_procs > 1) { for (i=0; i < num_cols_offd; i++) col_map_offd[i] = offd_j[i]; hypre_qsort0(col_map_offd, 0, num_cols_offd-1); for (i=0; i < num_cols_offd; i++) for (j=0; j < num_cols_offd; j++) if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } #ifdef HYPRE_NO_GLOBAL_PARTITION /* ideally we would use less storage earlier in this function, but this is fine for testing */ { HYPRE_Int tmp1, tmp2; tmp1 = global_part[my_id]; tmp2 = global_part[my_id + 1]; hypre_TFree(global_part); global_part = hypre_CTAlloc(HYPRE_Int, 2); global_part[0] = tmp1; global_part[1] = tmp2; } #endif A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); return (HYPRE_ParCSRMatrix) A; }
HYPRE_Int hypre_ParChordMatrixToParCSRMatrix( hypre_ParChordMatrix *Ac, MPI_Comm comm, hypre_ParCSRMatrix **pAp ) { /* Some parts of this function are copied from hypre_CSRMatrixToParCSRMatrix. */ hypre_ParCSRMatrix *Ap; HYPRE_Int *row_starts, *col_starts; HYPRE_Int global_num_rows, global_num_cols, my_id, num_procs; HYPRE_Int num_cols_offd, num_nonzeros_diag, num_nonzeros_offd; HYPRE_Int *local_num_rows; /* not computed HYPRE_Int *local_num_nonzeros; */ HYPRE_Int num_nonzeros, first_col_diag, last_col_diag; HYPRE_Int i,ic,ij,ir,ilocal,p,r,r_p,r_global,r_local, jlen; HYPRE_Int *a_i, *a_j, *ilen; HYPRE_Int **rdofs, **ps; double data; double *a_data; double **datas; hypre_CSRMatrix *local_A; hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &num_procs); hypre_ParChordMatrix_RowStarts ( Ac, comm, &row_starts, &global_num_cols ); /* ... this function works correctly only under some assumptions; see the function definition for details */ global_num_rows = row_starts[num_procs] - row_starts[0]; col_starts = NULL; /* The offd and diag blocks aren't defined until we have both row and column partitions... */ num_cols_offd = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; Ap = hypre_ParCSRMatrixCreate( comm, global_num_rows, global_num_cols, row_starts, col_starts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); *pAp = Ap; row_starts = hypre_ParCSRMatrixRowStarts(Ap); col_starts = hypre_ParCSRMatrixColStarts(Ap); local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs); for (i=0; i < num_procs; i++) local_num_rows[i] = row_starts[i+1] - row_starts[i]; num_nonzeros = 0; for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) { num_nonzeros += hypre_ParChordMatrixNumInchords(Ac)[p]; }; local_A = hypre_CSRMatrixCreate( local_num_rows[my_id], global_num_cols, num_nonzeros ); /* Compute local CSRMatrix-like i,j arrays for this processor. */ ps = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) ); rdofs = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) ); datas = hypre_CTAlloc( double*, hypre_ParChordMatrixNumIdofs(Ac) ); ilen = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac) ); jlen = 0; for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) { ilen[i] = 0; ps[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) ); rdofs[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) ); datas[i] = hypre_CTAlloc( double, hypre_ParChordMatrixNumRdofs(Ac) ); /* ... rdofs[i], datas[i] will generally, not always, be much too big */ } for ( p=0; p<hypre_ParChordMatrixNumInprocessors(Ac); ++p ) { for ( ic=0; ic<hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic ) { ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic]; r = hypre_ParChordMatrixInchordRdof(Ac)[p][ic]; data = hypre_ParChordMatrixInchordData(Ac)[p][ic]; ps[ilocal][ ilen[ilocal] ] = p; rdofs[ilocal][ ilen[ilocal] ] = r; datas[ilocal][ ilen[ilocal] ] = data; ++ilen[ilocal]; ++jlen; } }; a_i = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac)+1 ); a_j = hypre_CTAlloc( HYPRE_Int, jlen ); a_data = hypre_CTAlloc( double, jlen ); a_i[0] = 0; for ( ilocal=0; ilocal<hypre_ParChordMatrixNumIdofs(Ac); ++ilocal ) { a_i[ilocal+1] = a_i[ilocal] + ilen[ilocal]; ir = 0; for ( ij=a_i[ilocal]; ij<a_i[ilocal+1]; ++ij ) { p = ps[ilocal][ir]; r_p = rdofs[ilocal][ir]; /* local in proc. p */ r_global = r_p + hypre_ParChordMatrixFirstindexRdof(Ac)[p]; r_local = r_global - hypre_ParChordMatrixFirstindexRdof(Ac)[my_id]; a_j[ij] = r_local; a_data[ij] = datas[ilocal][ir]; ir++; }; }; for ( i=0; i<hypre_ParChordMatrixNumIdofs(Ac); ++i ) { hypre_TFree( ps[i] ); hypre_TFree( rdofs[i] ); hypre_TFree( datas[i] ); }; hypre_TFree( ps ); hypre_TFree( rdofs ); hypre_TFree( datas ); hypre_TFree( ilen ); first_col_diag = col_starts[my_id]; last_col_diag = col_starts[my_id+1]-1; hypre_CSRMatrixData(local_A) = a_data; hypre_CSRMatrixI(local_A) = a_i; hypre_CSRMatrixJ(local_A) = a_j; hypre_CSRMatrixOwnsData(local_A) = 0; GenerateDiagAndOffd(local_A, Ap, first_col_diag, last_col_diag); /* set pointers back to NULL before destroying */ if (my_id == 0) { hypre_TFree(a_data); /* ... the data has been copied into different diag & offd arrays of Ap */ hypre_TFree(a_j); hypre_TFree(a_i); hypre_CSRMatrixData(local_A) = NULL; hypre_CSRMatrixI(local_A) = NULL; hypre_CSRMatrixJ(local_A) = NULL; } hypre_CSRMatrixDestroy(local_A); hypre_TFree(local_num_rows); /* hypre_TFree(csr_matrix_datatypes);*/ return 0; }