/*--------------------------------------------------------------------------
 * hypre_NodalCollapseBlocks
 *
 * Fills the column indices (AN_j) and values (AN_data) of a node-level CSR
 * matrix by collapsing each group of num_functions consecutive rows of the
 * CSR matrix (A_i, A_j, A_data) into one node row.  Column c of the input
 * is sent to node column col_to_node[c].
 *
 * mode selects how the entries that land in the same node entry are merged:
 *    1 : square root of the sum of squares (Frobenius norm)
 *    2 : sum of absolute values divided by num_functions^2
 *    3 : entry of largest absolute value (the signed value is stored)
 *    4 : largest per-function-row sum of absolute values (inf. norm)
 *    6 : plain sum
 * Any other mode leaves AN_j, AN_data and counter untouched.
 *
 * counter must hold at least one slot per node column and be preset to -1;
 * it is used as scratch space (position of each node column in the row
 * currently being assembled).  AN_j/AN_data must have room for
 * AN_num_nonzeros entries, the count produced by the caller's sizing pass.
 *--------------------------------------------------------------------------*/

static void
hypre_NodalCollapseBlocks(HYPRE_Int  mode,
                          HYPRE_Int  num_nodes,
                          HYPRE_Int  num_functions,
                          HYPRE_Int *A_i,
                          HYPRE_Int *A_j,
                          double    *A_data,
                          HYPRE_Int *col_to_node,
                          HYPRE_Int *counter,
                          HYPRE_Int *AN_j,
                          double    *AN_data,
                          HYPRE_Int  AN_num_nonzeros)
{
   HYPRE_Int  i, j, k;
   HYPRE_Int  col, pos, is_new;
   HYPRE_Int  row = 0;
   HYPRE_Int  index = 0;        /* next free slot in AN_j / AN_data */
   HYPRE_Int  start_index = 0;  /* first slot of the node row being built */
   HYPRE_Int  num_fun2 = num_functions*num_functions;
   double     val;
   double    *row_sums = NULL;  /* mode 4 only: one partial sum per function row */

   if (mode != 1 && mode != 2 && mode != 3 && mode != 4 && mode != 6)
   {
      return;
   }

   if (mode == 4)
   {
      row_sums = hypre_CTAlloc(double, AN_num_nonzeros*num_functions);
   }

   for (i=0; i < num_nodes; i++)
   {
      for (j=0; j < num_functions; j++)
      {
         for (k=A_i[row]; k < A_i[row+1]; k++)
         {
            col = col_to_node[A_j[k]];
            val = A_data[k];

            /* a counter value below start_index belongs to an earlier node
               row (or is still -1), i.e. col is new for this node row */
            is_new = (counter[col] < start_index);
            if (is_new)
            {
               counter[col] = index;
               AN_j[index] = col;
               pos = index++;
            }
            else
            {
               pos = counter[col];
            }

            switch (mode)
            {
               case 1: /* frobenius norm */
                  if (is_new) AN_data[pos] = val*val;
                  else        AN_data[pos] += val*val;
                  break;

               case 2: /* sum of abs. value of all elements in each block */
                  if (is_new) AN_data[pos] = fabs(val);
                  else        AN_data[pos] += fabs(val);
                  break;

               case 3: /* largest element of each block (true value) */
                  if (is_new || fabs(val) > fabs(AN_data[pos]))
                     AN_data[pos] = val;
                  break;

               case 4: /* inf. norm (row-sum) */
                  if (is_new) row_sums[pos*num_functions + j] = fabs(val);
                  else        row_sums[pos*num_functions + j] += fabs(val);
                  break;

               case 6: /* sum of all elements in each block */
                  if (is_new) AN_data[pos] = val;
                  else        AN_data[pos] += val;
                  break;
            }
         }
         row++;
      }
      start_index = index;
   }

   if (mode == 1)
   {
      for (i=0; i < AN_num_nonzeros; i++)
         AN_data[i] = sqrt(AN_data[i]);
   }
   else if (mode == 2)
   {
      for (i=0; i < AN_num_nonzeros; i++)
         AN_data[i] /= num_fun2;
   }
   else if (mode == 4)
   {
      for (i=0; i < AN_num_nonzeros; i++)
      {
         AN_data[i] = row_sums[i*num_functions];
         for (j=1; j< num_functions; j++)
         {
            AN_data[i] = hypre_max( AN_data[i],row_sums[i*num_functions+j]);
         }
      }
      hypre_TFree(row_sums);
   }
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateNodalA
 *
 * Builds the nodal matrix AN from the systems matrix A: unknown i of A
 * belongs to node i/num_functions, and every block of A entries that maps
 * to the same (node, node) pair is condensed into one scalar entry of AN.
 *
 * Parameters:
 *    A             systems matrix.  NOTE: A is modified when the node-level
 *                  communication pattern covers more unknowns than A's own:
 *                  its off-diagonal column map, off-diagonal column indices,
 *                  off-diagonal column count, and the recv_vec_starts /
 *                  send_map_starts / send_map_elmts arrays of its comm
 *                  package are then expanded to whole nodes.  A comm package
 *                  is created for A if it has none.
 *    num_functions number of unknowns per node
 *    dof_func      not referenced by this routine
 *    option        |option| selects the block norm (see
 *                  hypre_NodalCollapseBlocks: 1, 2, 3, 4 or 6)
 *    diag_option   1: diagonal entry of each node row becomes the negative
 *                     sum of all other entries in that row (diag and offd);
 *                  2: diagonal entry of each node row is negated;
 *                  otherwise the diagonal is left as computed.
 *                  Both options take the first stored entry of a row of
 *                  AN_diag to be its diagonal and assume no row is empty.
 *    AN_ptr        receives the new nodal matrix
 *
 * Returns ierr (0), or 87 when a row_starts entry of A is not a multiple of
 * num_functions; in that case *AN_ptr is not set.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix  *A,
                            HYPRE_Int            num_functions,
                            HYPRE_Int           *dof_func,
                            HYPRE_Int            option,
                            HYPRE_Int            diag_option,
                            hypre_ParCSRMatrix **AN_ptr)
{
   MPI_Comm            comm            = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag          = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int          *A_diag_i        = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data     = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix    *A_offd          = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int          *A_offd_i        = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data     = hypre_CSRMatrixData(A_offd);
   HYPRE_Int          *A_diag_j        = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int          *A_offd_j        = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int          *row_starts      = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int          *col_map_offd    = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int           num_variables   = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int           num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd = 0;

   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int          *AN_diag_i;
   HYPRE_Int          *AN_diag_j;
   double             *AN_diag_data;
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int          *AN_offd_i;
   HYPRE_Int          *AN_offd_j;
   double             *AN_offd_data = NULL;
   HYPRE_Int          *col_map_offd_AN;
   HYPRE_Int          *new_col_map_offd;
   HYPRE_Int          *row_starts_AN;
   HYPRE_Int           AN_num_nonzeros_diag = 0;
   HYPRE_Int           AN_num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd_AN;
   HYPRE_Int           new_num_cols_offd;

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int           num_sends;
   HYPRE_Int           num_recvs;
   HYPRE_Int          *send_procs;
   HYPRE_Int          *send_map_starts;
   HYPRE_Int          *send_map_elmts;
   HYPRE_Int          *new_send_map_elmts;
   HYPRE_Int          *recv_procs;
   HYPRE_Int          *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int          *send_procs_AN;
   HYPRE_Int          *send_map_starts_AN;
   HYPRE_Int          *send_map_elmts_AN;
   HYPRE_Int          *recv_procs_AN;
   HYPRE_Int          *recv_vec_starts_AN;

   HYPRE_Int           i, j, k, k_map;

   HYPRE_Int           ierr = 0;

   HYPRE_Int           index, row;
   HYPRE_Int           num_procs;
   HYPRE_Int           node, cnt;
   HYPRE_Int           mode;
   HYPRE_Int           new_send_elmts_size;

   HYPRE_Int           global_num_nodes;
   HYPRE_Int           num_nodes;
   HYPRE_Int          *map_to_node;
   HYPRE_Int          *map_to_map = NULL;
   HYPRE_Int          *counter;

   double              sum;

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* integer absolute value; the sign of option is not used here */
   mode = (option < 0) ? -option : option;

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         hypre_TFree(row_starts_AN);
         return (87);
      }
   }

   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;
#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         hypre_TFree(row_starts_AN);
         return (87);
      }
   }

   global_num_nodes = row_starts_AN[num_procs];
#endif

   num_nodes = num_variables/num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);
   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   /*-----------------------------------------------------------------
    * diag part: first pass counts the distinct node columns per node row
    *-----------------------------------------------------------------*/

   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
         for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
         {
            k_map = map_to_node[A_diag_j[k]];
            if (counter[k_map] < i)
            {
               counter[k_map] = i;
               AN_num_nonzeros_diag++;
            }
         }
         row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;

   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   /* second pass fills column indices and condensed values */
   hypre_NodalCollapseBlocks(mode, num_nodes, num_functions,
                             A_diag_i, A_diag_j, A_diag_data,
                             map_to_node, counter,
                             AN_diag_j, AN_diag_data, AN_num_nonzeros_diag);

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries
         (the offd contribution is added further below) */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
         }
         AN_diag_data[index] = -sum;
      }
   }
   else if (diag_option == 2)
   {
      /* make all diagonal entries negative;
         the diagonal is the first element listed in each row */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }

   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   /*-----------------------------------------------------------------
    * node-level communication package, derived from the one of A
    *-----------------------------------------------------------------*/

   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends)
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      /* collapse each processor's send list to nodes; a node is appended
         only when it exceeds the last one stored */
      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
         k_map = send_map_starts[i];
         if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
               send_map_elmts_AN[cnt++] = node;
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   /*-----------------------------------------------------------------
    * offd part: node column map and sizing pass
    *-----------------------------------------------------------------*/

   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      /* map_to_node is reused for the off-diagonal columns */
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }
      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }

      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      /* map_to_map[c]: local offd column of AN for local offd column c of A */
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
            if (node > col_map_offd_AN[cnt-1])
            {
               col_map_offd_AN[cnt++] = node;
            }
            map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
            for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
            {
               k_map = map_to_map[A_offd_j[k]];
               if (counter[k_map] < i)
               {
                  counter[k_map] = i;
                  AN_num_nonzeros_offd++;
               }
            }
            row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,
                                   AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;
      AN_offd_i[0] = 0;

      hypre_NodalCollapseBlocks(mode, num_nodes, num_functions,
                                A_offd_i, A_offd_j, A_offd_data,
                                map_to_map, counter,
                                AN_offd_j, AN_offd_data, AN_num_nonzeros_offd);
   }

   /* released here, not inside the branch above, so that it is also freed
      when offd columns exist but no offd entries were counted */
   if (map_to_map)
   {
      hypre_TFree(map_to_map);
   }

   if (diag_option ==1 )
   {
      /* make the diag entry the negative of the sum of off-diag entries
         (here we are adding the off_diag contribution);
         the diagonal is the first element listed in each row of
         AN_diag_data */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
         }
         index = AN_diag_i[i];       /* location of diag entry in data */
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
   }

   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
                                 row_starts_AN, row_starts_AN, num_cols_offd_AN,
                                 AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the
      ones created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;

   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   /*-----------------------------------------------------------------
    * expand A's offd column map and receive pattern to whole nodes
    *-----------------------------------------------------------------*/

   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
         for (j=0; j < num_functions; j++)
         {
            new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }

      /* col_map_offd is reused as old-local -> new-local column map */
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] > new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
         A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }

   hypre_TFree(map_to_node);

   /*-----------------------------------------------------------------
    * expand A's send pattern to whole nodes
    *-----------------------------------------------------------------*/

   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
         {
            for (k=0; k < num_functions; k++)
               new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
         }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }

   *AN_ptr = AN;

   hypre_TFree(counter);

   return (ierr);
}
HYPRE_Int hypre_InexactPartitionOfUnityInterpolation (hypre_CSRMatrix **P_pointer, HYPRE_Int *i_dof_dof, HYPRE_Int *j_dof_dof, HYPRE_Real *a_dof_dof, HYPRE_Real *unit_vector, HYPRE_Int *i_domain_dof, HYPRE_Int *j_domain_dof, HYPRE_Int num_domains, /* == num-coarsedofs */ HYPRE_Int num_dofs) { HYPRE_Int ierr = 0; HYPRE_Int i,j,k; HYPRE_Int ind = 1; HYPRE_Int nu, nu_max = 1; HYPRE_Real eps = 1.e-24; HYPRE_Int max_iter = 1000; HYPRE_Int iter; HYPRE_Real delta0, delta_old, delta, alpha, tau, beta; HYPRE_Real aux, diag; HYPRE_Real *P_t_coeff; hypre_CSRMatrix *P_t, *P; HYPRE_Real *x,*r,*d,*g,*h; HYPRE_Real *row_sum; HYPRE_Int *i_global_to_local; HYPRE_Int local_dof_counter; HYPRE_Real *diag_dof_dof; /* ------------------------------------------------------------------ domain_dof relation should satisfy the following property: num_domains == num_coarsedofs; each domain contains only one coarse dof; ------------------------------------------------------------------ */ i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs); for (i=0; i < num_dofs; i++) i_global_to_local[i] = -1; local_dof_counter = 0; for (i=0; i < num_domains; i++) if (local_dof_counter < i_domain_dof[i+1]-i_domain_dof[i]) local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i]; /* solve T x = unit_vector; --------------------------------------- */ /* cg loop: ------------------------------------------------------- */ hypre_printf("\n---------------------- num_domains: %d, nnz: %d;\n", num_domains, i_domain_dof[num_domains]); x = hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); g = hypre_CTAlloc(HYPRE_Real, num_dofs); r = hypre_CTAlloc(HYPRE_Real, num_dofs); h = hypre_CTAlloc(HYPRE_Real, local_dof_counter); diag_dof_dof = hypre_CTAlloc(HYPRE_Real, i_dof_dof[num_dofs]); for (i=0; i<num_dofs; i++) for (j=i_dof_dof[i]; j<i_dof_dof[i+1]; j++) if (i!=j_dof_dof[j]) diag_dof_dof[j] = 0.e0; else diag_dof_dof[j] = a_dof_dof[j]; delta0 = 0.e0; for (i=0; i < num_dofs; i++) { x[i] = 0.e0; 
r[i] = unit_vector[i]; delta0+=r[i]*r[i]; } /* compute initial iterate: ierr = compute_sum_A_i_action(x, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); ------------------------------------- */ /* matrix vector product: g < -- T x; ------------------------------ */ ierr= compute_sym_GS_T_action(g, x, h, i_domain_dof, j_domain_dof, nu_max, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); delta = 0; for (i=0; i < num_dofs; i++) { r[i] -= g[i]; delta+=r[i]*r[i]; } if (delta < eps * delta0) goto end_cg; ierr= compute_sym_GS_T_action(g, unit_vector, h, i_domain_dof, j_domain_dof, 1, i_dof_dof, j_dof_dof, diag_dof_dof, i_global_to_local, num_domains, num_dofs); /* ierr = compute_sum_A_i_action(d, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); */ for (i=0; i < num_dofs; i++) d[i]=r[i]/g[i]; /* d contains precondtitioned residual: ------------------------ */ delta = 0.e0; for (i=0; i < num_dofs; i++) delta+=d[i]*r[i]; delta0 = delta; eps = 1.e-12; iter = 0; loop: /* matrix vector product: -------------------------------------- */ ierr= compute_sym_GS_T_action(g, d, h, i_domain_dof, j_domain_dof, nu_max, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); tau = 0.e0; for (i=0; i < num_dofs; i++) tau += d[i]*g[i]; alpha = delta/tau; for (i=0; i < num_dofs; i++) { x[i] += alpha * d[i]; r[i] -= alpha * g[i]; } iter++; delta_old = delta; /* ierr = compute_sum_A_i_action(g, r, i_domain_dof, j_domain_dof, i_dof_dof, j_dof_dof, a_dof_dof, i_global_to_local, num_domains, num_dofs); */ ierr= compute_sym_GS_T_action(g, unit_vector, h, i_domain_dof, j_domain_dof, 1, i_dof_dof, j_dof_dof, diag_dof_dof, i_global_to_local, num_domains, num_dofs); for (i=0; i < num_dofs; i++) g[i] = r[i]/g[i]; delta = 0.e0; for (i=0; i < num_dofs; i++) delta += g[i] * r[i]; hypre_printf("\n---------------------- iter: %d, delta: %le\n", 
iter, delta); if (delta < eps * delta0 || iter > max_iter) goto end_cg; beta = delta/delta_old; for (i=0; i < num_dofs; i++) d[i] = g[i] + beta * d[i]; goto loop; end_cg: hypre_printf("\n END CG in partition of unity interpolation; num_iters: %d\n", iter); hypre_TFree(r); hypre_TFree(g); hypre_TFree(d); /* ith column of P is T_i x; ----------------------------------- */ P_t_coeff = hypre_CTAlloc(HYPRE_Real, i_domain_dof[num_domains]); for (i=0; i < num_domains; i++) { for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { i_global_to_local[j_domain_dof[j]] = j-i_domain_dof[i]; h[j-i_domain_dof[i]] = 0.e0; } nu = 0; loop_nu: for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { aux = x[j_domain_dof[j]]; for (k=i_dof_dof[j_domain_dof[j]]; k<i_dof_dof[j_domain_dof[j]+1]; k++) if (i_global_to_local[j_dof_dof[k]] > -1) { /* this is a_{i_loc, j_loc} --------------------------------- */ if (j_dof_dof[k] != j_domain_dof[j]) { aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]]; } else { diag = a_dof_dof[k]; } } h[i_global_to_local[j_domain_dof[j]]] = aux/diag; } for (j=i_domain_dof[i+1]-1; j >= i_domain_dof[i]; j--) { aux = x[j_domain_dof[j]]; for (k =i_dof_dof[j_domain_dof[j]+1]-1; k>=i_dof_dof[j_domain_dof[j]]; k--) if (i_global_to_local[j_dof_dof[k]] > -1) { /* this is a_{i_loc, j_loc} --------------------------------- */ if (j_dof_dof[k] != j_domain_dof[j]) { aux -= a_dof_dof[k] * h[i_global_to_local[j_dof_dof[k]]]; } else { diag = a_dof_dof[k]; } } h[i_global_to_local[j_domain_dof[j]]] = aux/diag; } nu++; if (nu < nu_max) goto loop_nu; for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { P_t_coeff[j]= h[i_global_to_local[j_domain_dof[j]]]; i_global_to_local[j_domain_dof[j]] = -1; } } hypre_TFree(diag_dof_dof); hypre_TFree(x); hypre_TFree(h); hypre_TFree(i_global_to_local); P_t = hypre_CSRMatrixCreate(num_domains, num_dofs, i_domain_dof[num_domains]); hypre_CSRMatrixData(P_t) = P_t_coeff; hypre_CSRMatrixI(P_t) = i_domain_dof; hypre_CSRMatrixJ(P_t) = j_domain_dof; 
row_sum = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) row_sum[i] = 0.e0; for (i=0; i < num_domains; i++) for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) row_sum[j_domain_dof[j]]+=P_t_coeff[j]; delta = 0.e0; for (i=0; i < num_dofs; i++) delta+= (row_sum[i] - 1.e0)*(row_sum[i] - 1.e0); hypre_printf("\n unit row_sum deviation in seq_PU_interpolation: %le\n", sqrt(delta/num_dofs)); hypre_TFree(row_sum); ind = 1; ierr = hypre_CSRMatrixTranspose(P_t, &P, ind); *P_pointer = P; hypre_CSRMatrixI(P_t) = NULL; hypre_CSRMatrixJ(P_t) = NULL; hypre_CSRMatrixDestroy(P_t); return ierr; }
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) 
= hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, 
&col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, 
I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); 
hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
/*--------------------------------------------------------------------------
 * AmgCGCChoose
 *
 * Chooses one grid for every processor.
 *
 *   G           : the connectivity graph (CSR, one row per candidate grid)
 *   vertexrange : parallel layout; the vertices of processor p are
 *                 vertexrange[p] .. vertexrange[p+1]-1
 *   mpisize     : number of procs
 *   coarse      : on return, *coarse is a newly allocated array of length
 *                 mpisize holding the chosen grid of each processor,
 *                 numbered from 1 (vertex index + 1)
 *
 * Returns ierr (always 0).
 *--------------------------------------------------------------------------*/

HYPRE_Int
AmgCGCChoose (hypre_CSRMatrix *G,HYPRE_Int *vertexrange,HYPRE_Int mpisize,HYPRE_Int **coarse)
{
   HYPRE_Int        ierr = 0;
   HYPRE_Int        num_vertices = hypre_CSRMatrixNumRows (G);
   HYPRE_Int       *G_i = hypre_CSRMatrixI(G);
   HYPRE_Int       *G_j = hypre_CSRMatrixJ(G);
   HYPRE_Real      *G_data = hypre_CSRMatrixData (G);

   hypre_CSRMatrix *H, *HT;
   HYPRE_Int       *H_i, *H_j, *HT_i, *HT_j;
   HYPRE_Int        nnz_G, nnz_H;

   HYPRE_Int       *owner;      /* processor owning each vertex */
   HYPRE_Int       *weight;     /* current measure of each vertex */
   HYPRE_Int       *chosen;     /* alias of *coarse */
   HYPRE_Int       *lists, *where;
   hypre_LinkList   LoL_head = NULL;
   hypre_LinkList   LoL_tail = NULL;

   HYPRE_Int        v, e, p, pass;
   HYPRE_Int        best, picked, nbr, raised;
   HYPRE_Int        row_end, kept;
   HYPRE_Real       best_weight;

   owner = hypre_CTAlloc (HYPRE_Int,num_vertices);
   *coarse = hypre_CTAlloc (HYPRE_Int,mpisize);
   memset (*coarse,0,sizeof(HYPRE_Int)*mpisize);
   chosen = *coarse;

   weight = hypre_CTAlloc (HYPRE_Int,num_vertices);
   lists  = hypre_CTAlloc (HYPRE_Int,num_vertices);
   where  = hypre_CTAlloc (HYPRE_Int,num_vertices);

   /******************************************************************
    * determine heavy edges
    ******************************************************************/

   nnz_G = G_i[num_vertices];
   H   = hypre_CSRMatrixCreate (num_vertices,num_vertices,nnz_G);
   H_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
   H_j = hypre_CTAlloc (HYPRE_Int,nnz_G);
   hypre_CSRMatrixI(H) = H_i;
   hypre_CSRMatrixJ(H) = H_j;

   /* look up the owning processor of every vertex */
   p = 0;
   for (v = 0; v < num_vertices; v++)
   {
      while (vertexrange[p+1] <= v)
      {
         p++;
      }
      owner[v] = p;
   }

   /* within each row of G keep, per neighbouring processor, the neighbour
      with the largest edge value */
   nnz_H = 0;
   H_i[0] = 0;
   for (v = 0; v < num_vertices; v++)
   {
      row_end = G_i[v+1];
      kept = 0;
      best = -1;
      best_weight = 0;

      for (e = G_i[v]; e < row_end; e++)
      {
         /* G_data is always negative, so this test is sufficient */
         if (best == -1 || G_data[e] > best_weight)
         {
            best = G_j[e];
            best_weight = G_data[e];
         }

         if (e == row_end-1 || owner[G_j[e+1]] > owner[best])
         {
            /* we are done for this processor boundary */
            H_j[nnz_H++] = best;
            kept++;
            best = -1;
            best_weight = 0;
         }
      }
      H_i[v+1] = H_i[v] + kept;
   }

   /******************************************************************
    * compute H^T, the transpose of H
    ******************************************************************/

   nnz_H = H_i[num_vertices];
   HT   = hypre_CSRMatrixCreate (num_vertices,num_vertices,nnz_H);
   HT_i = hypre_CTAlloc (HYPRE_Int,num_vertices+1);
   HT_j = hypre_CTAlloc (HYPRE_Int,nnz_H);
   hypre_CSRMatrixI(HT) = HT_i;
   hypre_CSRMatrixJ(HT) = HT_j;

   for (v = 0; v <= num_vertices; v++)
   {
      HT_i[v] = 0;
   }
   /* column counts, shifted by one */
   for (e = 0; e < nnz_H; e++)
   {
      HT_i[H_j[e]+1]++;
   }
   /* prefix sums give the row starts of H^T */
   for (v = 0; v < num_vertices; v++)
   {
      HT_i[v+1] += HT_i[v];
   }
   /* scatter; HT_i[c] is advanced while row c of H^T is filled */
   for (v = 0; v < num_vertices; v++)
   {
      for (e = H_i[v]; e < H_i[v+1]; e++)
      {
         HYPRE_Int c = H_j[e];
         HT_j[HT_i[c]] = v;
         HT_i[c]++;
      }
   }
   /* shift the advanced pointers back to row starts */
   for (v = num_vertices; v > 0; v--)
   {
      HT_i[v] = HT_i[v-1];
   }
   HT_i[0] = 0;

   /*****************************************************************
    * set initial vertex weights
    *****************************************************************/

   for (v = 0; v < num_vertices; v++)
   {
      weight[v] = H_i[v+1] - H_i[v] + HT_i[v+1] - HT_i[v];
      enter_on_lists (&LoL_head,&LoL_tail,weight[v],v,lists,where);
   }

   /******************************************************************
    * apply CGC iteration
    ******************************************************************/

   while (LoL_head && weight[LoL_head->head])
   {
      picked = LoL_head->head;

      /* add one because coarsegrid indexing starts with 1, not 0 */
      chosen[owner[picked]] = picked+1;

      /* new maximal weight */
      raised = weight[picked]+1;

      /* set weights for all remaining vertices on this processor to zero */
      for (v = vertexrange[owner[picked]]; v < vertexrange[owner[picked]+1]; v++)
      {
         remove_point (&LoL_head,&LoL_tail,weight[v],v,lists,where);
         weight[v] = 0;
      }

      /* pass 0 walks the row of H, pass 1 the row of H^T: every heavily
         coupled vertex on a processor without a chosen grid gets the new
         maximal weight */
      for (pass = 0; pass < 2; pass++)
      {
         HYPRE_Int *adj_i = pass ? HT_i : H_i;
         HYPRE_Int *adj_j = pass ? HT_j : H_j;

         for (e = adj_i[picked]; e < adj_i[picked+1]; e++)
         {
            nbr = adj_j[e];
            if (chosen[owner[nbr]])
            {
               continue;
            }
            remove_point (&LoL_head,&LoL_tail,weight[nbr],nbr,lists,where);
            enter_on_lists (&LoL_head,&LoL_tail,raised,nbr,lists,where);
            weight[nbr] = raised;
         }
      }
   }

   /* remove remaining list elements, if they exist. They all should have
      measure 0 */
   while (LoL_head)
   {
      v = LoL_head->head;
      remove_point (&LoL_head,&LoL_tail,weight[v],v,lists,where);
   }

   /* if the algorithm has not determined a coarse vertex for this proc,
      simply take the last one. Do not take the first one, it might be
      empty! */
   for (p = 0; p < mpisize; p++)
   {
      if (!chosen[p])
      {
         chosen[p] = vertexrange[p+1];
      }
   }

   /********************************************
    * clean up
    ********************************************/

   hypre_CSRMatrixDestroy (H);
   hypre_CSRMatrixDestroy (HT);

   hypre_TFree (owner);
   hypre_TFree (weight);
   hypre_TFree (lists);
   hypre_TFree (where);

   return(ierr);
}
/**************************************************************
 *
 *      CGC Coarsening routine
 *
 * hypre_BoomerAMGCoarsenCGCb
 *
 * Builds a C/F splitting for the rows of the strength matrix S.
 *
 * Phases, in the order they appear below:
 *   1. Build ST, the transpose of the diag part of S, and use its
 *      row lengths as initial measures.
 *   2. Run up to cgc_its first-pass colorings. The points picked in
 *      iteration l get CF_marker == l, so each iteration produces one
 *      candidate coarse grid.
 *   3. On more than one process, hand the candidates to
 *      hypre_BoomerAMGCoarsenCGC; on one process, candidate grid 1
 *      becomes the coarse grid.
 *   4. Second pass over the local strong connections.
 *   5. Exchange CF_marker across process boundaries and call
 *      AmgCGCBoundaryFix.
 *
 * Parameters:
 *   S             strength matrix; its diag and offd parts are read.
 *   A             used for its communication package when S has none,
 *                 and passed to hypre_ParCSRMatrixExtractBExt in that
 *                 case.
 *   measure_type  if nonzero (and num_procs > 1), entries of S_ext that
 *                 fall into the local column range are added to the
 *                 initial measures.
 *   coarsen_type  a negative value is replaced by its absolute value;
 *                 22 is downgraded to 21 if some point has local strong
 *                 connections but no off-processor ones.
 *   cgc_its       maximum number of candidate coarse grids to build.
 *   debug_flag    3 prints wall-clock timings for each phase.
 *   CF_marker_ptr on return, *CF_marker_ptr is the marker array
 *                 allocated here (presumably owned by the caller from
 *                 then on -- confirm against callers).
 *
 * Returns ierr, which is never changed from 0 in this function.
 *
 **************************************************************/
HYPRE_Int
hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix *S,
                            hypre_ParCSRMatrix *A,
                            HYPRE_Int measure_type,
                            HYPRE_Int coarsen_type,
                            HYPRE_Int cgc_its,
                            HYPRE_Int debug_flag,
                            HYPRE_Int **CF_marker_ptr)
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(S);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S);
   hypre_ParCSRCommHandle *comm_handle;

   hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S);
   hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S);
   HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag);
   HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag);
   HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
   HYPRE_Int *S_offd_j; /* assigned below, not read again in this function */
   HYPRE_Int num_variables = hypre_CSRMatrixNumRows(S_diag);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(S_offd);

   hypre_CSRMatrix *S_ext;
   HYPRE_Int *S_ext_i;
   HYPRE_Int *S_ext_j;

   hypre_CSRMatrix *ST;   /* transpose of the diag part of S */
   HYPRE_Int *ST_i;
   HYPRE_Int *ST_j;

   HYPRE_Int *CF_marker;
   HYPRE_Int *CF_marker_offd=NULL;
   HYPRE_Int ci_tilde = -1;       /* tentative C point of the second pass */
   HYPRE_Int ci_tilde_mark = -1;  /* row i for which ci_tilde was chosen */

   HYPRE_Int *measure_array;        /* working copy, reset every CGC iteration */
   HYPRE_Int *measure_array_master; /* measures carried across CGC iterations */
   HYPRE_Int *graph_array;
   HYPRE_Int *int_buf_data=NULL;
   /*HYPRE_Int *ci_array=NULL;*/

   HYPRE_Int i, j, k, l, jS;
   HYPRE_Int ji, jj, index;
   HYPRE_Int set_empty = 1;
   HYPRE_Int C_i_nonempty = 0;
   HYPRE_Int num_nonzeros;
   HYPRE_Int num_procs, my_id;

   HYPRE_Int num_sends = 0;
   HYPRE_Int first_col, start;
   HYPRE_Int col_0, col_n; /* computed below, not read again in this function */

   hypre_LinkList LoL_head;
   hypre_LinkList LoL_tail;

   HYPRE_Int *lists, *where;
   HYPRE_Int measure, new_meas;
   HYPRE_Int num_left;
   HYPRE_Int nabor, nabor_two;

   HYPRE_Int ierr = 0;
   HYPRE_Int use_commpkg_A = 0;
   HYPRE_Real wall_time;

   HYPRE_Int measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC.
                             Set in iteration l==1 when num_left != 0; the
                             only read is short-circuited by !num_left. */

   /* only the magnitude of coarsen_type is used from here on */
   if (coarsen_type < 0) coarsen_type = -coarsen_type;

   /*-------------------------------------------------------
    * Initialize the LoL_head, LoL_tail pointers and the
    * lists/where work arrays used by the list helpers
    *-------------------------------------------------------*/

   LoL_head = NULL;
   LoL_tail = NULL;
   lists = hypre_CTAlloc(HYPRE_Int, num_variables);
   where = hypre_CTAlloc(HYPRE_Int, num_variables);

#if 0 /* debugging */
   char filename[256];
   FILE *fp;
   HYPRE_Int iter = 0;
#endif

   /*--------------------------------------------------------------
    * S is the strength matrix supplied by the caller; it is not
    * computed here. This section looks up the process count and
    * rank, selects a communication package (S's own, else A's,
    * else one newly created for A) and builds ST.
    *----------------------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   if (!comm_pkg)
   {
      /* S has no comm package: fall back to the one stored on A */
      use_commpkg_A = 1;
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   if (!comm_pkg)
   {
      /* A has none either: create it on A */
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd);

   jS = S_i[num_variables]; /* number of entries in the diag part of S */

   ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS);
   ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1);
   ST_j = hypre_CTAlloc(HYPRE_Int,jS);
   hypre_CSRMatrixI(ST) = ST_i;
   hypre_CSRMatrixJ(ST) = ST_j;

   /*----------------------------------------------------------
    * generate transpose of S, ST
    *
    * Count the entries per column of S, prefix-sum the counts,
    * scatter the row indices, then shift ST_i back by one
    * because the scatter advanced every row start.
    *----------------------------------------------------------*/

   for (i=0; i <= num_variables; i++)
      ST_i[i] = 0;

   for (i=0; i < jS; i++)
   {
      ST_i[S_j[i]+1]++;
   }
   for (i=0; i < num_variables; i++)
   {
      ST_i[i+1] += ST_i[i];
   }
   for (i=0; i < num_variables; i++)
   {
      for (j=S_i[i]; j < S_i[i+1]; j++)
      {
         index = S_j[j];
         ST_j[ST_i[index]] = i;
         ST_i[index]++;
      }
   }
   for (i = num_variables; i > 0; i--)
   {
      ST_i[i] = ST_i[i-1];
   }
   ST_i[0] = 0;

   /*----------------------------------------------------------
    * Compute the measures
    *
    * The measures are given by the row sums of ST.
    * Hence, measure_array[i] is the number of influences
    * of variable i.
    * When measure_type is nonzero the measures are corrected
    * by adding influences from neighbor processors.
    *----------------------------------------------------------*/

   measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables);
   measure_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      measure_array_master[i] = ST_i[i+1]-ST_i[i];
   }

   /* S_ext is created under exactly this condition and destroyed under
      the same condition at the end of the function */
   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11))
       && num_procs > 1)
   {
      if (use_commpkg_A)
         S_ext = hypre_ParCSRMatrixExtractBExt(S,A,0);
      else
         S_ext = hypre_ParCSRMatrixExtractBExt(S,S,0);
      S_ext_i = hypre_CSRMatrixI(S_ext);
      S_ext_j = hypre_CSRMatrixJ(S_ext);
      num_nonzeros = S_ext_i[num_cols_offd];
      first_col = hypre_ParCSRMatrixFirstColDiag(S);
      col_0 = first_col-1;
      col_n = col_0+num_variables;
      if (measure_type)
      {
         /* every S_ext column that maps into the local range
            [0, num_variables) adds one influence */
         for (i=0; i < num_nonzeros; i++)
         {
            index = S_ext_j[i] - first_col;
            if (index > -1 && index < num_variables)
               measure_array_master[index]++;
         }
      }
   }

   /*---------------------------------------------------
    * Loop until all points are either fine or coarse.
    *---------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   /* first coarsening phase */

   /*************************************************************
    *
    *   Initialize the markers
    *
    *   Points with no strong connection in either the diag or
    *   the offd part become SF_PT with measure 0; all other
    *   points start as UNDECIDED.
    *
    *************************************************************/

   CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables);

   num_left = 0;
   for (j = 0; j < num_variables; j++)
   {
      if ((S_i[j+1]-S_i[j])== 0 &&
          (S_offd_i[j+1]-S_offd_i[j]) == 0)
      {
         CF_marker[j] = SF_PT;
         measure_array_master[j] = 0;
      }
      else
      {
         CF_marker[j] = UNDECIDED;
         /* num_left++; */ /* BM May 19, 2006: see below*/
      }
   }

   if (coarsen_type==22)
   {
      /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j:
         (a) the point has no strong connections at all, OR
         (b) the point has a strong connection across a boundary.
         The first point with local connections but none across a
         boundary switches coarsen_type to 21. */
      for (j=0;j<num_variables;j++)
         if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;}
   }

   /* one pass of this loop builds candidate coarse grid number l */
   for (l = 1; l <= cgc_its; l++)
   {
      LoL_head = NULL;
      LoL_tail = NULL;
      num_left = 0; /* compute num_left before each RS coarsening loop */
      memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int));
      memset (lists,0,sizeof(HYPRE_Int)*num_variables);
      memset (where,0,sizeof(HYPRE_Int)*num_variables);

      for (j = 0; j < num_variables; j++)
      {
         measure = measure_array[j];
         if (CF_marker[j] != SF_PT)
         {
            if (measure > 0)
            {
               enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where);
               num_left++; /* compute num_left before each RS coarsening loop */
            }
            else if (CF_marker[j] == 0)
               /* increase weight of strongly coupled neighbors only
                  if j is not contained in a previously constructed coarse grid.
                  Reason: these neighbors should start with the same initial weight
                  in each CGC iteration. BM Aug 30, 2006 */
            {
               if (measure < 0) hypre_printf("negative measure!\n");
               /* CF_marker[j] = f_pnt; */
               for (k = S_i[j]; k < S_i[j+1]; k++)
               {
                  nabor = S_j[k];
                  /* if (CF_marker[nabor] != SF_PT) */
                  if (CF_marker[nabor] == 0) /* BM Aug 30, 2006: don't alter weights of
                                                points contained in other candidate
                                                coarse grids */
                  {
                     if (nabor < j)
                     {
                        /* nabor was already visited by the j loop: move it to
                           the list for its new measure, or start counting it
                           if it had measure 0 and was not on a list */
                        new_meas = measure_array[nabor];
                        if (new_meas > 0)
                           remove_point(&LoL_head, &LoL_tail, new_meas,
                                        nabor, lists, where);
                        else
                           num_left++; /* BM Aug 29, 2006 */

                        new_meas = ++(measure_array[nabor]);
                        enter_on_lists(&LoL_head, &LoL_tail, new_meas,
                                       nabor, lists, where);
                     }
                     else
                     {
                        /* nabor has not been visited yet; it is entered on
                           a list when the j loop reaches it */
                        new_meas = ++(measure_array[nabor]);
                     }
                  }
               }
               /* --num_left; */ /* BM May 19, 2006 */
            }
         }
      }

      /* BM Aug 30, 2006: first iteration: determine maximal weight */
      if (num_left && l==1) measure_max = measure_array[LoL_head->head];
      /* BM Aug 30, 2006: break CGC iteration if no suitable
         starting point is available any more. All remaining list
         elements are disposed of before leaving the loop. */
      if (!num_left || measure_array[LoL_head->head]<measure_max)
      {
         while (LoL_head)
         {
            hypre_LinkList list_ptr = LoL_head;
            LoL_head = LoL_head->next_elt;
            dispose_elt (list_ptr);
         }
         break;
      }

      /****************************************************************
       *
       *  Main loop of Ruge-Stueben first coloring pass, CGC variant.
       *
       *  In this variant CF_marker is only written for the chosen
       *  point (value l). "Undecided" is tracked through
       *  measure_array[.] > 0 instead of through CF_marker.
       *
       *  WHILE there are still points to classify DO:
       *        1) take the head point, index, of the first list,
       *           mark it with l, zero its measure (working and
       *           master copy) and remove it from the lists
       *        2) For each undecided point, j, in S_i^T,
       *           a) zero its working measure and remove it from
       *              the lists
       *           b) For each undecided point, k, in S_j
       *                  move k to the list in LoL with measure one
       *                  greater than it occupies
       *        3) For each undecided point, j, in S_i,
       *                  move j to the list in LoL with measure one
       *                  smaller than it occupies; if the measure
       *                  reaches 0, j leaves the lists and its
       *                  undecided neighbors each gain one
       *
       ****************************************************************/

      while (num_left > 0)
      {
         index = LoL_head -> head;
         /* index = LoL_head -> tail; */

         /* CF_marker[index] = C_PT; */
         CF_marker[index] = l; /* BM Aug 18, 2006 */
         measure = measure_array[index];
         measure_array[index] = 0;
         measure_array_master[index] = 0; /* BM May 19: for CGC */
         --num_left;

         remove_point(&LoL_head, &LoL_tail, measure, index, lists, where);

         /* step 2: points that index influences */
         for (j = ST_i[index]; j < ST_i[index+1]; j++)
         {
            nabor = ST_j[j];
            /* if (CF_marker[nabor] == UNDECIDED) */
            if (measure_array[nabor]>0) /* undecided point */
            {
               /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
               measure = measure_array[nabor];
               measure_array[nabor]=0;

               remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);
               --num_left;

               for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
               {
                  nabor_two = S_j[k];
                  /* if (CF_marker[nabor_two] == UNDECIDED) */
                  if (measure_array[nabor_two]>0) /* undecided point */
                  {
                     measure = measure_array[nabor_two];
                     remove_point(&LoL_head, &LoL_tail, measure,
                                  nabor_two, lists, where);

                     new_meas = ++(measure_array[nabor_two]);

                     enter_on_lists(&LoL_head, &LoL_tail, new_meas,
                                    nabor_two, lists, where);
                  }
               }
            }
         }

         /* step 3: points that index depends on */
         for (j = S_i[index]; j < S_i[index+1]; j++)
         {
            nabor = S_j[j];
            /* if (CF_marker[nabor] == UNDECIDED) */
            if (measure_array[nabor]>0) /* undecided point */
            {
               measure = measure_array[nabor];

               remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where);

               measure_array[nabor] = --measure;

               if (measure > 0)
                  enter_on_lists(&LoL_head, &LoL_tail, measure, nabor,
                                 lists, where);
               else
               {
                  /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */
                  --num_left;

                  for (k = S_i[nabor]; k < S_i[nabor+1]; k++)
                  {
                     nabor_two = S_j[k];
                     /* if (CF_marker[nabor_two] == UNDECIDED) */
                     if (measure_array[nabor_two]>0)
                     {
                        new_meas = measure_array[nabor_two];
                        remove_point(&LoL_head, &LoL_tail, new_meas,
                                     nabor_two, lists, where);

                        new_meas = ++(measure_array[nabor_two]);

                        enter_on_lists(&LoL_head, &LoL_tail, new_meas,
                                       nabor_two, lists, where);
                     }
                  }
               }
            }
         }
      }
      /* the lists are expected to be empty once num_left reaches 0 */
      if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head);
   }
   /* after a break, iteration l built no grid; after a normal exit l is
      cgc_its+1. Either way l-1 is the number of candidate grids. */
   l--; /* BM Aug 15, 2006 */

   hypre_TFree(measure_array);
   hypre_TFree(measure_array_master);
   hypre_CSRMatrixDestroy(ST);

   if (debug_flag == 3)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 1st pass = %f\n",
                   my_id, wall_time);
   }

   hypre_TFree(lists);
   hypre_TFree(where);

   if (num_procs>1)
   {
      if (debug_flag == 3) wall_time = time_getWallclockSeconds();
      /* receives the l candidate grids stored in CF_marker; presumably
         selects one per process and rewrites CF_marker -- confirm against
         the definition of hypre_BoomerAMGCoarsenCGC */
      hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker);

      if (debug_flag == 3)
      {
         wall_time = time_getWallclockSeconds() - wall_time;
         hypre_printf("Proc = %d    Coarsen CGC = %f\n",
                      my_id, wall_time);
      }
   }
   else
   {
      /* the first candidate coarse grid is the coarse grid;
         every other marker value is overwritten with F_PT */
      for (j=0;j<num_variables;j++)
      {
         if (CF_marker[j]==1) CF_marker[j]=C_PT;
         else CF_marker[j]=F_PT;
      }
   }

   /* BM May 19, 2006: Set all undecided points to be fine grid points. */
   for (j=0;j<num_variables;j++)
      if (!CF_marker[j]) CF_marker[j]=F_PT;

   /*---------------------------------------------------
    * Initialize the graph array
    *---------------------------------------------------*/

   graph_array = hypre_CTAlloc(HYPRE_Int, num_variables);

   for (i = 0; i < num_variables; i++)
   {
      graph_array[i] = -1;
   }

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   /*---------------------------------------------------
    * Second pass over the local strong connections.
    *
    * For each point i with marker -1 (presumably F_PT --
    * confirm): tag its strong neighbors with a positive marker
    * in graph_array, then look for a strong neighbor j, also
    * marked -1, that has no strong neighbor tagged for i.
    * The first such j is tentatively given marker 1 and row i
    * is processed again (i--). If a second such j turns up,
    * i itself gets marker 1 and the tentative point is reset
    * to -1.
    *---------------------------------------------------*/
   for (i=0; i < num_variables; i++)
   {
      /* a tentative point only applies to the row it was chosen for */
      if (ci_tilde_mark != i) ci_tilde = -1;
      if (CF_marker[i] == -1)
      {
         for (ji = S_i[i]; ji < S_i[i+1]; ji++)
         {
            j = S_j[ji];
            if (CF_marker[j] > 0)
               graph_array[j] = i;
         }
         for (ji = S_i[i]; ji < S_i[i+1]; ji++)
         {
            j = S_j[ji];
            if (CF_marker[j] == -1)
            {
               set_empty = 1;
               for (jj = S_i[j]; jj < S_i[j+1]; jj++)
               {
                  index = S_j[jj];
                  if (graph_array[index] == i)
                  {
                     set_empty = 0;
                     break;
                  }
               }
               if (set_empty)
               {
                  if (C_i_nonempty)
                  {
                     CF_marker[i] = 1;
                     if (ci_tilde > -1)
                     {
                        CF_marker[ci_tilde] = -1;
                        ci_tilde = -1;
                     }
                     C_i_nonempty = 0;
                     break;
                  }
                  else
                  {
                     ci_tilde = j;
                     ci_tilde_mark = i;
                     CF_marker[j] = 1;
                     C_i_nonempty = 1;
                     i--; /* revisit row i with j now marked */
                     break;
                  }
               }
            }
         }
      }
   }

   if (debug_flag == 3 && coarsen_type != 2)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    Coarsen 2nd pass = %f\n",
                   my_id, wall_time);
   }

   /* third pass, check boundary fine points for coarse neighbors */

   /*------------------------------------------------
    * Exchange boundary data for CF_marker
    *
    * The marker of every send-map element is packed into
    * int_buf_data and received into CF_marker_offd.
    *------------------------------------------------*/

   if (debug_flag == 3) wall_time = time_getWallclockSeconds();

   CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg,
                                                                           num_sends));

   index = 0;
   for (i = 0; i < num_sends; i++)
   {
      start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
      for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
         int_buf_data[index++]
            = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
   }

   if (num_procs > 1)
   {
      /* the handle is destroyed right after creation; presumably the
         destroy call waits for the exchange to complete -- confirm */
      comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data,
                                                 CF_marker_offd);

      hypre_ParCSRCommHandleDestroy(comm_handle);
   }
   AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd);
   if (debug_flag == 3)
   {
      wall_time = time_getWallclockSeconds() - wall_time;
      hypre_printf("Proc = %d    CGC boundary fix = %f\n",
                   my_id, wall_time);
   }

   /*---------------------------------------------------
    * Clean up and return
    *---------------------------------------------------*/

   /*if (coarsen_type != 1)
     { */
   if (CF_marker_offd) hypre_TFree(CF_marker_offd); /* BM Aug 21, 2006 */
   if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */
   /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */
   /*} */
   hypre_TFree(graph_array);
   /* same condition as the one that created S_ext; a 22 -> 21 switch of
      coarsen_type in between does not change its outcome */
   if ((measure_type || (coarsen_type != 1 && coarsen_type != 11))
       && num_procs > 1)
      hypre_CSRMatrixDestroy(S_ext);

   *CF_marker_ptr = CF_marker;

   return (ierr);
}
hypre_CSRMatrix * hypre_GenerateDifConv( HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Real *value ) { hypre_CSRMatrix *A; HYPRE_Int *A_i; HYPRE_Int *A_j; HYPRE_Real *A_data; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int p, q, r; HYPRE_Int cnt; HYPRE_Int num_rows; HYPRE_Int row_index; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; num_rows = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } A_i = hypre_CTAlloc(HYPRE_Int, num_rows+1); cnt = 1; A_i[0] = 0; for (r = 0; r < R; r++) { for (q = 0; q < Q; q++) { for (p = 0; p < P; p++) { for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { A_i[cnt] = A_i[cnt-1]; A_i[cnt]++; if (iz > nz_part[r]) A_i[cnt]++; else { if (iz) { A_i[cnt]++; } } if (iy > ny_part[q]) A_i[cnt]++; else { if (iy) { A_i[cnt]++; } } if (ix > nx_part[p]) A_i[cnt]++; else { if (ix) { A_i[cnt]++; } } if (ix+1 < nx_part[p+1]) A_i[cnt]++; else { if (ix+1 < nx) { A_i[cnt]++; } } if (iy+1 < ny_part[q+1]) A_i[cnt]++; else { if (iy+1 < ny) { A_i[cnt]++; } } if (iz+1 < nz_part[r+1]) A_i[cnt]++; else { if (iz+1 < nz) { A_i[cnt]++; } } cnt++; } } } } } } A_j = hypre_CTAlloc(HYPRE_Int, A_i[num_rows]); A_data = hypre_CTAlloc(HYPRE_Real, A_i[num_rows]); row_index = 0; cnt = 0; for (r = 0; r < R; r++) { for (q = 0; q < Q; q++) { ny_size = ny_part[q+1]-ny_part[q]; for (p = 0; p < P; p++) { nx_size = nx_part[p+1] - nx_part[p]; 
for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { A_j[cnt] = row_index; A_data[cnt++] = value[0]; if (iz > nz_part[r]) { A_j[cnt] = row_index-nx_size*ny_size; A_data[cnt++] = value[3]; } else { if (iz) { A_j[cnt] = map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[3]; } } if (iy > ny_part[q]) { A_j[cnt] = row_index-nx_size; A_data[cnt++] = value[2]; } else { if (iy) { A_j[cnt] = map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[2]; } } if (ix > nx_part[p]) { A_j[cnt] = row_index-1; A_data[cnt++] = value[1]; } else { if (ix) { A_j[cnt] = map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { A_j[cnt] = row_index+1; A_data[cnt++] = value[4]; } else { if (ix+1 < nx) { A_j[cnt] = map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[4]; } } if (iy+1 < ny_part[q+1]) { A_j[cnt] = row_index+nx_size; A_data[cnt++] = value[5]; } else { if (iy+1 < ny) { A_j[cnt] = map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[5]; } } if (iz+1 < nz_part[r+1]) { A_j[cnt] = row_index+nx_size*ny_size; A_data[cnt++] = value[6]; } else { if (iz+1 < nz) { A_j[cnt] = map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); A_data[cnt++] = value[6]; } } row_index++; } } } } } } A = hypre_CSRMatrixCreate(num_rows, num_rows, A_i[num_rows]); hypre_CSRMatrixI(A) = A_i; hypre_CSRMatrixJ(A) = A_j; hypre_CSRMatrixData(A) = A_data; hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); hypre_TFree(global_part); return A; }
HYPRE_Int hypre_BoomerAMGBlockCreateNodalA(hypre_ParCSRBlockMatrix *A, HYPRE_Int option, HYPRE_Int diag_option, hypre_ParCSRMatrix **AN_ptr) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(A); hypre_CSRBlockMatrix *A_diag = hypre_ParCSRBlockMatrixDiag(A); HYPRE_Int *A_diag_i = hypre_CSRBlockMatrixI(A_diag); HYPRE_Real *A_diag_data = hypre_CSRBlockMatrixData(A_diag); HYPRE_Int block_size = hypre_CSRBlockMatrixBlockSize(A_diag); HYPRE_Int bnnz = block_size*block_size; hypre_CSRBlockMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int *A_offd_i = hypre_CSRBlockMatrixI(A_offd); HYPRE_Real *A_offd_data = hypre_CSRBlockMatrixData(A_offd); HYPRE_Int *A_diag_j = hypre_CSRBlockMatrixJ(A_diag); HYPRE_Int *A_offd_j = hypre_CSRBlockMatrixJ(A_offd); HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(A); HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(A); HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd = 0; HYPRE_Int num_cols_offd = 0; hypre_ParCSRMatrix *AN; hypre_CSRMatrix *AN_diag; HYPRE_Int *AN_diag_i; HYPRE_Int *AN_diag_j=NULL; HYPRE_Real *AN_diag_data = NULL; hypre_CSRMatrix *AN_offd; HYPRE_Int *AN_offd_i; HYPRE_Int *AN_offd_j = NULL; HYPRE_Real *AN_offd_data = NULL; HYPRE_Int *col_map_offd_AN = NULL; HYPRE_Int *row_starts_AN; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A); HYPRE_Int num_sends; HYPRE_Int num_recvs; HYPRE_Int *send_procs; HYPRE_Int *send_map_starts; HYPRE_Int *send_map_elmts; HYPRE_Int *recv_procs; HYPRE_Int *recv_vec_starts; hypre_ParCSRCommPkg *comm_pkg_AN = NULL; HYPRE_Int *send_procs_AN = NULL; HYPRE_Int *send_map_starts_AN = NULL; HYPRE_Int *send_map_elmts_AN = NULL; HYPRE_Int *recv_procs_AN = NULL; HYPRE_Int *recv_vec_starts_AN = NULL; HYPRE_Int i; HYPRE_Int ierr = 0; HYPRE_Int num_procs; HYPRE_Int cnt; HYPRE_Int norm_type; HYPRE_Int global_num_nodes; HYPRE_Int num_nodes; HYPRE_Int index, k; HYPRE_Real tmp; HYPRE_Real sum; hypre_MPI_Comm_size(comm,&num_procs); if (!comm_pkg) { 
hypre_BlockMatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A); } norm_type = fabs(option); /* Set up the new matrix AN */ #ifdef HYPRE_NO_GLOBAL_PARTITION row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2); for (i=0; i < 2; i++) { row_starts_AN[i] = row_starts[i]; } #else row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1); for (i=0; i < num_procs+1; i++) { row_starts_AN[i] = row_starts[i]; } #endif global_num_nodes = hypre_ParCSRBlockMatrixGlobalNumRows(A); num_nodes = hypre_CSRBlockMatrixNumRows(A_diag); /* the diag part */ num_nonzeros_diag = A_diag_i[num_nodes]; AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); for (i=0; i <= num_nodes; i++) { AN_diag_i[i] = A_diag_i[i]; } AN_diag_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); AN_diag_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_diag); AN_diag = hypre_CSRMatrixCreate(num_nodes, num_nodes, num_nonzeros_diag); hypre_CSRMatrixI(AN_diag) = AN_diag_i; hypre_CSRMatrixJ(AN_diag) = AN_diag_j; hypre_CSRMatrixData(AN_diag) = AN_diag_data; for (i=0; i< num_nonzeros_diag; i++) { AN_diag_j[i] = A_diag_j[i]; hypre_CSRBlockMatrixBlockNorm(norm_type, &A_diag_data[i*bnnz], &tmp, block_size); AN_diag_data[i] = tmp; } if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (NEED * to get more below!)*/ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; sum = 0.0; for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++) { sum += AN_diag_data[k]; } AN_diag_data[index] = -sum; } } else if (diag_option == 2) { /* make all diagonal entries negative */ /* the diagonal is the first element listed in each row - */ for (i=0; i < num_nodes; i++) { index = AN_diag_i[i]; AN_diag_data[index] = -AN_diag_data[index]; } } /* copy the commpkg */ if (comm_pkg) { comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm; num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); 
hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends; num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs; send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); if (num_sends) { send_procs_AN = hypre_CTAlloc(HYPRE_Int, num_sends); send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]); } send_map_starts_AN = hypre_CTAlloc(HYPRE_Int, num_sends+1); send_map_starts_AN[0] = 0; for (i=0; i < num_sends; i++) { send_procs_AN[i] = send_procs[i]; send_map_starts_AN[i+1] = send_map_starts[i+1]; } cnt = send_map_starts_AN[num_sends]; for (i=0; i< cnt; i++) { send_map_elmts_AN[i] = send_map_elmts[i]; } hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN; hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN; hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN; recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int, num_recvs+1); if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int, num_recvs); recv_vec_starts_AN[0] = recv_vec_starts[0]; for (i=0; i < num_recvs; i++) { recv_procs_AN[i] = recv_procs[i]; recv_vec_starts_AN[i+1] = recv_vec_starts[i+1]; } hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN; hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN; } /* the off-diag part */ num_cols_offd = hypre_CSRBlockMatrixNumCols(A_offd); col_map_offd_AN = hypre_CTAlloc(HYPRE_Int, num_cols_offd); for (i=0; i < num_cols_offd; i++) { col_map_offd_AN[i] = col_map_offd[i]; } num_nonzeros_offd = A_offd_i[num_nodes]; AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1); for (i=0; i <= num_nodes; i++) { AN_offd_i[i] = A_offd_i[i]; } AN_offd_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); AN_offd_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros_offd); 
for (i=0; i< num_nonzeros_offd; i++) { AN_offd_j[i] = A_offd_j[i]; hypre_CSRBlockMatrixBlockNorm(norm_type, &A_offd_data[i*bnnz], &tmp, block_size); AN_offd_data[i] = tmp; } AN_offd = hypre_CSRMatrixCreate(num_nodes, num_cols_offd, num_nonzeros_offd); hypre_CSRMatrixI(AN_offd) = AN_offd_i; hypre_CSRMatrixJ(AN_offd) = AN_offd_j; hypre_CSRMatrixData(AN_offd) = AN_offd_data; if (diag_option ==1 ) { /* make the diag entry the negative of the sum of off-diag entries (here we are adding the off_diag contribution)*/ /* the diagonal is the first element listed in each row of AN_diag_data - */ for (i=0; i < num_nodes; i++) { sum = 0.0; for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++) { sum += AN_offd_data[k]; } index = AN_diag_i[i];/* location of diag entry in data */ AN_diag_data[index] -= sum; /* subtract from current value */ } } /* now create AN */ AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes, row_starts_AN, row_starts_AN, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); /* we already created the diag and offd matrices - so we don't need the ones created above */ hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN)); hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN)); hypre_ParCSRMatrixDiag(AN) = AN_diag; hypre_ParCSRMatrixOffd(AN) = AN_offd; hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN; hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN; *AN_ptr = AN; return (ierr); }
/*--------------------------------------------------------------------------
 * hypre_GenerateLaplacian9pt
 *
 * Assembles a sequential CSR matrix with a 9-point stencil on an nx x ny
 * grid that is split into P x Q boxes. Rows are numbered box by box
 * (p fastest, then q) and, inside a box, with ix fastest, then iy.
 *
 * value[0] is stored on the diagonal and value[1] on every neighbour.
 * Within a row the entries are written in the order: diagonal, the three
 * points of the row below (left, middle, right), the left and right
 * neighbour, the three points of the row above (left, middle, right).
 *
 * A neighbour inside the same box is addressed relative to the current
 * row; a neighbour in an adjacent box is looked up with map2().
 *
 * Returns the new matrix; the partition arrays are freed before returning.
 *--------------------------------------------------------------------------*/
hypre_CSRMatrix *
hypre_GenerateLaplacian9pt( HYPRE_Int  nx,
                            HYPRE_Int  ny,
                            HYPRE_Int  P,
                            HYPRE_Int  Q,
                            double    *value )
{
   hypre_CSRMatrix *A;
   HYPRE_Int       *row_ptr;
   HYPRE_Int       *col_ind;
   double          *coef;

   HYPRE_Int *global_part;
   HYPRE_Int *nx_part;
   HYPRE_Int *ny_part;

   HYPRE_Int ix, iy;
   HYPRE_Int p, q;
   HYPRE_Int pos;
   HYPRE_Int row;
   HYPRE_Int total_rows;
   HYPRE_Int nx_size, ny_size;

   /* neighbour flags for the current grid point */
   HYPRE_Int in_left, in_right;   /* x neighbour lies inside this box */
   HYPRE_Int has_left, has_right; /* x neighbour exists at all        */
   HYPRE_Int has_down, has_up;    /* y neighbour row exists at all    */

   total_rows = nx*ny;

   hypre_GeneratePartitioning(nx,P,&nx_part);
   hypre_GeneratePartitioning(ny,Q,&ny_part);

   /* global_part[b] = index of the first row owned by box b */
   global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1);
   global_part[0] = 0;
   pos = 1;
   for (q = 0; q < Q; q++)
   {
      ny_size = ny_part[q+1]-ny_part[q];
      for (p = 0; p < P; p++)
      {
         nx_size = nx_part[p+1] - nx_part[p];
         global_part[pos] = global_part[pos-1] + nx_size*ny_size;
         pos++;
      }
   }

   /* first sweep: row pointers. Each of the up to three stencil rows
      (below, own, above) holds one point plus the existing x neighbours */
   row_ptr = hypre_CTAlloc(HYPRE_Int,total_rows+1);
   row_ptr[0] = 0;
   row = 0;
   for (q = 0; q < Q; q++)
   {
      for (p = 0; p < P; p++)
      {
         for (iy = ny_part[q]; iy < ny_part[q+1]; iy++)
         {
            for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
            {
               has_left  = (ix > nx_part[p]) || ix;
               has_right = (ix+1 < nx_part[p+1]) || (ix+1 < nx);
               has_down  = (iy > ny_part[q]) || iy;
               has_up    = (iy+1 < ny_part[q+1]) || (iy+1 < ny);

               row_ptr[row+1] = row_ptr[row]
                  + (1 + has_left + has_right) * (1 + has_down + has_up);
               row++;
            }
         }
      }
   }

   col_ind = hypre_CTAlloc(HYPRE_Int, row_ptr[total_rows]);
   coef    = hypre_CTAlloc(double, row_ptr[total_rows]);

   /* second sweep: column indices and coefficients */
   row = 0;
   pos = 0;
   for (q = 0; q < Q; q++)
   {
      for (p = 0; p < P; p++)
      {
         nx_size = nx_part[p+1]-nx_part[p];
         for (iy = ny_part[q]; iy < ny_part[q+1]; iy++)
         {
            for (ix = nx_part[p]; ix < nx_part[p+1]; ix++)
            {
               in_left   = (ix > nx_part[p]);
               in_right  = (ix < nx_part[p+1]-1);
               has_left  = in_left || ix;
               has_right = in_right || (ix+1 < nx);

               /* diagonal */
               col_ind[pos] = row;
               coef[pos++]  = value[0];

               /* row below */
               if (iy > ny_part[q])
               {
                  /* inside this box */
                  if (has_left)
                  {
                     col_ind[pos] = in_left
                        ? row-nx_size-1
                        : map2(ix-1,iy-1,p-1,q,P,Q,
                               nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
                  col_ind[pos] = row-nx_size;
                  coef[pos++]  = value[1];
                  if (has_right)
                  {
                     col_ind[pos] = in_right
                        ? row-nx_size+1
                        : map2(ix+1,iy-1,p+1,q,P,Q,
                               nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
               }
               else if (iy)
               {
                  /* in the box row q-1 */
                  if (has_left)
                  {
                     col_ind[pos] = map2(ix-1,iy-1,in_left ? p : p-1,q-1,P,Q,
                                         nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
                  col_ind[pos] = map2(ix,iy-1,p,q-1,P,Q,
                                      nx_part,ny_part,global_part);
                  coef[pos++]  = value[1];
                  if (has_right)
                  {
                     col_ind[pos] = map2(ix+1,iy-1,in_right ? p : p+1,q-1,P,Q,
                                         nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
               }

               /* own row: left, then right */
               if (has_left)
               {
                  col_ind[pos] = in_left
                     ? row-1
                     : map2(ix-1,iy,p-1,q,P,Q,
                            nx_part,ny_part,global_part);
                  coef[pos++] = value[1];
               }
               if (has_right)
               {
                  col_ind[pos] = in_right
                     ? row+1
                     : map2(ix+1,iy,p+1,q,P,Q,
                            nx_part,ny_part,global_part);
                  coef[pos++] = value[1];
               }

               /* row above */
               if (iy+1 < ny_part[q+1])
               {
                  /* inside this box */
                  if (has_left)
                  {
                     col_ind[pos] = in_left
                        ? row+nx_size-1
                        : map2(ix-1,iy+1,p-1,q,P,Q,
                               nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
                  col_ind[pos] = row+nx_size;
                  coef[pos++]  = value[1];
                  if (has_right)
                  {
                     col_ind[pos] = in_right
                        ? row+nx_size+1
                        : map2(ix+1,iy+1,p+1,q,P,Q,
                               nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
               }
               else if (iy+1 < ny)
               {
                  /* in the box row q+1 */
                  if (has_left)
                  {
                     col_ind[pos] = map2(ix-1,iy+1,in_left ? p : p-1,q+1,P,Q,
                                         nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
                  col_ind[pos] = map2(ix,iy+1,p,q+1,P,Q,
                                      nx_part,ny_part,global_part);
                  coef[pos++]  = value[1];
                  if (has_right)
                  {
                     col_ind[pos] = map2(ix+1,iy+1,in_right ? p : p+1,q+1,P,Q,
                                         nx_part,ny_part,global_part);
                     coef[pos++] = value[1];
                  }
               }

               row++;
            }
         }
      }
   }

   A = hypre_CSRMatrixCreate(total_rows, total_rows, row_ptr[total_rows]);

   hypre_CSRMatrixI(A)    = row_ptr;
   hypre_CSRMatrixJ(A)    = col_ind;
   hypre_CSRMatrixData(A) = coef;

   hypre_TFree(nx_part);
   hypre_TFree(ny_part);
   hypre_TFree(global_part);

   return A;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixSplit
 *
 * Splits the CSR matrix A into an nr x nc array of CSR sub-matrices.
 *
 *   A              matrix to split (only read)
 *   nr, nc         number of block rows / block columns
 *   row_block_num  for every row i of A, the block row (0..nr-1) it goes to
 *   col_block_num  for every column j of A, the block column (0..nc-1)
 *   blocks         output array with room for nr*nc pointers; block (bi,bj)
 *                  is stored in blocks[bi*nc + bj]
 *
 * Inside a block, rows (columns) are numbered in the order in which they
 * occur in A.  Every block is created here together with its I, J and data
 * arrays.
 *--------------------------------------------------------------------------*/
void hypre_CSRMatrixSplit(hypre_CSRMatrix *A,
                          HYPRE_Int nr, HYPRE_Int nc,
                          HYPRE_Int *row_block_num, HYPRE_Int *col_block_num,
                          hypre_CSRMatrix **blocks)
{
   HYPRE_Int i, j, k, r, bi, bj;

   HYPRE_Int* A_i = hypre_CSRMatrixI(A);
   HYPRE_Int* A_j = hypre_CSRMatrixJ(A);
   HYPRE_Complex* A_data = hypre_CSRMatrixData(A);

   HYPRE_Int A_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int A_cols = hypre_CSRMatrixNumCols(A);

   /* num_rows[b] / num_cols[b]: size of block row / block column b
      (zero-initialised counters) */
   HYPRE_Int *num_rows = hypre_CTAlloc(HYPRE_Int, nr);
   HYPRE_Int *num_cols = hypre_CTAlloc(HYPRE_Int, nc);

   /* block_row[i] / block_col[j]: index of row i / column j of A inside
      its block */
   HYPRE_Int *block_row = hypre_TAlloc(HYPRE_Int, A_rows);
   HYPRE_Int *block_col = hypre_TAlloc(HYPRE_Int, A_cols);

   for (i = 0; i < A_rows; i++)
   {
      block_row[i] = num_rows[row_block_num[i]]++;
   }
   for (j = 0; j < A_cols; j++)
   {
      block_col[j] = num_cols[col_block_num[j]]++;
   }

   /* allocate the blocks */
   for (i = 0; i < nr; i++)
   {
      for (j = 0; j < nc; j++)
      {
         hypre_CSRMatrix *B = hypre_CSRMatrixCreate(num_rows[i], num_cols[j], 0);
         hypre_CSRMatrixI(B) = hypre_CTAlloc(HYPRE_Int, num_rows[i] + 1);
         blocks[i*nc + j] = B;
      }
   }

   /* count block row nnz: the count of local row r is kept in B_i[r+1] */
   for (i = 0; i < A_rows; i++)
   {
      bi = row_block_num[i];
      for (j = A_i[i]; j < A_i[i+1]; j++)
      {
         bj = col_block_num[A_j[j]];
         hypre_CSRMatrix *B = blocks[bi*nc + bj];
         hypre_CSRMatrixI(B)[block_row[i] + 1]++;
      }
   }

   /* count block nnz: turn the per-row counts into row start offsets.
      After this loop B_i[r+1] holds the START of local row r; the populate
      pass below advances it to the END of row r, which is the final CSR
      row pointer. */
   for (k = 0; k < nr*nc; k++)
   {
      hypre_CSRMatrix *B = blocks[k];
      HYPRE_Int* B_i = hypre_CSRMatrixI(B);
      HYPRE_Int B_rows = hypre_CSRMatrixNumRows(B);
      HYPRE_Int nnz = 0, rs;

      /* dedicated HYPRE_Int index: must not shadow the block counter k */
      for (r = 1; r <= B_rows; r++)
      {
         rs = B_i[r];
         B_i[r] = nnz;
         nnz += rs;
      }

      hypre_CSRMatrixJ(B) = hypre_TAlloc(HYPRE_Int, nnz);
      hypre_CSRMatrixData(B) = hypre_TAlloc(HYPRE_Complex, nnz);
      hypre_CSRMatrixNumNonzeros(B) = nnz;
   }

   /* populate blocks */
   for (i = 0; i < A_rows; i++)
   {
      bi = row_block_num[i];
      for (j = A_i[i]; j < A_i[i+1]; j++)
      {
         k = A_j[j];
         bj = col_block_num[k];
         hypre_CSRMatrix *B = blocks[bi*nc + bj];
         /* write cursor of this row inside block B */
         HYPRE_Int *bii = hypre_CSRMatrixI(B) + block_row[i] + 1;
         hypre_CSRMatrixJ(B)[*bii] = block_col[k];
         hypre_CSRMatrixData(B)[*bii] = A_data[j];
         (*bii)++;
      }
   }

   hypre_TFree(block_col);
   hypre_TFree(block_row);
   hypre_TFree(num_cols);
   hypre_TFree(num_rows);
}
/*--------------------------------------------------------------------------
 * hypre_AMGBuildRBMInterp
 *
 * Builds a prolongation (interpolation) matrix P for the CSR matrix A.
 *
 *   A                    fine-grid matrix (I/J/data are read)
 *   CF_marker            per dof: >= 0 marks a C-point, < 0 an F-point
 *   S                    matrix whose sparsity lists, for every dof, the
 *                        neighbours that may be interpolated from
 *   dof_func             per-dof function id; read only if num_functions > 1
 *   num_functions        number of functions
 *   coarse_dof_func_ptr  out: function id of each coarse dof; written only
 *                        if num_functions > 1
 *   P_ptr                out: the new matrix (num_dofs rows, one column per
 *                        C-point)
 *
 * Sparsity of P: a C-point row holds a single entry (itself, value 1); an
 * F-point row holds one entry per C-point found in the corresponding row
 * of S.
 *
 * Returns ierr (0) on success, -1 if the local fine/coarse bookkeeping is
 * inconsistent.
 *
 * NOTE(review): the -1 return path does not free any of the arrays that
 * were allocated up to that point -- looks like a leak, confirm.
 * NOTE(review): *coarse_dof_func_ptr is left untouched when
 * num_functions <= 1.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_AMGBuildRBMInterp( hypre_CSRMatrix *A,
                         HYPRE_Int *CF_marker,
                         hypre_CSRMatrix *S,
                         HYPRE_Int *dof_func,
                         HYPRE_Int num_functions,
                         HYPRE_Int **coarse_dof_func_ptr,
                         hypre_CSRMatrix **P_ptr )
{
   hypre_CSRMatrix *P;
   HYPRE_Int *coarse_dof_func;

   HYPRE_Real *Prolong_coeff;
   /* CSR row pointer / column index of P (before renumbering the columns
      these are fine-grid dof numbers) */
   HYPRE_Int *i_dof_neighbor_coarsedof;
   HYPRE_Int *j_dof_neighbor_coarsedof;

   HYPRE_Int *S_i = hypre_CSRMatrixI(S);
   HYPRE_Int *S_j = hypre_CSRMatrixJ(S);

   HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A);
   HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A);
   HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A);

   /* CSR-like pattern of the local extension operator stored in P_ext_int */
   HYPRE_Int *i_ext_int, *j_ext_int;
   HYPRE_Int ext_int_counter;

   HYPRE_Int *fine_to_coarse;

   HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A);

   HYPRE_Int ierr = 0;
   HYPRE_Int i, j, k, l_loc, i_loc, j_loc;
   HYPRE_Int i_dof, j_dof;
   /* numbering of the dofs in the row of A of the current F-point */
   HYPRE_Int *i_local_to_global;
   HYPRE_Int *i_global_to_local;

   /* HYPRE_Int i_dof_on_list =-1; */

   HYPRE_Int local_dof_counter, max_local_dof_counter=0;

   HYPRE_Int fine_node_counter, coarse_node_counter;

   HYPRE_Int dof_neighbor_coarsedof_counter = 0, coarsedof_counter = 0,
             dof_counter = 0;

   HYPRE_Int *i_fine, *i_coarse;

   /* i_int[local] >= 0: position of that local dof in the "interior" set
      (the current F-point plus its interpolatory C-points); -1 otherwise */
   HYPRE_Int *i_int;

   HYPRE_Int *i_fine_to_global, *i_coarse_to_global;

   /* dense work matrix, addressed as AE[row + col * dof_counter] */
   HYPRE_Real *AE;

   /* HYPRE_Real coeff_sum; */

   /* dense work matrix, addressed as
      P_ext_int[local row + interior col * local_dof_counter] */
   HYPRE_Real *P_ext_int;

   HYPRE_Real diag = 0.e0;

   /*-----------------------------------------------------------------------
    *  First Pass: Determine size of Prolong;
    *-----------------------------------------------------------------------*/

   dof_neighbor_coarsedof_counter = 0;

   /*-----------------------------------------------------------------------
    *  Loop over fine grid.
    *-----------------------------------------------------------------------*/

   for (i = 0; i < num_dofs; i++)
   {
      /*--------------------------------------------------------------------
       *  If i is a c-point, interpolation is the identity.
       *--------------------------------------------------------------------*/

      if (CF_marker[i] >= 0)
      {
         dof_neighbor_coarsedof_counter++;
      }

      /*--------------------------------------------------------------------
       *  If i is a f-point, interpolation is from the C-points that
       *  strongly influence i.
       *--------------------------------------------------------------------*/

      else
      {
         for (j = S_i[i]; j < S_i[i+1]; j++)
         {
            i_dof = S_j[j];
            if (CF_marker[i_dof] >= 0)
            {
               dof_neighbor_coarsedof_counter++;
            }
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate arrays.
    *-----------------------------------------------------------------------*/

   i_dof_neighbor_coarsedof = hypre_CTAlloc(HYPRE_Int, num_dofs+1);
   j_dof_neighbor_coarsedof = hypre_CTAlloc(HYPRE_Int,
                                            dof_neighbor_coarsedof_counter);

   Prolong_coeff = hypre_CTAlloc(HYPRE_Real, dof_neighbor_coarsedof_counter);

   /* Second pass: same traversal as above, now recording the pattern */
   dof_neighbor_coarsedof_counter = 0;

   for (i = 0; i < num_dofs; i++)
   {
      i_dof_neighbor_coarsedof[i] = dof_neighbor_coarsedof_counter;

      /*--------------------------------------------------------------------
       *  If i is a c-point, the neighbor is i;
       *--------------------------------------------------------------------*/

      if (CF_marker[i] >= 0)
      {
         j_dof_neighbor_coarsedof[dof_neighbor_coarsedof_counter] = i;
         dof_neighbor_coarsedof_counter++;
      }

      /*--------------------------------------------------------------------
       *  If i is a f-point, interpolation is from the C-points that
       *  strongly influence i.
       *--------------------------------------------------------------------*/

      else
      {
         for (j = S_i[i]; j < S_i[i+1]; j++)
         {
            i_dof = S_j[j];
            if (CF_marker[i_dof] >= 0)
            {
               j_dof_neighbor_coarsedof[dof_neighbor_coarsedof_counter] = i_dof;
               dof_neighbor_coarsedof_counter++;
            }
         }
      }
   }

   i_dof_neighbor_coarsedof[num_dofs] = dof_neighbor_coarsedof_counter;

   i_global_to_local = hypre_CTAlloc(HYPRE_Int, num_dofs);

   for (i_dof =0; i_dof < num_dofs; i_dof++)
      i_global_to_local[i_dof] = -1;

   /* Find the largest number of distinct column indices in the row of A
      of any F-point; this sizes all local work arrays below.  The
      global-to-local map is reset to -1 after every row. */
   for (i_dof =0; i_dof < num_dofs; i_dof++)
   {
      if (CF_marker[i_dof] < 0)
      {
         local_dof_counter = 0;

         for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
         {
            j_dof = j_dof_dof[j];

            if (i_global_to_local[j_dof] < 0)
            {
               i_global_to_local[j_dof] = local_dof_counter;
               local_dof_counter++;
            }
         }

         if (local_dof_counter > max_local_dof_counter)
            max_local_dof_counter = local_dof_counter;

         for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
         {
            j_dof = j_dof_dof[j];
            i_global_to_local[j_dof] = -1;
         }
      }
   }

   i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);

   AE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter *
                      max_local_dof_counter);

   i_fine = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);
   i_coarse = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);

   i_fine_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);

   i_coarse_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);

   i_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter);

   P_ext_int = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter *
                             max_local_dof_counter);

   /*
   for (i_loc =0; i_loc < max_local_dof_counter; i_loc++)
      for (j_loc =0; j_loc < max_local_dof_counter; j_loc++)
         P_ext_int[j_loc + i_loc * max_local_dof_counter] = 0.e0;
   */

   i_ext_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter+1);
   j_ext_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter *
                             max_local_dof_counter);

   for (l_loc=0; l_loc < max_local_dof_counter; l_loc++)
      i_int[l_loc] = -1;

   /* Main loop: compute the coefficients of P row by row */
   for (i_dof =0; i_dof < num_dofs; i_dof++)
   {
      if (CF_marker[i_dof] < 0)
      {
         /* local numbering of the dofs in row i_dof of A */
         local_dof_counter = 0;

         for (j=i_dof_dof[i_dof]; j<i_dof_dof[i_dof+1]; j++)
         {
            j_dof = j_dof_dof[j];

            if (i_global_to_local[j_dof] < 0)
            {
               i_local_to_global[local_dof_counter] = j_dof;
               i_global_to_local[j_dof] = local_dof_counter;
               local_dof_counter++;
            }
         }

         /* interior set: i_dof itself gets position 0, followed by the
            C-points of its row of P */
         dof_counter = 0;
         i_int[i_global_to_local[i_dof]]=dof_counter;
         dof_counter++;

         for (j = i_dof_neighbor_coarsedof[i_dof];
              j < i_dof_neighbor_coarsedof[i_dof+1]; j++)
         {
            j_dof = j_dof_neighbor_coarsedof[j];

            if (i_int[i_global_to_local[j_dof]] < 0)
            {
               i_int[i_global_to_local[j_dof]] = dof_counter;
               dof_counter++;
            }
         }

         for (i=0; i < dof_counter; i++)
            i_coarse_to_global[i] = -1;

         /* i_coarse[c]: interior position of the c-th C-point of the row;
            i_coarse_to_global: the inverse map */
         coarse_node_counter = 0;
         for (j = i_dof_neighbor_coarsedof[i_dof];
              j < i_dof_neighbor_coarsedof[i_dof+1]; j++)
         {
            i = i_global_to_local[j_dof_neighbor_coarsedof[j]];
            i_coarse[coarse_node_counter] = i_int[i];
            i_coarse_to_global[i_int[i]] = coarse_node_counter;
            coarse_node_counter++;
         }

         /* interior dofs that are not among the C-points are "fine" */
         fine_node_counter = 0;
         for (i=0; i < local_dof_counter; i++)
            if (i_int[i] > -1)
            {
               if (i_coarse_to_global[i_int[i]] < 0)
               {
                  i_fine[fine_node_counter] = i_int[i];
                  i_fine_to_global[i_int[i]] = fine_node_counter;
                  fine_node_counter++;
               }
            }

         /* ============================================================
         hypre_printf("fine nodes: %d;  coarse nodes: %d\n", fine_node_counter,
                coarse_node_counter);
         =========================================================== */

         /* consistency check: every interior dof must be fine or coarse */
         if (fine_node_counter+coarse_node_counter != dof_counter)
         {
            hypre_printf("error in build_Prolong: %d + %d = %d\n",
                         fine_node_counter, coarse_node_counter, dof_counter);
            return -1;
         }

         /*
         hypre_printf("local_dof_counter: %d, dof_counter: %d\n",
                local_dof_counter, dof_counter);
         */

         /* Build the local extension P_ext_int: an interior dof is mapped
            to itself with weight 1; any other local dof gets the weights
            |a_ij| of its interior neighbours (restricted to the same
            function when num_functions > 1), scaled to sum to 1. */
         ext_int_counter = 0;
         for (i_loc =0; i_loc < local_dof_counter; i_loc++)
         {
            i_ext_int[i_loc] = ext_int_counter;

            if (i_int[i_loc] >=0)
            {
               P_ext_int[i_loc + i_int[i_loc] * local_dof_counter] = 1.e0;
               j_ext_int[ext_int_counter] = i_loc;
               ext_int_counter++;
            }
            else
            {
               /* find the neighbors of i_local_to_global[i_loc] */

               /* k is only set, and only read, when num_functions > 1 */
               if (num_functions > 1)
                  k = dof_func[i_local_to_global[i_loc]];
               diag = 0.e0;
               for (j=i_dof_dof[i_local_to_global[i_loc]];
                    j<i_dof_dof[i_local_to_global[i_loc]+1]; j++)
               {
                  j_dof = j_dof_dof[j];
                  if (i_global_to_local[j_dof] >= 0)
                     if (i_int[i_global_to_local[j_dof]] >= 0)
                     {
                        if (num_functions > 1)
                           if (dof_func[j_dof] == k)
                           {
                              j_ext_int[ext_int_counter]= i_global_to_local[j_dof];
                              ext_int_counter++;
                              P_ext_int[i_loc + i_int[i_global_to_local[j_dof]]
                                        *local_dof_counter]= fabs(a_dof_dof[j]);
                              diag +=fabs(a_dof_dof[j]);
                           }
                        if (num_functions== 1)
                        {
                           j_ext_int[ext_int_counter]= i_global_to_local[j_dof];
                           ext_int_counter++;
                           P_ext_int[i_loc + i_int[i_global_to_local[j_dof]]
                                     *local_dof_counter]= fabs(a_dof_dof[j]);
                           diag +=fabs(a_dof_dof[j]);
                        }
                     }
               }

               /* normalise the row just written */
               if (diag > 0.e0)
                  for (j=i_ext_int[i_loc]; j < ext_int_counter; j++)
                     P_ext_int[i_loc + i_int[j_ext_int[j]]*local_dof_counter]
                        /=diag;
            }
         }
         i_ext_int[local_dof_counter] = ext_int_counter;

         /* multiply AE times P_ext_int: ================================== */
         /* only the row of AE belonging to i_dof is formed: row i_dof of A
            times the local extension */
         for (j_loc =0; j_loc < dof_counter; j_loc++)
            AE[i_int[i_global_to_local[i_dof]]+j_loc * dof_counter]= 0.e0;

         i_loc = i_global_to_local[i_dof];
         /* for (l_loc =0; l_loc < local_dof_counter; l_loc++) */
         for (i=i_dof_dof[i_dof]; i < i_dof_dof[i_dof+1]; i++)
         {
            l_loc = i_global_to_local[j_dof_dof[i]];
            for (j=i_ext_int[l_loc]; j < i_ext_int[l_loc+1]; j++)
            {
               j_loc = j_ext_int[j];
               AE[i_int[i_loc]+i_int[j_loc] * dof_counter]+=
                  a_dof_dof[i] * P_ext_int[l_loc + i_int[j_loc] * local_dof_counter];
            }
         }
      }

      /* Fill row i_dof of P.  F-point: -AE(f,c)/AE(f,f) with f = i_fine[0]
         (0 if AE(f,f) is exactly zero); C-point: identity entry. */
      for (i = i_dof_neighbor_coarsedof[i_dof];
           i < i_dof_neighbor_coarsedof[i_dof+1]; i++)
      {
         if (CF_marker[i_dof] < 0)
         {
            j_loc= i_coarse_to_global[i_int[i_global_to_local[
                                               j_dof_neighbor_coarsedof[i]]]];

            if (AE[i_fine[0]+dof_counter*i_fine[0]] !=0.e0)
               Prolong_coeff[i] = -AE[i_fine[0]+dof_counter *i_coarse[j_loc]]
                                  /AE[i_fine[0]+dof_counter*i_fine[0]];
            else
               Prolong_coeff[i] = 0.e0;
         }
         else
            Prolong_coeff[i] = 1.e0;
      }

      /* reset the markers touched for this F-point */
      if (CF_marker[i_dof] < 0)
      {
         i_int[i_global_to_local[i_dof]]=-1;
         for (j = i_dof_neighbor_coarsedof[i_dof];
              j < i_dof_neighbor_coarsedof[i_dof+1]; j++)
         {
            j_dof = j_dof_neighbor_coarsedof[j];

            i_int[i_global_to_local[j_dof]] = -1;
         }

         for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++)
         {
            j_dof = j_dof_dof[j];
            i_global_to_local[j_dof] = -1;
         }
      }
   }

   /*-----------------------------------------------------------------
   for (i_dof =0; i_dof < num_dofs; i_dof++)
   {
      hypre_printf("\ndof %d: has coefficients:\n", i_dof);
      coeff_sum = 0.0;
      for (i = i_dof_neighbor_coarsedof[i_dof];
           i < i_dof_neighbor_coarsedof[i_dof+1]; i++)
      {
         hypre_printf(" %f ", Prolong_coeff[i]);
         coeff_sum=coeff_sum+Prolong_coeff[i];
      }
      hypre_printf("\n coeff_sum: %f \n\n", coeff_sum);
   }
   -----------------------------------------------------------------*/

   /* i_global_to_local is no longer needed as a map and is reused as the
      fine-to-coarse numbering (it is freed under its own name below) */
   fine_to_coarse = i_global_to_local;

   coarsedof_counter = 0;
   for (i=0; i < num_dofs; i++)
      if (CF_marker[i] >=0)
      {
         fine_to_coarse[i] = coarsedof_counter;
         coarsedof_counter++;
      }
      else
         fine_to_coarse[i] = -1;

   /* P takes over Prolong_coeff and the i/j pattern arrays */
   P = hypre_CSRMatrixCreate(num_dofs, coarsedof_counter,
                             i_dof_neighbor_coarsedof[num_dofs]);

   hypre_CSRMatrixData(P) = Prolong_coeff;
   hypre_CSRMatrixI(P) = i_dof_neighbor_coarsedof;
   hypre_CSRMatrixJ(P) = j_dof_neighbor_coarsedof;

   /* renumber the columns of P from fine-grid to coarse-grid indices */
   for (i=0; i < num_dofs; i++)
      for (j=i_dof_neighbor_coarsedof[i];
           j<i_dof_neighbor_coarsedof[i+1]; j++)
         hypre_CSRMatrixJ(P)[j] = fine_to_coarse[j_dof_neighbor_coarsedof[j]];

   *P_ptr = P;

   if (num_functions > 1)
   {
      coarse_dof_func = hypre_CTAlloc(HYPRE_Int, coarsedof_counter);

      coarsedof_counter=0;
      for (i=0; i < num_dofs; i++)
         if (CF_marker[i] >=0)
         {
            coarse_dof_func[coarsedof_counter] = dof_func[i];
            coarsedof_counter++;
         }

      /* return coarse_dof_func array: ---------------------------------------*/

      *coarse_dof_func_ptr = coarse_dof_func;
   }

   hypre_TFree(i_int);

   hypre_TFree(i_coarse);
   hypre_TFree(i_fine);

   hypre_TFree(i_coarse_to_global);
   hypre_TFree(i_fine_to_global);

   hypre_TFree(AE);

   hypre_TFree(i_ext_int);
   hypre_TFree(j_ext_int);
   hypre_TFree(P_ext_int);

   hypre_TFree(i_global_to_local);
   hypre_TFree(i_local_to_global);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixExtractAExt
 *
 * Collects the external rows described by the transpose communication
 * package of A (...CommPkgT(A)) into a new CSR matrix.
 *
 * A serves as the first factor of A*A^T only through ...CommPkgT(A), which
 * says which rows of A (columns of A^T) are needed; that is generally a
 * bigger set than ...CommPkg(A), the rows of B needed for A*B.  Everywhere
 * else A is used in its role as A^T, the matrix whose data has to be
 * passed between processors.
 *
 *   data             nonzero: the data array is attached to the result
 *   pA_ext_row_map   forwarded to hypre_ParCSRMatrixExtractBExt_Arrays
 *
 * The result has recv_vec_starts[num_recvs] rows and as many columns as A
 * has global columns.
 *--------------------------------------------------------------------------*/
hypre_CSRMatrix *
hypre_ParCSRMatrixExtractAExt( hypre_ParCSRMatrix *A,
                               HYPRE_Int data,
                               HYPRE_Int ** pA_ext_row_map )
{
   hypre_ParCSRCommPkg *pkg_T       = hypre_ParCSRMatrixCommPkgT(A);
   hypre_CSRMatrix     *local_diag  = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *local_offd  = hypre_ParCSRMatrixOffd(A);

   HYPRE_Int   n_recvs     = hypre_ParCSRCommPkgNumRecvs(pkg_T);
   HYPRE_Int  *recv_starts = hypre_ParCSRCommPkgRecvVecStarts(pkg_T);
   HYPRE_Int   global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int   ext_rows    = recv_starts[n_recvs];
   HYPRE_Int   ext_nnz;

   HYPRE_Int       *ext_i;
   HYPRE_Int       *ext_j;
   HYPRE_Complex   *ext_data;
   hypre_CSRMatrix *ext;

   /* the literal 1 is the argument that follows `data` in the callee's
      parameter list */
   hypre_ParCSRMatrixExtractBExt_Arrays(
      &ext_i, &ext_j, &ext_data, pA_ext_row_map, &ext_nnz,
      data, 1,
      hypre_ParCSRMatrixComm(A), pkg_T,
      global_cols, n_recvs, hypre_ParCSRCommPkgNumSends(pkg_T),
      hypre_ParCSRMatrixFirstColDiag(A),
      hypre_ParCSRMatrixFirstRowIndex(A),
      recv_starts,
      hypre_ParCSRCommPkgSendMapStarts(pkg_T),
      hypre_ParCSRCommPkgSendMapElmts(pkg_T),
      hypre_CSRMatrixI(local_diag), hypre_CSRMatrixJ(local_diag),
      hypre_CSRMatrixI(local_offd), hypre_CSRMatrixJ(local_offd),
      hypre_ParCSRMatrixColMapOffd(A),
      hypre_CSRMatrixData(local_diag), hypre_CSRMatrixData(local_offd) );

   /* wrap the returned arrays in a CSR matrix */
   ext = hypre_CSRMatrixCreate(ext_rows, global_cols, ext_nnz);
   hypre_CSRMatrixI(ext) = ext_i;
   hypre_CSRMatrixJ(ext) = ext_j;
   if (data)
   {
      hypre_CSRMatrixData(ext) = ext_data;
   }

   return ext;
}
/*--------------------------------------------------------------------------
 * hypre_CreateLambda
 *
 * Builds one combined matrix "Lambda" out of the level matrices
 * A_array[addlvl .. num_levels-1] of the AMG data, where
 * addlvl = max(additive, mult_additive), and stores it in the AMG data
 * together with two vectors Rtilde / Xtilde of matching local length.
 *
 * - The rows of the levels are stacked; level_start[] holds the row offset
 *   of every level inside Lambda.
 * - With d_i = relax_weight[level] / (first stored entry of diag row i)
 *   when relax_type == 0, and d_i = 1 / l1_norms[i] otherwise, row i gets
 *      first diag entry :  (2 - a * d_i) * d_i
 *      other entries    :  (-a_ij * d_j) * d_i
 * - The send/receive lists of the per-level communication packages are
 *   merged into one package L_comm_pkg that is attached to Lambda.
 * - For every level except level 0 the data arrays of F_array[level] and
 *   U_array[level] are freed and redirected into the storage of Rtilde and
 *   Xtilde (ownership flag cleared).
 *
 * Returns Solve_err_flag, which is never changed from 0 here.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_CreateLambda(void *amg_vdata)
{
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   MPI_Comm comm;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector    **F_array;
   hypre_ParVector    **U_array;

   hypre_ParCSRMatrix *A_tmp;
   hypre_ParCSRMatrix *Lambda;
   hypre_CSRMatrix *L_diag;
   hypre_CSRMatrix *L_offd;
   hypre_CSRMatrix *A_tmp_diag;
   hypre_CSRMatrix *A_tmp_offd;
   hypre_ParVector *Xtilde;
   hypre_ParVector *Rtilde;
   hypre_Vector *Xtilde_local;
   hypre_Vector *Rtilde_local;
   hypre_ParCSRCommPkg *comm_pkg;
   hypre_ParCSRCommPkg *L_comm_pkg = NULL;
   hypre_ParCSRCommHandle *comm_handle;
   HYPRE_Real    *L_diag_data;
   HYPRE_Real    *L_offd_data;
   HYPRE_Real    *buf_data = NULL;
   HYPRE_Real    *tmp_data;
   HYPRE_Real    *x_data;
   HYPRE_Real    *r_data;
   HYPRE_Real    *l1_norms;
   HYPRE_Real    *A_tmp_diag_data;
   HYPRE_Real    *A_tmp_offd_data;
   HYPRE_Real    *D_data = NULL;
   HYPRE_Real    *D_data_offd = NULL;
   HYPRE_Int *L_diag_i;
   HYPRE_Int *L_diag_j;
   HYPRE_Int *L_offd_i;
   HYPRE_Int *L_offd_j;
   HYPRE_Int *A_tmp_diag_i;
   HYPRE_Int *A_tmp_offd_i;
   HYPRE_Int *A_tmp_diag_j;
   HYPRE_Int *A_tmp_offd_j;
   HYPRE_Int *L_recv_ptr = NULL;
   HYPRE_Int *L_send_ptr = NULL;
   HYPRE_Int *L_recv_procs = NULL;
   HYPRE_Int *L_send_procs = NULL;
   HYPRE_Int *L_send_map_elmts = NULL;
   HYPRE_Int *recv_procs;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *recv_vec_starts;
   HYPRE_Int *all_send_procs = NULL;
   HYPRE_Int *all_recv_procs = NULL;
   HYPRE_Int *remap = NULL;
   HYPRE_Int *level_start;

   HYPRE_Int       addlvl;
   HYPRE_Int       additive;
   HYPRE_Int       mult_additive;
   HYPRE_Int       num_levels;
   HYPRE_Int       num_add_lvls;
   HYPRE_Int       num_procs;
   HYPRE_Int       num_sends, num_recvs;
   HYPRE_Int       num_sends_L = 0;
   HYPRE_Int       num_recvs_L = 0;
   HYPRE_Int       send_data_L = 0;
   HYPRE_Int       num_rows_L = 0;
   HYPRE_Int       num_rows_tmp = 0;
   HYPRE_Int       num_cols_offd_L = 0;
   HYPRE_Int       num_cols_offd = 0;
   HYPRE_Int       level, i, j, k;
   HYPRE_Int       this_proc, cnt, cnt_diag, cnt_offd;
   HYPRE_Int       cnt_recv, cnt_send, cnt_row, row_start;
   HYPRE_Int       start_diag, start_offd, indx, cnt_map;
   HYPRE_Int       start, j_indx, index, cnt_level;
   HYPRE_Int       max_sends, max_recvs;

   /* Local variables  */
   HYPRE_Int       Solve_err_flag = 0;
   HYPRE_Int       num_threads;
   HYPRE_Int       num_nonzeros_diag;
   HYPRE_Int       num_nonzeros_offd;

   HYPRE_Real  **l1_norms_ptr = NULL;
   HYPRE_Real   *relax_weight = NULL;
   /* NOTE(review): holds an element of the grid-relax-type array but is
      declared as a real and later compared with 0 -- confirm intended type */
   HYPRE_Real    relax_type;

   /* Acquire data and allocate storage */

   /* num_threads is assigned here and not used again in this function */
   num_threads = hypre_NumThreads();

   A_array           = hypre_ParAMGDataAArray(amg_data);
   F_array           = hypre_ParAMGDataFArray(amg_data);
   U_array           = hypre_ParAMGDataUArray(amg_data);
   additive          = hypre_ParAMGDataAdditive(amg_data);
   mult_additive     = hypre_ParAMGDataMultAdditive(amg_data);
   num_levels        = hypre_ParAMGDataNumLevels(amg_data);
   relax_weight      = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_type        = hypre_ParAMGDataGridRelaxType(amg_data)[1];
   comm              = hypre_ParCSRMatrixComm(A_array[0]);

   hypre_MPI_Comm_size(comm,&num_procs);

   l1_norms_ptr      = hypre_ParAMGDataL1Norms(amg_data);

   addlvl = hypre_max(additive, mult_additive);
   num_add_lvls = num_levels+1-addlvl;

   level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1);
   send_data_L = 0;
   num_rows_L  = 0;
   num_cols_offd_L = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;
   level_start[0] = 0;
   cnt = 1;
   max_sends = 0;
   max_recvs = 0;

   /* Pass 1: total rows, offd columns, nonzeros and send entries over all
      merged levels; max_sends/max_recvs are upper bounds on the number of
      distinct neighbour processes (sum over the levels) */
   for (i=addlvl; i < num_levels; i++)
   {
      A_tmp = A_array[i];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd);
      num_rows_L += num_rows_tmp;
      level_start[cnt] = level_start[cnt-1] + num_rows_tmp;
      cnt++;
      num_cols_offd_L += num_cols_offd;
      num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp];
      num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp];
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         max_sends += num_sends;
         if (num_sends)
            send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends);
         max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      }
   }

   /* if either bound reaches num_procs, switch to arrays indexed directly
      by process id (second branch below) */
   if (max_sends >= num_procs ||max_recvs >= num_procs)
   {
      max_sends = num_procs;
      max_recvs = num_procs;
   }
   if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends);
   if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs);

   cnt_send = 0;
   cnt_recv = 0;
   if (max_sends || max_recvs)
   {
      if (max_sends < num_procs && max_recvs < num_procs)
      {
         /* Variant A: gather all neighbour ids, sort, drop duplicates */
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               for (j = 0; j < num_sends; j++)
                  all_send_procs[cnt_send++] = send_procs[j];
               for (j = 0; j < num_recvs; j++)
                  all_recv_procs[cnt_recv++] = recv_procs[j];
            }
         }
         if (max_sends)
         {
            qsort0(all_send_procs, 0, max_sends-1);
            num_sends_L = 1;
            this_proc = all_send_procs[0];
            for (i=1; i < max_sends; i++)
            {
               if (all_send_procs[i] > this_proc)
               {
                  this_proc = all_send_procs[i];
                  all_send_procs[num_sends_L++] = this_proc;
               }
            }
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            for (j=0; j < num_sends_L; j++)
               L_send_procs[j] = all_send_procs[j];
            hypre_TFree(all_send_procs);
         }
         if (max_recvs)
         {
            qsort0(all_recv_procs, 0, max_recvs-1);
            num_recvs_L = 1;
            this_proc = all_recv_procs[0];
            for (i=1; i < max_recvs; i++)
            {
               if (all_recv_procs[i] > this_proc)
               {
                  this_proc = all_recv_procs[i];
                  all_recv_procs[num_recvs_L++] = this_proc;
               }
            }
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            for (j=0; j < num_recvs_L; j++)
               L_recv_procs[j] = all_recv_procs[j];
            hypre_TFree(all_recv_procs);
         }

         L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
         L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);

         /* per merged neighbour: number of entries summed over the levels
            (stored one slot ahead so the prefix sum yields start offsets) */
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
            }
            else
            {
               num_sends = 0;
               num_recvs = 0;
            }
            for (k = 0; k < num_sends; k++)
            {
               this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L);
               L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k];
            }
            for (k = 0; k < num_recvs; k++)
            {
               this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L);
               L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k];
            }
         }

         /* prefix sums */
         L_recv_ptr[0] = 0;
         for (i=1; i < num_recvs_L; i++)
            L_recv_ptr[i+1] += L_recv_ptr[i];

         L_send_ptr[0] = 0;
         for (i=1; i < num_sends_L; i++)
            L_send_ptr[i+1] += L_send_ptr[i];
      }
      else
      {
         /* Variant B: all_send_procs/all_recv_procs have num_procs slots
            and accumulate the entry count per process id */
         num_recvs_L = 0;
         num_sends_L = 0;
         for (i=addlvl; i < num_levels; i++)
         {
            A_tmp = A_array[i];
            comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
            if (comm_pkg)
            {
               num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
               num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
               send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
               recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
               send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
               recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
               for (j = 0; j < num_sends; j++)
               {
                  this_proc = send_procs[j];
                  if (all_send_procs[this_proc] == 0)
                     num_sends_L++;
                  all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j];
               }
               for (j = 0; j < num_recvs; j++)
               {
                  this_proc = recv_procs[j];
                  if (all_recv_procs[this_proc] == 0)
                     num_recvs_L++;
                  all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j];
               }
            }
         }
         if (max_sends)
         {
            L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L);
            L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1);
            num_sends_L = 0;
            for (j=0; j < num_procs; j++)
            {
               /* here this_proc holds a count, not a process id */
               this_proc = all_send_procs[j];
               if (this_proc)
               {
                  L_send_procs[num_sends_L++] = j;
                  L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1];
               }
            }
         }
         if (max_recvs)
         {
            L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L);
            L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1);
            num_recvs_L = 0;
            for (j=0; j < num_procs; j++)
            {
               this_proc = all_recv_procs[j];
               if (this_proc)
               {
                  L_recv_procs[num_recvs_L++] = j;
                  L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1];
               }
            }
         }
      }
   }
   /* NOTE(review): in variant A these arrays were already passed to
      hypre_TFree above; this relies on hypre_TFree tolerating a second
      call on the same variable -- confirm against the macro definition */
   if (max_sends) hypre_TFree(all_send_procs);
   if (max_recvs) hypre_TFree(all_recv_procs);

   L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag);
   L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd);
   hypre_CSRMatrixInitialize(L_diag);
   hypre_CSRMatrixInitialize(L_offd);
   /* data/j pointers are only fetched when there are nonzeros; otherwise
      the locals stay unset */
   if (num_nonzeros_diag)
   {
      L_diag_data = hypre_CSRMatrixData(L_diag);
      L_diag_j = hypre_CSRMatrixJ(L_diag);
   }
   L_diag_i = hypre_CSRMatrixI(L_diag);
   if (num_nonzeros_offd)
   {
      L_offd_data = hypre_CSRMatrixData(L_offd);
      L_offd_j = hypre_CSRMatrixJ(L_offd);
   }
   L_offd_i = hypre_CSRMatrixI(L_offd);

   if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L);
   if (send_data_L)
   {
      L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L);
      buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L);
   }
   if (num_cols_offd_L)
   {
      D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L);
      /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/
      remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);
   }

   /* Rtilde / Xtilde: only the local vector and the ownership flag are set */
   Rtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Rtilde_local = hypre_SeqVectorCreate(num_rows_L);
   hypre_SeqVectorInitialize(Rtilde_local);
   hypre_ParVectorLocalVector(Rtilde) = Rtilde_local;
   hypre_ParVectorOwnsData(Rtilde) = 1;

   Xtilde = hypre_CTAlloc(hypre_ParVector, 1);
   Xtilde_local = hypre_SeqVectorCreate(num_rows_L);
   hypre_SeqVectorInitialize(Xtilde_local);
   hypre_ParVectorLocalVector(Xtilde) = Xtilde_local;
   hypre_ParVectorOwnsData(Xtilde) = 1;

   x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde));
   r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde));

   cnt = 0;
   cnt_level = 0;
   cnt_diag = 0;
   cnt_offd = 0;
   cnt_row = 1;
   L_diag_i[0] = 0;
   L_offd_i[0] = 0;

   /* Pass 2: fill Lambda level by level */
   for (level=addlvl; level < num_levels; level++)
   {
      row_start = level_start[cnt_level];
      if (level != 0)
      {
         /* redirect F/U of this level into the storage of Rtilde/Xtilde */
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0;
         tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level]));
         if (tmp_data) hypre_TFree(tmp_data);
         hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start];
         hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0;
      }
      cnt_level++;

      start_diag = L_diag_i[cnt_row-1];
      start_offd = L_offd_i[cnt_row-1];
      A_tmp = A_array[level];
      A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp);
      A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp);
      A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag);
      A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd);
      A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag);
      A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd);
      A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag);
      A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd);
      num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag);
      if (comm_pkg)
      {
         num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
         num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
         send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
         recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
         send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
         send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
         recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      }
      else
      {
         num_sends = 0;
         num_recvs = 0;
      }

      /* Compute new combined communication package */
      /* L_send_ptr/L_recv_ptr entries are used as write cursors here and
         are advanced in place; they are shifted back after the level loop */
      for (i=0; i < num_sends; i++)
      {
         this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L);
         indx = L_send_ptr[this_proc];
         for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++)
         {
            L_send_map_elmts[indx++] = row_start + send_map_elmts[j];
         }
         L_send_ptr[this_proc] = indx;
      }

      /* remap[c]: column of Lambda's offd part for offd column c of this
         level */
      cnt_map = 0;
      for (i = 0; i < num_recvs; i++)
      {
         this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L);
         indx = L_recv_ptr[this_proc];
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            remap[cnt_map++] = indx++;
         }
         L_recv_ptr[this_proc] = indx;
      }

      /* Compute Lambda */
      /* D_data[0 .. num_rows_tmp-1] is reused for every level.
         NOTE(review): the "omp for" below is not enclosed in a parallel
         region within this function -- confirm it has the intended effect */
      if (relax_type == 0)
      {
         HYPRE_Real rlx_wt = relax_weight[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
         {
            D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]];
            L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
            L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
         }
      }
      else
      {
         l1_norms = l1_norms_ptr[level];
#ifdef HYPRE_USING_OPENMP
#pragma omp for private(i) HYPRE_SMP_SCHEDULE
#endif
         for (i=0; i < num_rows_tmp; i++)
         {
            D_data[i] = 1.0/l1_norms[i];
            L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1];
            L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1];
         }
      }

      if (num_procs > 1)
      {
         /* pack the local d values in send order; presumably the comm
            handle delivers the neighbours' values into D_data_offd --
            TODO confirm.
            NOTE(review): comm_pkg may be NULL here (see the else branch
            above) -- verify the callee accepts that */
         index = 0;
         for (i=0; i < num_sends; i++)
         {
            start = send_map_starts[i];
            for (j=start; j < send_map_starts[i+1]; j++)
               buf_data[index++] = D_data[send_map_elmts[j]];
         }

         comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg,
                                                    buf_data, D_data_offd);
         hypre_ParCSRCommHandleDestroy(comm_handle);
      }

      for (i = 0; i < num_rows_tmp; i++)
      {
         /* the first stored entry of the row is written to column
            i+row_start, i.e. it is treated as the diagonal entry */
         j_indx = A_tmp_diag_i[i];
         L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i];
         L_diag_j[cnt_diag++] = i+row_start;
         for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++)
         {
            j_indx = A_tmp_diag_j[j];
            L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i];
            L_diag_j[cnt_diag++] = j_indx+row_start;
         }
         for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++)
         {
            j_indx = A_tmp_offd_j[j];
            L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i];
            L_offd_j[cnt_offd++] = remap[j_indx];
         }
      }
      cnt_row += num_rows_tmp;
   }

   /* the cursors now hold the END of every segment: shift them up by one
      slot to turn them back into start offsets */
   if (L_send_ptr)
   {
      for (i=num_sends_L-1; i > 0; i--)
         L_send_ptr[i] = L_send_ptr[i-1];
      L_send_ptr[0] = 0;
   }
   else
      L_send_ptr = hypre_CTAlloc(HYPRE_Int,1);

   if (L_recv_ptr)
   {
      for (i=num_recvs_L-1; i > 0; i--)
         L_recv_ptr[i] = L_recv_ptr[i-1];
      L_recv_ptr[0] = 0;
   }
   else
      L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1);

   L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1);

   hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L;
   hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L;
   hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs;
   hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs;
   hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr;
   hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr;
   hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts;
   hypre_ParCSRCommPkgComm(L_comm_pkg) = comm;

   /* only these fields of Lambda are set explicitly; the rest keeps the
      zero fill of hypre_CTAlloc */
   Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1);
   hypre_ParCSRMatrixDiag(Lambda) = L_diag;
   hypre_ParCSRMatrixOffd(Lambda) = L_offd;
   hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg;
   hypre_ParCSRMatrixComm(Lambda) = comm;
   hypre_ParCSRMatrixOwnsData(Lambda) = 1;

   hypre_ParAMGDataLambda(amg_data) = Lambda;
   hypre_ParAMGDataRtilde(amg_data) = Rtilde;
   hypre_ParAMGDataXtilde(amg_data) = Xtilde;

   hypre_TFree(D_data_offd);
   hypre_TFree(D_data);
   /* NOTE(review): buf_data is passed to hypre_TFree twice when
      num_procs > 1 (here and two lines below) -- redundant at best */
   if (num_procs > 1) hypre_TFree(buf_data);
   hypre_TFree(remap);
   hypre_TFree(buf_data);
   hypre_TFree(level_start);

   return Solve_err_flag;
}
/*--------------------------------------------------------------------------
 * hypre_GenerateStencilMatrix
 *
 * Builds the CSR matrix of a constant-coefficient stencil on an
 * nx x ny x nz grid.  Grid point (ix,iy,iz) has the linear index
 * ix + nx*(iy + ny*iz); stencil entries that would reach outside the grid
 * are dropped.
 *
 * Format of the stencil file `infile`:
 *    stencil_size
 *    s  dx dy dz  value        (stencil_size records, 0 <= s < stencil_size)
 * The diagonal entry must be record 0.
 *
 * Every record that is read is echoed with hypre_printf.
 *
 * Returns the new matrix, or NULL if the file cannot be opened or is
 * malformed (bad header, record index out of range, short record).
 *--------------------------------------------------------------------------*/
hypre_CSRMatrix *
hypre_GenerateStencilMatrix(HYPRE_Int  nx,
                            HYPRE_Int  ny,
                            HYPRE_Int  nz,
                            char      *infile )
{
   hypre_CSRMatrix *A;

   HYPRE_Int *A_i;
   HYPRE_Int *A_j;
   double    *A_data;

   HYPRE_Int  grid_size = nx*ny*nz;

   HYPRE_Int  stencil_size;
   typedef HYPRE_Int Index[3];
   Index     *stencil_offsets;
   double    *stencil_values;

   HYPRE_Int  ix, iy, iz, i, j, k, s, ss;
   HYPRE_Int  I, J, jj;
   HYPRE_Int  read_ok = 1;

   FILE      *fp;

   /*---------------------------------------------------
    * read in the stencil (diagonal must be first)
    *---------------------------------------------------*/

   fp = fopen(infile, "r");
   if (fp == NULL)
   {
      hypre_printf("hypre_GenerateStencilMatrix: cannot open stencil file\n");
      return NULL;
   }

   if (hypre_fscanf(fp, "%d\n", &stencil_size) != 1 || stencil_size < 0)
   {
      hypre_printf("hypre_GenerateStencilMatrix: bad stencil size\n");
      fclose(fp);
      return NULL;
   }

   stencil_offsets = hypre_CTAlloc(Index,  stencil_size);
   stencil_values  = hypre_CTAlloc(double, stencil_size);

   for (s = 0; s < stencil_size; s++)
   {
      /* ss is used as an array index, so it must be validated first */
      if (hypre_fscanf(fp, "%d", &ss) != 1 || ss < 0 || ss >= stencil_size)
      {
         read_ok = 0;
         break;
      }
      if (hypre_fscanf(fp, "%d%d%d %lf\n",
                       &stencil_offsets[ss][0],
                       &stencil_offsets[ss][1],
                       &stencil_offsets[ss][2],
                       &stencil_values[ss]) != 4)
      {
         read_ok = 0;
         break;
      }
      hypre_printf("%d %d %d %d %f\n", ss,
                   stencil_offsets[ss][0], stencil_offsets[ss][1],
                   stencil_offsets[ss][2], stencil_values[ss]);
   }

   fclose(fp);

   if (!read_ok)
   {
      hypre_printf("hypre_GenerateStencilMatrix: malformed stencil entry\n");
      hypre_TFree(stencil_offsets);
      hypre_TFree(stencil_values);
      return NULL;
   }

   /*---------------------------------------------------
    * set up matrix
    *---------------------------------------------------*/

   A_i    = hypre_CTAlloc(HYPRE_Int, grid_size + 1);
   A_j    = hypre_CTAlloc(HYPRE_Int, grid_size * stencil_size);
   A_data = hypre_CTAlloc(double,    grid_size * stencil_size);

   jj = 0;
   for (iz = 0; iz < nz; iz++)
   {
      for (iy = 0; iy < ny; iy++)
      {
         for (ix = 0; ix < nx; ix++)
         {
            /* the plane stride is nx*ny (it used to be ny*nz, which is
               only correct when nx == nz) */
            I = ix + nx*(iy + ny*iz);

            A_i[I] = jj;

            for (s = 0; s < stencil_size; s++)
            {
               i = ix + stencil_offsets[s][0];
               j = iy + stencil_offsets[s][1];
               k = iz + stencil_offsets[s][2];

               /* keep only neighbours that lie inside the grid */
               if ((i > -1) && (i < nx) &&
                   (j > -1) && (j < ny) &&
                   (k > -1) && (k < nz))
               {
                  J = i + nx*(j + ny*k);

                  A_j[jj] = J;
                  A_data[jj] = stencil_values[s];

                  jj++;
               }
            }
         }
      }
   }
   A_i[grid_size] = jj;

   /* the stencil tables are only needed during assembly */
   hypre_TFree(stencil_offsets);
   hypre_TFree(stencil_values);

   A = hypre_CSRMatrixCreate(grid_size, grid_size, A_i[grid_size]);

   hypre_CSRMatrixI(A)    = A_i;
   hypre_CSRMatrixJ(A)    = A_j;
   hypre_CSRMatrixData(A) = A_data;

   return A;
}
/*--------------------------------------------------------------------------
 * hypre_ParChordMatrixToParCSRMatrix
 *
 * Converts the ParChord matrix Ac into a newly created ParCSR matrix, which
 * is returned through *pAp.
 *
 * The in-chords of Ac (one list per "inprocessor") are first regrouped by
 * local row (idof) into temporary CSR-style arrays; those arrays are attached
 * to a temporary local CSR matrix and handed to GenerateDiagAndOffd to fill
 * the diag and offd parts of the result.
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParChordMatrixToParCSRMatrix( hypre_ParChordMatrix *Ac,
                                    MPI_Comm              comm,
                                    hypre_ParCSRMatrix  **pAp )
{
   /* Some parts of this function are copied from
      hypre_CSRMatrixToParCSRMatrix. */

   hypre_ParCSRMatrix *Ap;
   HYPRE_Int          *row_starts, *col_starts;
   HYPRE_Int           global_num_rows, global_num_cols, my_id, num_procs;
   HYPRE_Int           num_cols_offd, num_nonzeros_diag, num_nonzeros_offd;
   HYPRE_Int          *local_num_rows;
   /* not computed   HYPRE_Int          *local_num_nonzeros; */
   HYPRE_Int           num_nonzeros, first_col_diag, last_col_diag;
   HYPRE_Int           i, ic, ij, ir, ilocal, p, r, r_p, r_global, r_local, jlen;
   HYPRE_Int          *a_i, *a_j, *ilen;
   HYPRE_Int         **rdofs, **ps;
   double              data;
   double             *a_data;
   double            **datas;
   hypre_CSRMatrix    *local_A;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   hypre_ParChordMatrix_RowStarts( Ac, comm, &row_starts, &global_num_cols );
   /* ... this function works correctly only under some assumptions;
      see the function definition for details */
   global_num_rows = row_starts[num_procs] - row_starts[0];

   col_starts = NULL;
   /* The offd and diag blocks aren't defined until we have both row
      and column partitions... */
   num_cols_offd     = 0;
   num_nonzeros_diag = 0;
   num_nonzeros_offd = 0;

   Ap = hypre_ParCSRMatrixCreate( comm, global_num_rows, global_num_cols,
                                  row_starts, col_starts,
                                  num_cols_offd, num_nonzeros_diag,
                                  num_nonzeros_offd );
   *pAp = Ap;

   /* From here on use the partitions as stored in the new matrix. */
   row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   col_starts = hypre_ParCSRMatrixColStarts(Ap);

   local_num_rows = hypre_CTAlloc(HYPRE_Int, num_procs);
   for (i = 0; i < num_procs; i++)
   {
      local_num_rows[i] = row_starts[i+1] - row_starts[i];
   }

   /* Total number of in-chords = number of nonzeros held locally. */
   num_nonzeros = 0;
   for ( p = 0; p < hypre_ParChordMatrixNumInprocessors(Ac); ++p )
   {
      num_nonzeros += hypre_ParChordMatrixNumInchords(Ac)[p];
   }

   local_A = hypre_CSRMatrixCreate( local_num_rows[my_id], global_num_cols,
                                    num_nonzeros );

   /* Compute local CSRMatrix-like i,j arrays for this processor. */

   /* Per-row scratch lists: for row i, entry n holds the inprocessor index
      (ps), the rdof local to that processor (rdofs) and the value (datas);
      ilen[i] is the number of entries collected so far. */
   ps    = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) );
   rdofs = hypre_CTAlloc( HYPRE_Int*, hypre_ParChordMatrixNumIdofs(Ac) );
   datas = hypre_CTAlloc( double*, hypre_ParChordMatrixNumIdofs(Ac) );
   ilen  = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac) );
   jlen  = 0;
   for ( i = 0; i < hypre_ParChordMatrixNumIdofs(Ac); ++i )
   {
      ilen[i]  = 0;
      ps[i]    = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) );
      rdofs[i] = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumRdofs(Ac) );
      datas[i] = hypre_CTAlloc( double, hypre_ParChordMatrixNumRdofs(Ac) );
      /* ... rdofs[i], datas[i] will generally, not always, be much too big */
   }

   /* Bucket every in-chord by its local row. */
   for ( p = 0; p < hypre_ParChordMatrixNumInprocessors(Ac); ++p )
   {
      for ( ic = 0; ic < hypre_ParChordMatrixNumInchords(Ac)[p]; ++ic )
      {
         ilocal = hypre_ParChordMatrixInchordIdof(Ac)[p][ic];
         r      = hypre_ParChordMatrixInchordRdof(Ac)[p][ic];
         data   = hypre_ParChordMatrixInchordData(Ac)[p][ic];
         ps[ilocal][ ilen[ilocal] ]    = p;
         rdofs[ilocal][ ilen[ilocal] ] = r;
         datas[ilocal][ ilen[ilocal] ] = data;
         ++ilen[ilocal];
         ++jlen;
      }
   }

   /* Flatten the per-row lists into CSR row-pointer / column / data arrays. */
   a_i    = hypre_CTAlloc( HYPRE_Int, hypre_ParChordMatrixNumIdofs(Ac)+1 );
   a_j    = hypre_CTAlloc( HYPRE_Int, jlen );
   a_data = hypre_CTAlloc( double, jlen );
   a_i[0] = 0;
   for ( ilocal = 0; ilocal < hypre_ParChordMatrixNumIdofs(Ac); ++ilocal )
   {
      a_i[ilocal+1] = a_i[ilocal] + ilen[ilocal];
      ir = 0;
      for ( ij = a_i[ilocal]; ij < a_i[ilocal+1]; ++ij )
      {
         p   = ps[ilocal][ir];
         r_p = rdofs[ilocal][ir];  /* local in proc. p */
         r_global = r_p + hypre_ParChordMatrixFirstindexRdof(Ac)[p];
         r_local  = r_global - hypre_ParChordMatrixFirstindexRdof(Ac)[my_id];
         a_j[ij]    = r_local;
         a_data[ij] = datas[ilocal][ir];
         ir++;
      }
   }

   for ( i = 0; i < hypre_ParChordMatrixNumIdofs(Ac); ++i )
   {
      hypre_TFree( ps[i] );
      hypre_TFree( rdofs[i] );
      hypre_TFree( datas[i] );
   }
   hypre_TFree( ps );
   hypre_TFree( rdofs );
   hypre_TFree( datas );
   hypre_TFree( ilen );

   first_col_diag = col_starts[my_id];
   last_col_diag  = col_starts[my_id+1] - 1;

   hypre_CSRMatrixData(local_A) = a_data;
   hypre_CSRMatrixI(local_A)    = a_i;
   hypre_CSRMatrixJ(local_A)    = a_j;
   /* local_A only borrows the arrays; this function stays their owner. */
   hypre_CSRMatrixOwnsData(local_A) = 0;

   GenerateDiagAndOffd(local_A, Ap, first_col_diag, last_col_diag);

   /* The temporary arrays are allocated above on every rank, so every rank
      (not only rank 0) has to release them here; otherwise they leak, since
      local_A does not own its data. */
   hypre_TFree(a_data);
   /* ... the data has been copied into different diag & offd arrays of Ap */
   hypre_TFree(a_j);
   hypre_TFree(a_i);

   /* set pointers back to NULL before destroying */
   hypre_CSRMatrixData(local_A) = NULL;
   hypre_CSRMatrixI(local_A)    = NULL;
   hypre_CSRMatrixJ(local_A)    = NULL;

   hypre_CSRMatrixDestroy(local_A);
   hypre_TFree(local_num_rows);
   /* hypre_TFree(csr_matrix_datatypes);*/

   return 0;
}