/*--------------------------------------------------------------------------
 * hypre_MatTCommPkgCreate_core
 *
 * Core routine for building the communication package needed to form A*A^T.
 * Unlike the A*B case (where the owner of each needed row is known exactly
 * from the row partitioning), here every processor may hold columns matching
 * another processor's offd columns, so all column lists are exchanged with
 * all processors and matches are detected locally.
 *
 * Outputs (returned through the p_* pointer arguments; all arrays are
 * hypre_CTAlloc'd here and owned by the caller):
 *   *p_num_sends / *p_send_procs / *p_send_map_starts / *p_send_map_elmts
 *      - which processors to send rows to, and which local rows to send.
 *   *p_num_recvs / *p_recv_procs / *p_recv_vec_starts
 *      - which processors will send to me, and how much.
 *
 * NOTE(review): the 'data' argument (=1 float matrix, =0 Boolean) is never
 * referenced in this body — presumably kept for interface symmetry; confirm
 * against the callers before removing.
 *--------------------------------------------------------------------------*/
void hypre_MatTCommPkgCreate_core (
   /* input args: */
   MPI_Comm comm, HYPRE_Int * col_map_offd, HYPRE_Int first_col_diag,
   HYPRE_Int * col_starts, HYPRE_Int num_rows_diag, HYPRE_Int num_cols_diag,
   HYPRE_Int num_cols_offd, HYPRE_Int * row_starts, HYPRE_Int firstColDiag,
   HYPRE_Int * colMapOffd,
   HYPRE_Int * mat_i_diag, HYPRE_Int * mat_j_diag,
   HYPRE_Int * mat_i_offd, HYPRE_Int * mat_j_offd,
   HYPRE_Int data,   /* = 1 for a matrix with floating-point data, =0 for Boolean matrix */
   /* pointers to output args: */
   HYPRE_Int * p_num_recvs, HYPRE_Int ** p_recv_procs, HYPRE_Int ** p_recv_vec_starts,
   HYPRE_Int * p_num_sends, HYPRE_Int ** p_send_procs, HYPRE_Int ** p_send_map_starts,
   HYPRE_Int ** p_send_map_elmts )
{
   HYPRE_Int  num_sends;
   HYPRE_Int *send_procs;
   HYPRE_Int *send_map_starts;
   HYPRE_Int *send_map_elmts;
   HYPRE_Int  num_recvs;
   HYPRE_Int *recv_procs;
   HYPRE_Int *recv_vec_starts;
   HYPRE_Int  i, j, j2, k, ir, rowmin, rowmax;
   HYPRE_Int *tmp, *recv_buf, *displs, *info, *send_buf, *all_num_sends3;
   HYPRE_Int  num_procs, my_id, num_elmts;
   HYPRE_Int  local_info, index, index2;
   HYPRE_Int  pmatch, col, kc, p;
   HYPRE_Int * recv_sz_buf;
   HYPRE_Int * row_marker;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   info = hypre_CTAlloc(HYPRE_Int, num_procs);

   /* ----------------------------------------------------------------------
    * Algorithm background.
    *
    * For A*B: for each off-diagonal column i of A, the processor owning
    * row i of B is known exactly from the (even, by-row) distribution, so
    * only those processors need to be contacted.
    *
    * For A*A^T: A^T is not distributed by rows the way B is.  Given a
    * column i in the local A-offd or A-diag, every processor that also has
    * column i (in diag OR offd blocks) may need my rows — no class of
    * blocks can be ruled out, so all data is exchanged with all processors.
    *
    * The exchanged structure 'tmp' contains, for each processor:
    *    (no. of i's, i1, i2, ...)
    * where i is the global number of a column falling in that processor's
    * col_starts range.  Two communication steps: Allgather of sizes
    * (local_info -> info), then Allgatherv of the data (tmp -> recv_buf).
    * Then, scanning recv_buf per (sink) processor p, each column p listed
    * is checked against my columns; matching rows of mine are recorded in
    * send_map_elmts, with p in send_procs and offsets in send_map_starts.
    * ---------------------------------------------------------------------*/

   /* Provisional value; num_recvs is recomputed from the handshake below. */
   num_recvs = num_procs - 1;
   /* One count slot per processor plus one entry per local column. */
   local_info = num_procs + num_cols_offd + num_cols_diag;

   hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * Generate information to be sent: tmp contains for each recv_proc:
    * number of elements to be received for this processor, then the
    * element indices (in this order).
    * ---------------------------------------------------------------------*/

   displs = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   displs[0] = 0;
   for (i = 1; i < num_procs + 1; i++)
      displs[i] = displs[i - 1] + info[i - 1];
   recv_buf = hypre_CTAlloc(HYPRE_Int, displs[num_procs]);
   tmp = hypre_CTAlloc(HYPRE_Int, local_info);

   j = 0;
   for (i = 0; i < num_procs; i++)
   {
      j2 = j++;      /* reserve slot j2 for the count of columns for proc i */
      tmp[j2] = 0;
      /* my offd columns that lie in proc i's diag column range */
      for (k = 0; k < num_cols_offd; k++)
         if (col_map_offd[k] >= col_starts[i] && col_map_offd[k] < col_starts[i + 1])
         {
            tmp[j++] = col_map_offd[k];
            ++(tmp[j2]);
         };
      /* my diag columns that lie in proc i's diag column range */
      for (k = 0; k < num_cols_diag; k++)
         if ( k + first_col_diag >= col_starts[i] && k + first_col_diag < col_starts[i + 1] )
         {
            tmp[j++] = k + first_col_diag;
            ++(tmp[j2]);
         }
   }

   hypre_MPI_Allgatherv(tmp, local_info, HYPRE_MPI_INT, recv_buf, info, displs, HYPRE_MPI_INT, comm);

   /* ----------------------------------------------------------------------
    * Determine send_procs and the actual elements to be sent
    * (send_map_elmts), and send_map_starts whose i-th entry points to the
    * beginning of the elements to be sent to proc i.
    *
    * send_procs:      processors p to send to.
    * send_map_starts: for each p, gives range of indices in send_map_elmts.
    * send_map_elmts:  local row numbers to send; each row is selected
    *                  because it has a nonzero on a column needed by p.
    * ---------------------------------------------------------------------*/

   num_sends = num_procs;                      /* may turn out to be less, but we can't know yet */
   num_elmts = (num_procs - 1) * num_rows_diag; /* crude upper bound on rows sent */
   send_procs = hypre_CTAlloc(HYPRE_Int, num_sends);
   send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends + 1);
   send_map_elmts = hypre_CTAlloc(HYPRE_Int, num_elmts);
   row_marker = hypre_CTAlloc(HYPRE_Int, num_rows_diag);

   index = 0;    /* current slot in send_procs / send_map_starts */
   index2 = 0;   /* current slot in send_map_elmts */
   send_map_starts[0] = 0;
   for (i = 0; i < num_procs; i++)
   {
      send_map_starts[index + 1] = send_map_starts[index];
      j = displs[i];
      pmatch = 0;
      /* row_marker prevents sending the same row to proc i twice */
      for ( ir = 0; ir < num_rows_diag; ++ir )
         row_marker[ir] = 0;
      while ( j < displs[i + 1])
      {
         num_elmts = recv_buf[j++];   /* no. of columns proc. i wants */
         for ( k = 0; k < num_elmts; k++ )
         {
            col = recv_buf[j++];      /* a global column no. at proc. i */
            /* match against my offd columns */
            for ( kc = 0; kc < num_cols_offd; kc++ )
            {
               if ( col_map_offd[kc] == col && i != my_id )
               {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* We can't send columns, so plan to send all of my rows
                     which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
                  for ( ir = rowmin; ir <= rowmax; ++ir )
                  {
                     if ( row_marker[ir] == 0 )
                     {
                        row_marker[ir] = 1;
                        ++send_map_starts[index + 1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
            /* match against my diag columns (my owned row/column range) */
            for ( kc = row_starts[my_id]; kc < row_starts[my_id + 1]; kc++ )
            {
               if ( kc == col && i != my_id )
               {
                  /* this processor has the same column as proc. i (but is different) */
                  pmatch = 1;
                  send_procs[index] = i;
                  /* Plan to send all of my rows which use this column... */
                  RowsWithColumn( &rowmin, &rowmax, col,
                                  num_rows_diag,
                                  firstColDiag, colMapOffd,
                                  mat_i_diag, mat_j_diag, mat_i_offd, mat_j_offd );
                  for ( ir = rowmin; ir <= rowmax; ++ir )
                  {
                     if ( row_marker[ir] == 0 )
                     {
                        row_marker[ir] = 1;
                        ++send_map_starts[index + 1];
                        send_map_elmts[index2++] = ir;
                     }
                  }
               }
            }
         }
      }
      if ( pmatch ) index++;   /* proc i only counts if at least one match */
   }
   num_sends = index;   /* no. of procs rows will be sent to */

   /* Compute receive arrays recv_procs, recv_vec_starts.
      NOTE(review): recv_procs is first filled with every rank != my_id
      here, then overwritten below from recv_sz_buf — this initial fill
      appears redundant; confirm before removing. */
   recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs);
   recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs + 1);
   j2 = 0;
   for (i = 0; i < num_procs; i++)
   {
      if ( i != my_id ) { recv_procs[j2] = i; j2++; };
   };

   /* Compute recv_vec_starts: for each processor p, figure out how many
      rows p will send to me.  I already know how many rows I send to each
      p (send_map_starts[index+1]-send_map_starts[index] when
      send_procs[index]=p), so one more exchange is needed.  Chosen scheme:
      Allgather(v) of (dest, src, size) triples — simple, and the excess
      data is small compared to the earlier communication.  (Alternatives:
      pairwise Allgatherv, or point-to-point ISend/IRecv of each size.) */
   send_buf = hypre_CTAlloc( HYPRE_Int, 3 * num_sends );
   all_num_sends3 = hypre_CTAlloc( HYPRE_Int, num_procs );

   /* gather num_sends, to set up the size for the main comm. step */
   i = 3 * num_sends;
   hypre_MPI_Allgather( &i, 1, HYPRE_MPI_INT, all_num_sends3, 1, HYPRE_MPI_INT, comm );
   displs[0] = 0;
   for ( p = 0; p < num_procs; ++p )
   {
      displs[p + 1] = displs[p] + all_num_sends3[p];
   };
   recv_sz_buf = hypre_CTAlloc( HYPRE_Int, displs[num_procs] );

   /* pack (destination proc, my id, row count) triples */
   index = 0;
   for ( i = 0; i < num_sends; ++i )
   {
      send_buf[index++] = send_procs[i];   /* processor to send to */
      send_buf[index++] = my_id;
      send_buf[index++] = send_map_starts[i + 1] - send_map_starts[i];  /* size of info to send */
   };

   hypre_MPI_Allgatherv( send_buf, 3 * num_sends, HYPRE_MPI_INT,
                         recv_sz_buf, all_num_sends3, displs, HYPRE_MPI_INT, comm);

   /* scan all triples; those addressed to me define my receives */
   recv_vec_starts[0] = 0;
   j2 = 0;
   j = 0;
   for ( i = 0; i < displs[num_procs]; i = i + 3 )
   {
      j = i;
      if ( recv_sz_buf[j++] == my_id )
      {
         recv_procs[j2] = recv_sz_buf[j++];
         recv_vec_starts[j2 + 1] = recv_vec_starts[j2] + recv_sz_buf[j++];
         j2++;
      }
   }
   num_recvs = j2;   /* actual number of receiving partners */

#if 0
   hypre_printf("num_procs=%i send_map_starts (%i):",num_procs,num_sends+1);
   for( i=0; i<=num_sends; ++i ) hypre_printf(" %i", send_map_starts[i] );
   hypre_printf(" send_procs (%i):",num_sends);
   for( i=0; i<num_sends; ++i ) hypre_printf(" %i", send_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_sends=%i send_buf[0,1,2]=%i %i %i",
                my_id, num_sends, send_buf[0], send_buf[1], send_buf[2] );
   hypre_printf(" all_num_sends3[0,1]=%i %i\n", all_num_sends3[0], all_num_sends3[1] );
   hypre_printf("my_id=%i rcv_sz_buf (%i):", my_id, displs[num_procs] );
   for( i=0; i<displs[num_procs]; ++i ) hypre_printf(" %i", recv_sz_buf[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i recv_vec_starts (%i):",my_id,num_recvs+1);
   for( i=0; i<=num_recvs; ++i ) hypre_printf(" %i", recv_vec_starts[i] );
   hypre_printf(" recv_procs (%i):",num_recvs);
   for( i=0; i<num_recvs; ++i ) hypre_printf(" %i", recv_procs[i] );
   hypre_printf("\n");
   hypre_printf("my_id=%i num_recvs=%i recv_sz_buf[0,1,2]=%i %i %i\n",
                my_id, num_recvs, recv_sz_buf[0], recv_sz_buf[1], recv_sz_buf[2] );
#endif

   hypre_TFree(send_buf);
   hypre_TFree(all_num_sends3);
   hypre_TFree(tmp);
   hypre_TFree(recv_buf);
   hypre_TFree(displs);
   hypre_TFree(info);
   hypre_TFree(recv_sz_buf);
   hypre_TFree(row_marker);

   /* finish up with the hand-coded call-by-reference... */
   *p_num_recvs = num_recvs;
   *p_recv_procs = recv_procs;
   *p_recv_vec_starts = recv_vec_starts;
   *p_num_sends = num_sends;
   *p_send_procs = send_procs;
   *p_send_map_starts = send_map_starts;
   *p_send_map_elmts = send_map_elmts;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Solve the coarsest AMG level sequentially: gather the level-p_level
 * right-hand side f and initial guess u from all participating processors
 * onto every processor's replicated coarse vectors (F_coarse / U_coarse),
 * run the sequential BoomerAMG solve, and copy each processor's slice of
 * the solution back into its local part of Par_U_array[p_level].
 *
 * Returns Solve_err_flag (always 0 here; no error paths are checked).
 *
 * NOTE(review): another definition of hypre_seqAMGCycle (HYPRE_Real-based,
 * with a 'redundant' option) appears later in this file/chunk — the two
 * presumably come from different versions and cannot coexist in one
 * translation unit; confirm which one the build actually compiles.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector **Par_F_array,
                   hypre_ParVector **Par_U_array )
{
   hypre_ParVector *Aux_U;
   hypre_ParVector *Aux_F;

   /* Local variables */
   HYPRE_Int Solve_err_flag = 0;
   HYPRE_Int n;
   HYPRE_Int i;

   hypre_Vector *u_local;
   double *u_data;

   HYPRE_Int first_index;   /* global index of my first row in the coarse system */

   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data = hypre_VectorData(u_local);
   n = hypre_VectorSize(u_local);

   /* A_coarse is only set on processors that participate in the
      sequential solve (see hypre_seqAMGSetup). */
   if (A_coarse)
   {
      double *f_data;
      hypre_Vector *f_local;
      hypre_Vector *tmp_vec;

      HYPRE_Int nf;
      HYPRE_Int local_info;
      double *recv_buf;
      HYPRE_Int *displs, *info;
      HYPRE_Int size;            /* NOTE(review): set below but never used */
      HYPRE_Int new_num_procs;

      hypre_MPI_Comm_size(new_comm, &new_num_procs);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf = hypre_VectorSize(f_local);

      /* first f: gather everyone's local sizes, build displacements */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);

      displs = hypre_CTAlloc(HYPRE_Int, new_num_procs + 1);
      displs[0] = 0;
      for (i = 1; i < new_num_procs + 1; i++)
         displs[i] = displs[i - 1] + info[i - 1];
      size = displs[new_num_procs];

      /* gather f directly into F_coarse's local data (assumes that vector
         was sized to hold the full coarse system in setup) */
      tmp_vec = hypre_ParVectorLocalVector(F_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      hypre_MPI_Allgatherv ( f_data, nf, hypre_MPI_DOUBLE,
                             recv_buf, info, displs,
                             hypre_MPI_DOUBLE, new_comm );

      /* then u: same gather into U_coarse's local data */
      tmp_vec = hypre_ParVectorLocalVector(U_coarse);
      recv_buf = hypre_VectorData(tmp_vec);
      hypre_MPI_Allgatherv ( u_data, n, hypre_MPI_DOUBLE,
                             recv_buf, info, displs,
                             hypre_MPI_DOUBLE, new_comm );

      /* clean up */
      hypre_TFree(displs);
      hypre_TFree(info);

      /* every participating proc solves the (replicated) coarse system */
      hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);

      /* copy my part of U to parallel vector */
      {
         double *local_data;

         local_data = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index + i];
         }
      }
   }

   return(Solve_err_flag);
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGSetup
 *
 * Convert the parallel matrix at level p_level into a sequential
 * (replicated) matrix on every processor that owns rows, create a
 * sub-communicator of those processors, build F/U work vectors, and set
 * up a sequential BoomerAMG solver on the replicated system.  The results
 * (coarse solver, A/F/U coarse, new communicator) are stored in amg_data.
 *
 * Returns 0.
 *
 * NOTE(review): 'coarse_threshold' is accepted but never read here, and
 * 'dof_func_array' / 'not_finished_coarsening' are set but unused —
 * presumably leftovers from a fuller coarsening loop; confirm before
 * removing.  Also, raw MPI_Comm_group / MPI_Comm_create are mixed with
 * hypre_MPI_* wrappers — verify this is intentional for this build config.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   HYPRE_Int coarse_threshold)
{
   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);
   MPI_Comm comm = hypre_ParCSRMatrixComm(Par_A_array[0]);
   MPI_Comm new_comm, seq_comm;

   hypre_ParCSRMatrix *A_seq = NULL;
   hypre_CSRMatrix *A_seq_diag;
   hypre_CSRMatrix *A_seq_offd;
   hypre_ParVector *F_seq = NULL;
   hypre_ParVector *U_seq = NULL;

   hypre_ParCSRMatrix *A;

   HYPRE_Int **dof_func_array;
   HYPRE_Int num_procs, my_id;

   HYPRE_Int not_finished_coarsening;
   HYPRE_Int level;

   HYPRE_Solver coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);

   /*MPI Stuff */
   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   /*initial */
   level = p_level;
   not_finished_coarsening = 1;

   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      double *A_seq_data = NULL;
      HYPRE_Int *A_seq_i = NULL;
      HYPRE_Int *A_seq_offd_i = NULL;
      HYPRE_Int *A_seq_j = NULL;
      double *A_tmp_data = NULL;
      HYPRE_Int *A_tmp_i = NULL;
      HYPRE_Int *A_tmp_j = NULL;
      HYPRE_Int *info, *displs, *displs2;
      HYPRE_Int i, j, size, num_nonzeros, total_nnz, cnt;

      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd);
      double *A_diag_data = hypre_CSRMatrixData(A_diag);
      double *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);

      hypre_MPI_Group orig_group, new_group;
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      /* gather every processor's local row count */
      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      /* compact info/ranks to only the processors that own rows;
         these form the sub-communicator for the sequential solve */
      new_num_procs = 0;
      for (i = 0; i < num_procs; i++)
         if (info[i])
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group);

      if (num_rows)
      {
         /* alloc space in seq data structure only for participating procs*/
         HYPRE_BoomerAMGCreate(&coarse_solver);
         HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
                                     hypre_ParAMGDataMaxRowSum(amg_data));
         HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
                                           hypre_ParAMGDataStrongThreshold(amg_data));
         HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
                                       hypre_ParAMGDataCoarsenType(amg_data));
         HYPRE_BoomerAMGSetInterpType(coarse_solver,
                                      hypre_ParAMGDataInterpType(amg_data));
         HYPRE_BoomerAMGSetTruncFactor(coarse_solver,
                                       hypre_ParAMGDataTruncFactor(amg_data));
         HYPRE_BoomerAMGSetPMaxElmts(coarse_solver,
                                     hypre_ParAMGDataPMaxElmts(amg_data));
         /* user relax type/sweeps only override when explicitly set (> -1) */
         if (hypre_ParAMGDataUserRelaxType(amg_data) > -1)
            HYPRE_BoomerAMGSetRelaxType(coarse_solver,
                                        hypre_ParAMGDataUserRelaxType(amg_data));
         HYPRE_BoomerAMGSetRelaxOrder(coarse_solver,
                                      hypre_ParAMGDataRelaxOrder(amg_data));
         HYPRE_BoomerAMGSetRelaxWt(coarse_solver,
                                   hypre_ParAMGDataUserRelaxWeight(amg_data));
         if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1)
            HYPRE_BoomerAMGSetNumSweeps(coarse_solver,
                                        hypre_ParAMGDataUserNumSweeps(amg_data));
         HYPRE_BoomerAMGSetNumFunctions(coarse_solver,
                                        hypre_ParAMGDataNumFunctions(amg_data));
         /* the coarse solve is used as a single cycle inside the outer solver */
         HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1);
         HYPRE_BoomerAMGSetTol(coarse_solver, 0);

         /* Create CSR Matrix, will be Diag part of new matrix */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows + 1);

         /* A_tmp_i[i] starts as the combined (diag+offd) row length */
         A_tmp_i[0] = 0;
         for (i = 1; i < num_rows + 1; i++)
            A_tmp_i[i] = A_diag_i[i] - A_diag_i[i - 1] + A_offd_i[i] - A_offd_i[i - 1];

         num_nonzeros = A_offd_i[num_rows] + A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(double, num_nonzeros);

         /* merge diag (shifted to global column numbers) and offd
            (mapped through col_map_offd) into one local CSR */
         cnt = 0;
         for (i = 0; i < num_rows; i++)
         {
            for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++)
            {
               A_tmp_j[cnt] = A_diag_j[j] + first_row_index;
               A_tmp_data[cnt++] = A_diag_data[j];
            }
            for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++)
            {
               A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
               A_tmp_data[cnt++] = A_offd_data[j];
            }
         }

         /* displacements by rows, for gathering the row-length array */
         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs + 1);
         displs[0] = 0;
         for (i = 1; i < new_num_procs + 1; i++)
            displs[i] = displs[i - 1] + info[i - 1];
         size = displs[new_num_procs];   /* global number of rows */

         A_seq_i = hypre_CTAlloc(HYPRE_Int, size + 1);
         A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size + 1);

         /* gather row lengths (A_tmp_i[1..]) into A_seq_i[1..] */
         hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT,
                                &A_seq_i[1], info, displs,
                                HYPRE_MPI_INT, new_comm );

         displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs + 1);

         /* in-place prefix sum turns row lengths into CSR row pointers */
         A_seq_i[0] = 0;
         displs2[0] = 0;
         for (j = 1; j < displs[1]; j++)
            A_seq_i[j] = A_seq_i[j] + A_seq_i[j - 1];
         for (i = 1; i < new_num_procs; i++)
         {
            for (j = displs[i]; j < displs[i + 1]; j++)
            {
               A_seq_i[j] = A_seq_i[j] + A_seq_i[j - 1];
            }
         }
         A_seq_i[size] = A_seq_i[size] + A_seq_i[size - 1];
         /* displacements by nonzeros; info[] is reused as per-proc nnz counts */
         displs2[new_num_procs] = A_seq_i[size];
         for (i = 1; i < new_num_procs + 1; i++)
         {
            displs2[i] = A_seq_i[displs[i]];
            info[i - 1] = displs2[i] - displs2[i - 1];
         }

         total_nnz = displs2[new_num_procs];
         A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
         A_seq_data = hypre_CTAlloc(double, total_nnz);

         hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                                A_seq_j, info, displs2,
                                HYPRE_MPI_INT, new_comm );

         hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, hypre_MPI_DOUBLE,
                                A_seq_data, info, displs2,
                                hypre_MPI_DOUBLE, new_comm );

         hypre_TFree(displs);
         hypre_TFree(displs2);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);

         row_starts = hypre_CTAlloc(HYPRE_Int, 2);
         row_starts[0] = 0;
         row_starts[1] = size;

         /* Create 1 proc communicator */
         seq_comm = hypre_MPI_COMM_SELF;

         A_seq = hypre_ParCSRMatrixCreate(seq_comm, size, size,
                                          row_starts, row_starts,
                                          0, total_nnz, 0);

         A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
         A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

         /* hand the gathered arrays to the matrix (ownership transfers;
            do not free A_seq_data/A_seq_i/A_seq_j/A_seq_offd_i here) */
         hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
         hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
         hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
         hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;  /* offd stays empty (all zeros) */

         F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
         hypre_ParVectorOwnsPartitioning(F_seq) = 0;   /* row_starts owned by A_seq */
         hypre_ParVectorOwnsPartitioning(U_seq) = 0;
         hypre_ParVectorInitialize(F_seq);
         hypre_ParVectorInitialize(U_seq);

         hypre_BoomerAMGSetup(coarse_solver, A_seq, F_seq, U_seq);

         /* stash everything for hypre_seqAMGCycle */
         hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
         hypre_ParAMGDataACoarse(amg_data) = A_seq;
         hypre_ParAMGDataFCoarse(amg_data) = F_seq;
         hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
      hypre_TFree(info);
      hypre_TFree(ranks);
   }

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle (redundant-capable variant)
 *
 * Solve the coarsest AMG level sequentially.  Two modes, chosen by the
 * 'redundant' flag in amg_data:
 *   redundant != 0 : f and u are Allgather'ed onto every participating
 *                    processor, each solves the replicated coarse system,
 *                    and each copies its own slice of u back.
 *   redundant == 0 : f and u are Gather'ed onto rank 0 of new_comm, only
 *                    rank 0 solves, and the solution is Scatterv'ed back.
 *
 * Returns Solve_err_flag (always 0 here; no error paths are checked).
 *
 * NOTE(review): this duplicates the earlier double-based definition of
 * hypre_seqAMGCycle in this file/chunk — presumably two versions were
 * concatenated; confirm which the build compiles.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector **Par_F_array,
                   hypre_ParVector **Par_U_array )
{
   hypre_ParVector *Aux_U;
   hypre_ParVector *Aux_F;

   /* Local variables */
   HYPRE_Int Solve_err_flag = 0;
   HYPRE_Int n;
   HYPRE_Int i;

   hypre_Vector *u_local;
   HYPRE_Real *u_data;

   HYPRE_Int first_index;   /* global index of my first row in the coarse system */

   /* Acquire seq data */
   MPI_Comm new_comm = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector *F_coarse = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector *U_coarse = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int redundant = hypre_ParAMGDataRedundant(amg_data);

   Aux_U = Par_U_array[p_level];
   Aux_F = Par_F_array[p_level];

   first_index = hypre_ParVectorFirstIndex(Aux_U);
   u_local = hypre_ParVectorLocalVector(Aux_U);
   u_data = hypre_VectorData(u_local);
   n = hypre_VectorSize(u_local);

   /*if (A_coarse)*/
   if (hypre_ParAMGDataParticipate(amg_data))
   {
      HYPRE_Real *f_data;
      hypre_Vector *f_local;
      hypre_Vector *tmp_vec;

      HYPRE_Int nf;
      HYPRE_Int local_info;
      HYPRE_Real *recv_buf = NULL;
      HYPRE_Int *displs = NULL;
      HYPRE_Int *info = NULL;
      HYPRE_Int new_num_procs, my_id;

      hypre_MPI_Comm_size(new_comm, &new_num_procs);
      hypre_MPI_Comm_rank(new_comm, &my_id);

      f_local = hypre_ParVectorLocalVector(Aux_F);
      f_data = hypre_VectorData(f_local);
      nf = hypre_VectorSize(f_local);

      /* first f: exchange local sizes (to all ranks, or root only) */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs);
      local_info = nf;
      if (redundant)
         hypre_MPI_Allgather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
      else
         hypre_MPI_Gather(&local_info, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm);

      /* displs and the F_coarse receive buffer are only needed where the
         gathered data lands (every rank if redundant, else rank 0) */
      if (redundant || my_id == 0)
      {
         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs + 1);
         displs[0] = 0;
         for (i = 1; i < new_num_procs + 1; i++)
            displs[i] = displs[i - 1] + info[i - 1];

         if (F_coarse)
         {
            tmp_vec = hypre_ParVectorLocalVector(F_coarse);
            recv_buf = hypre_VectorData(tmp_vec);
         }
      }

      if (redundant)
         hypre_MPI_Allgatherv ( f_data, nf, HYPRE_MPI_REAL,
                                recv_buf, info, displs,
                                HYPRE_MPI_REAL, new_comm );
      else
         hypre_MPI_Gatherv ( f_data, nf, HYPRE_MPI_REAL,
                             recv_buf, info, displs,
                             HYPRE_MPI_REAL, 0, new_comm );

      if (redundant || my_id == 0)
      {
         tmp_vec = hypre_ParVectorLocalVector(U_coarse);
         recv_buf = hypre_VectorData(tmp_vec);
      }

      /*then u */
      if (redundant)
      {
         hypre_MPI_Allgatherv ( u_data, n, HYPRE_MPI_REAL,
                                recv_buf, info, displs,
                                HYPRE_MPI_REAL, new_comm );
         /* redundant path is done with the bookkeeping arrays now;
            non-redundant path still needs them for the Scatterv below */
         hypre_TFree(displs);
         hypre_TFree(info);
      }
      else
         hypre_MPI_Gatherv ( u_data, n, HYPRE_MPI_REAL,
                             recv_buf, info, displs,
                             HYPRE_MPI_REAL, 0, new_comm );

      /* clean up */
      if (redundant || my_id == 0)
      {
         hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
      }

      /*copy my part of U to parallel vector */
      if (redundant)
      {
         HYPRE_Real *local_data;

         local_data = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         for (i = 0; i < n; i++)
         {
            u_data[i] = local_data[first_index + i];
         }
      }
      else
      {
         HYPRE_Real *local_data = NULL;   /* only significant at root */
         if (my_id == 0)
            local_data = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         hypre_MPI_Scatterv ( local_data, info, displs, HYPRE_MPI_REAL,
                              u_data, n, HYPRE_MPI_REAL, 0, new_comm );
         /* displs was only allocated on root in this mode; info on all ranks */
         if (my_id == 0)
            hypre_TFree(displs);
         hypre_TFree(info);
      }
   }

   return(Solve_err_flag);
}