/* Deletes a node and the tree of which it is the root. */
void hypre_NumbersDeleteNode( hypre_NumbersNode * node )
{
   HYPRE_Int i;
   for ( i=0; i<=10; ++i )
      if ( node->digit[i] != NULL )
      {
         hypre_NumbersDeleteNode( node->digit[i] );
         node->digit[i] = NULL;
      }
   hypre_TFree( node );
}
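/* hypre_ParCSRMatrixToParChordMatrix converts a matrix in ParCSR form
   (per-processor diag + offd blocks) into chord-matrix form, in which each
   stored nonzero is a "chord" (idof, rdof, data) grouped by the pair of
   processors owning the row (idof) and column (rdof) sides.  The
   hypre_Numbers trees above serve as integer sets for the column-ownership
   lookups below. */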
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix(
   hypre_ParCSRMatrix *Ap,
   MPI_Comm comm,
   hypre_ParChordMatrix **pAc )
{
   HYPRE_Int * row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   HYPRE_Int * col_starts = hypre_ParCSRMatrixColStarts(Ap);
   hypre_CSRMatrix * diag = hypre_ParCSRMatrixDiag(Ap);
   hypre_CSRMatrix * offd = hypre_ParCSRMatrixOffd(Ap);
   HYPRE_Int * offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Int * diag_j = hypre_CSRMatrixJ(diag);
   HYPRE_Int * col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap);
   HYPRE_Int first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap);

   hypre_ParChordMatrix * Ac;
   hypre_NumbersNode * rdofs, * offd_cols_me;
   hypre_NumbersNode ** offd_cols;
   HYPRE_Int ** offd_col_array;
   HYPRE_Int * len_offd_col_array, * offd_col_array_me;
   HYPRE_Int len_offd_col_array_me;
   HYPRE_Int num_idofs, num_rdofs, j_local, j_global, row_global;
   HYPRE_Int i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq;
   HYPRE_Int num_inprocessors, num_toprocessors, num_procs, len_num_rdofs_toprocessor;
   HYPRE_Int *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords, *chord, *chordto;
   HYPRE_Int *inproc, *toproc, *num_rdofs_toprocessor;
   HYPRE_Int **inchord_idof, **inchord_rdof, **rdof_toprocessor;
   double **inchord_data;
   double data;
   HYPRE_Int *first_index_idof, *first_index_rdof;
   hypre_MPI_Request * request;
   hypre_MPI_Status * status;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);

   num_idofs = row_starts[my_id+1] - row_starts[my_id];
   num_rdofs = col_starts[my_id+1] - col_starts[my_id];

   hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs );
   Ac = *pAc;

   /* The following block sets Inprocessor:
      On each proc. my_id, we find the columns in the offd and diag blocks
      (global no.s).  The columns are rdofs (contrary to what I wrote in
      ChordMatrix.txt).
      For each such col/rdof r, find the proc. p which owns row/idof r.
      We set the temporary array pcr[p]=1 for such p.
      An MPI all-to-all will exchange such arrays so my_id's array qcr has
      qcr[q]=1 iff, on proc. q, pcr[my_id]=1.  In other words, qcr[q]=1 if
      my_id owns a row/idof i which is the same as a col/rdof owned by q.
      Collect all such q's into the array Inprocessor.
      While constructing pcr, we also construct pj such that for any index jj
      into offd_j,offd_data, pj[jj] is the processor which owns jj as a row
      (idof) (the number jj is local to this processor). */
   pcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   qcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   for ( p=0; p<num_procs; ++p ) pcr[p]=0;
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj )
   {
      j_local = offd_j[jj];
      j_global = col_map_offd[j_local];
      for ( p=0; p<num_procs; ++p )
      {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] )
         {
            pcr[p]=1;
            /* not used yet... pj[jj] = p; */
            break;
         }
      }
   }
   /* jjd = jj; ...not used yet */

   /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this
      one line would do the job of the following nested loop.  For non-square
      matrices, the data distribution is too arbitrary. */
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj )
   {
      j_local = diag_j[jj];
      j_global = j_local + first_col_diag;
      for ( p=0; p<num_procs; ++p )
      {
         if ( j_global >= row_starts[p] && j_global<row_starts[p+1] )
         {
            pcr[p]=1;
            /* not used yet... pj[jj+jjd] = p; */
            break;
         }
      }
   }

   /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a
      row/idof */
   hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm );
   /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q.
      The array of such q's is the array Inprocessor. */
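   /* Example: with 3 procs, if proc 0 has pcr = {0,1,1} (its columns live as
      rows on procs 1 and 2), then after the all-to-all procs 1 and 2 each
      see qcr[0]==1, so proc 0 will appear in their Inprocessor lists. */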
   num_inprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors;
   inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q;
   num_toprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors;
   toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q;

   hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors;
   hypre_ParChordMatrixInprocessor(Ac) = inprocessor;
   hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors;
   hypre_ParChordMatrixToprocessor(Ac) = toprocessor;
   hypre_TFree( qcr );

   /* FirstIndexIdof[p] is the global index of proc. p's row 0.
      FirstIndexRdof[p] is the global index of proc. p's col 0. */
   /* For FirstIndexIdof, we copy the array row_starts rather than its
      pointer, because the chord matrix will think it's free to delete
      FirstIndexIdof. */
   /* col_starts[p] contains the global index of the first column in the diag
      block of p.  But for first_index_rdof we want the global index of the
      first column in p (whether that's in the diag or offd block).  So it's
      more involved than row/idof: we also check the offd block, and have to
      do a gather to get first_index_rdof for every proc. on every proc. */
   first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<=num_procs; ++p )
   {
      first_index_idof[p] = row_starts[p];
      first_index_rdof[p] = col_starts[p];
   }
   if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 )
      first_index_rdof[my_id] =
         col_starts[my_id]<col_map_offd[0] ? col_starts[my_id] : col_map_offd[0];
   hypre_MPI_Allgather(
      &first_index_rdof[my_id], 1, HYPRE_MPI_INT,
      first_index_rdof, 1, HYPRE_MPI_INT, comm );

   /* Set num_inchords: num_inchords[p] is no. chords on my_id connected to p.
      Set each chord (idof,rdof,data).
      We go through each matrix element in the diag block, find what processor
      owns its column no. as a row, then update num_inchords[p],
      inchord_idof[p], inchord_rdof[p], inchord_data[p]. */
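   /* A chord is one stored nonzero A(idof,rdof) together with its value; the
      inchord_* arrays below hold, for each inprocessor q, the chords that
      connect my_id to q (for q == my_q, i.e. inprocessor[q] == my_id, these
      are exactly the diag-block entries). */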
   inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_data = hypre_CTAlloc( double*, num_inprocessors );
   num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   num_rdofs = 0;
   for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0;
   my_q = -1;
   for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q;
   hypre_assert( my_q>=0 );

   /* diag block: first count chords (from my_id to my_id), then set them
      from the diag block's CSR data structure */
   num_idofs = hypre_CSRMatrixNumRows(diag);
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row )
   {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(diag)[i];
         hypre_NumbersEnter( rdofs, j_local );
         ++num_inchords[my_q];
      }
   }
   num_rdofs = hypre_NumbersNEntered( rdofs );

   inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] );
   chord[0] = 0;
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row )
   {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(diag)[i];
         data = hypre_CSRMatrixData(diag)[i];
         inchord_idof[my_q][chord[0]] = row;
         /* Here we need to convert from j_local - a column local to the diag
            of this proc. - to a j which is local only to this processor - a
            column (rdof) numbering scheme to be shared by the diag and offd
            blocks...  (col_starts and first_index_rdof are indexed by the
            processor rank my_id, not by the inprocessor index my_q) */
         j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_id];
         j = j_global - first_index_rdof[my_id];
         inchord_rdof[my_q][chord[0]] = j;
         inchord_data[my_q][chord[0]] = data;
         hypre_assert( chord[0] < num_inchords[my_q] );
         ++chord[0];
      }
   }
   hypre_NumbersDeleteNode(rdofs);

   /* offd block: */
   /* >>> offd_cols_me duplicates rdofs */
   offd_cols_me = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row )
   {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global = col_map_offd[j_local];
         hypre_NumbersEnter( offd_cols_me, j_global );
      }
   }
   offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   offd_col_array_me = hypre_NumbersArray( offd_cols_me );
   len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me );
   request = hypre_CTAlloc(hypre_MPI_Request, 2*num_procs );
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT,
                       inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 )
   {
      hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT,
                       p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(status);
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] );
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT,
                       inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 )
   {
      hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT,
                       p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc(hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
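   /* offd_col_array[q] now holds the global column indices appearing in
      inprocessor q's offd block; each such set is loaded into a hypre_Numbers
      tree so the ownership tests below are tree lookups rather than linear
      searches. */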
   hypre_TFree(request);
   hypre_TFree(status);

   offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors );
   for ( q=0; q<num_inprocessors; ++q )
   {
      offd_cols[q] = hypre_NumbersNewNode();
      for ( i=0; i<len_offd_col_array[q]; ++i )
         hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] );
   }

   len_num_rdofs_toprocessor =
      1 + hypre_CSRMatrixI(offd)[hypre_CSRMatrixNumRows(offd)];
   inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto )
   {
      inproc[qto] = -1;
      toproc[qto] = -1;
      num_rdofs_toprocessor[qto] = 0;
   }
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row )
   {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global = col_map_offd[j_local];
         hypre_NumbersEnter( rdofs, j_local );

         /* TO DO: find faster ways to do the two processor lookups below. */
         /* Find a processor p (local index q) from the inprocessor list which
            owns the column (rdof) which is the same as this processor's row
            (idof) row.  Update num_inchords for p.  Save q as inproc[i] for
            quick recall later.  It represents an inprocessor (not unique)
            connected to a chord i. */
         inproc[i] = -1;
         for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
         {
            p = inprocessor[q];
            if ( hypre_NumbersQuery( offd_cols[q],
                                     row+hypre_ParCSRMatrixFirstRowIndex(Ap) )
                 == 1 )
            {
               /* row is one of the offd columns of p */
               ++num_inchords[q];
               inproc[i] = q;
               break;
            }
         }
         if ( inproc[i]<0 )
         {
            /* For square matrices, we would have found the column in some
               other processor's offd.  But for non-square matrices it could
               exist only in some other processor's diag... */
            /* Note that all data in a diag block is stored.  We don't check
               whether the value of a data entry is zero. */
            for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
            {
               p = inprocessor[q];
               row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap);
               if ( row_global>=col_starts[p] && row_global<col_starts[p+1] )
               {
                  /* row is one of the diag columns of p */
                  ++num_inchords[q];
                  inproc[i] = q;
                  break;
               }
            }
         }
         hypre_assert( inproc[i]>=0 );

         /* Find the processor pto (local index qto) from the toprocessor
            list which owns the row (idof) which is the same as this
            processor's column (rdof) j_global.  Update num_rdofs_toprocessor
            for pto.  Save its local index qto as toproc[i] for quick recall
            later.  It represents the toprocessor connected to a chord i. */
         for ( qto=0; qto<num_toprocessors; ++qto )
         {
            pto = toprocessor[qto];
            if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] )
            {
               hypre_assert( qto < len_num_rdofs_toprocessor );
               ++num_rdofs_toprocessor[qto];
               /* ... an overestimate: if two chords share an rdof, that rdof
                  will be counted twice in num_rdofs_toprocessor.  It can be
                  fixed up later. */
               toproc[i] = qto;
               break;
            }
         }
      }
   }
   num_rdofs += hypre_NumbersNEntered(rdofs);
   hypre_NumbersDeleteNode(rdofs);

   for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
   {
      inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] );
      chord[q] = 0;
   }
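   /* The chord[q] counters are reset here so that a second pass over the
      offd block can fill inchord_idof/rdof/data[q] for each q != my_q, just
      as chord[0] was used when setting the diag-block chords above. */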