HYPRE_ParCSRMatrix GenerateRotate7pt( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int p, HYPRE_Int q, HYPRE_Real alpha, HYPRE_Real eps ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Real *value; HYPRE_Real ac, bc, cc, s, c, pi, x; HYPRE_Int *global_part; HYPRE_Int ix, iy; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int *work; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local; HYPRE_Int nx_size, ny_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny; value = hypre_CTAlloc(HYPRE_Real,4); pi = 4.0*atan(1.0); x = pi*alpha/180.0; s = sin(x); c = cos(x); ac = -(c*c + eps*s*s); bc = 2.0*(1.0 - eps)*s*c; cc = -(s*s + eps*c*c); value[0] = -2*(2*ac+bc+2*cc); value[1] = 2*ac+bc; value[2] = bc+2*cc; value[3] = -bc; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q+1); global_part[0] = 0; cnt = 1; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size; } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; my_id = q*P + p; num_procs = P*Q; local_num_rows = nx_local*ny_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); num_cols_offd = 0; if (p) num_cols_offd += ny_local; if (p < P_busy-1) num_cols_offd += ny_local; if (q) num_cols_offd += nx_local; if (q < Q_busy-1) num_cols_offd += nx_local; if (p && q) num_cols_offd++; if (p && q < Q_busy-1 ) num_cols_offd++; if (p < P_busy-1 && q ) num_cols_offd++; if (p < P_busy-1 && q < Q_busy-1 ) num_cols_offd++; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 0; o_cnt = 0; diag_i[0] = 0; offd_i[0] = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { cnt++; o_cnt++; diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iy > ny_part[q]) { diag_i[cnt]++; if (ix > nx_part[p]) { diag_i[cnt]++; } else { if (ix) offd_i[o_cnt]++; } } else { if (iy) { offd_i[o_cnt]++; if (ix > nx_part[p]) { offd_i[o_cnt]++; } else if (ix) { offd_i[o_cnt]++; } } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) { diag_i[cnt]++; if (ix < nx_part[p+1]-1) { diag_i[cnt]++; } else { if (ix+1 < nx) offd_i[o_cnt]++; } } else { if (iy+1 < ny) { offd_i[o_cnt]++; if (ix < nx_part[p+1]-1) { offd_i[o_cnt]++; } else if (ix < nx-1) { offd_i[o_cnt]++; } } } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iy > ny_part[q]) { if (ix > nx_part[p]) { diag_j[cnt] = row_index-nx_local-1 ; diag_data[cnt++] = value[3]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { if (ix > nx_part[p]) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy-1,p-1,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } offd_j[o_cnt] = hypre_map2(ix,iy-1,p,q-1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map2(ix-1,iy,p-1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[1]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[1]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[2]; if (ix < nx_part[p+1]-1) { diag_j[cnt] = row_index+nx_local+1 ; diag_data[cnt++] = value[3]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map2(ix,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[2]; if (ix < nx_part[p+1]-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } else if (ix < nx-1) { offd_j[o_cnt] = hypre_map2(ix+1,iy+1,p+1,q+1,P,Q, nx_part,ny_part,global_part); offd_data[o_cnt++] = value[3]; } } } row_index++; } } if (num_procs > 1) { work = hypre_CTAlloc(HYPRE_Int,o_cnt); for (i=0; i < o_cnt; i++) work[i] = offd_j[i]; qsort0(work, 0, o_cnt-1); col_map_offd[0] = work[0]; cnt = 0; for (i=0; i < o_cnt; i++) { if (work[i] > col_map_offd[cnt]) { cnt++; col_map_offd[cnt] = work[i]; } } for (i=0; i < o_cnt; i++) { for (j=0; j < num_cols_offd; j++) { if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } } hypre_TFree(work); } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(value); return (HYPRE_ParCSRMatrix) A; }
int HYPRE_LSI_SchwarzDecompose(HYPRE_LSI_Schwarz *sch_ptr,MH_Matrix *Amat, int total_recv_leng, int *recv_lengths, int *ext_ja, double *ext_aa, int *map, int *map2, int Noffset) { int i, j, k, nnz, *mat_ia, *mat_ja; int **bmat_ia, **bmat_ja; int mypid, *blk_size, index, **blk_indices, **aux_bmat_ia; int ncnt, rownum, offset, Nrows, extNrows, **aux_bmat_ja; int *tmp_blk_leng, *cols, rowleng; int nblocks, col_ind, init_size, aux_nnz, max_blk_size; int *tmp_indices, cur_off_row, length; double *mat_aa, *vals, **aux_bmat_aa, **bmat_aa; /* --------------------------------------------------------- */ /* fetch Schwarz parameters */ /* --------------------------------------------------------- */ MPI_Comm_rank(sch_ptr->comm, &mypid); Nrows = sch_ptr->Nrows; extNrows = Nrows + total_recv_leng; sch_ptr->Nrows = Nrows; sch_ptr->extNrows = extNrows; /* --------------------------------------------------------- */ /* adjust the off-processor row data */ /* --------------------------------------------------------- */ offset = 0; for ( i = 0; i < total_recv_leng; i++ ) { for ( j = offset; j < offset+recv_lengths[i]; j++ ) { index = ext_ja[j]; if ( index >= Noffset && index < Noffset+Nrows ) ext_ja[j] = index - Noffset; else { col_ind = HYPRE_LSI_Search(map, index, extNrows-Nrows); if ( col_ind >= 0 ) ext_ja[j] = map2[col_ind] + Nrows; else ext_ja[j] = -1; } } offset += recv_lengths[i]; } /* --------------------------------------------------------- */ /* compose the initial blk_size information */ /* and extend the each block for the overlap */ /* (at the end blk_indices and bli_size contains the info) */ /* --------------------------------------------------------- */ if ( sch_ptr->nblocks == 1 ) { nblocks = 1; max_blk_size = extNrows; sch_ptr->blk_sizes = (int *) malloc(nblocks * sizeof(int)); blk_size = sch_ptr->blk_sizes; blk_size[0] = extNrows; } else { if ( sch_ptr->nblocks != 0 ) { nblocks = sch_ptr->nblocks; sch_ptr->block_size = (Nrows + nblocks / 2) / nblocks; } else { nblocks = (Nrows - sch_ptr->block_size / 2) / sch_ptr->block_size + 1; sch_ptr->nblocks = nblocks; } sch_ptr->blk_indices = (int **) malloc(nblocks * sizeof(int*)); sch_ptr->blk_sizes = (int *) malloc(nblocks * sizeof(int)); blk_indices = sch_ptr->blk_indices; blk_size = sch_ptr->blk_sizes; tmp_blk_leng = (int *) malloc(nblocks * sizeof(int) ); for ( i = 0; i < nblocks-1; i++ ) blk_size[i] = sch_ptr->block_size; blk_size[nblocks-1] = Nrows - sch_ptr->block_size * (nblocks - 1 ); for ( i = 0; i < nblocks; i++ ) { tmp_blk_leng[i] = 5 * blk_size[i] + 5; blk_indices[i] = (int *) malloc(tmp_blk_leng[i] * sizeof(int)); for (j = 0; j < blk_size[i]; j++) blk_indices[i][j] = sch_ptr->block_size * i + j; } max_blk_size = 0; for ( i = 0; i < nblocks; i++ ) { init_size = blk_size[i]; for ( j = 0; j < init_size; j++ ) { rownum = blk_indices[i][j]; cols = &(Amat->colnum[Amat->rowptr[rownum]]); vals = &(Amat->values[Amat->rowptr[rownum]]); rowleng = Amat->rowptr[rownum+1] - Amat->rowptr[rownum]; if ( blk_size[i] + rowleng > tmp_blk_leng[i] ) { tmp_indices = blk_indices[i]; tmp_blk_leng[i] = 2 * ( blk_size[i] + rowleng ) + 2; blk_indices[i] = (int *) malloc(tmp_blk_leng[i] * sizeof(int)); for (k = 0; k < blk_size[i]; k++) blk_indices[i][k] = tmp_indices[k]; free( tmp_indices ); } for ( k = 0; k < rowleng; k++ ) { col_ind = cols[k]; blk_indices[i][blk_size[i]++] = col_ind; } } qsort0(blk_indices[i], 0, blk_size[i]-1); ncnt = 0; for ( j = 1; j < blk_size[i]; j++ ) if ( blk_indices[i][j] != blk_indices[i][ncnt] ) blk_indices[i][++ncnt] = blk_indices[i][j]; blk_size[i] = ncnt + 1; if ( blk_size[i] > max_blk_size ) max_blk_size = blk_size[i]; } free(tmp_blk_leng); } /* --------------------------------------------------------- */ /* compute the memory requirements for each block */ /* --------------------------------------------------------- */ sch_ptr->bmat_ia = (int **) malloc(nblocks * sizeof(int*) ); sch_ptr->bmat_ja = (int **) malloc(nblocks * sizeof(int*) ); sch_ptr->bmat_aa = (double **) malloc(nblocks * sizeof(double*) ); bmat_ia = sch_ptr->bmat_ia; bmat_ja = sch_ptr->bmat_ja; bmat_aa = sch_ptr->bmat_aa; if ( nblocks != 1 ) { sch_ptr->aux_bmat_ia = (int **) malloc(nblocks * sizeof(int*) ); sch_ptr->aux_bmat_ja = (int **) malloc(nblocks * sizeof(int*) ); sch_ptr->aux_bmat_aa = (double **) malloc(nblocks * sizeof(double*) ); aux_bmat_ia = sch_ptr->aux_bmat_ia; aux_bmat_ja = sch_ptr->aux_bmat_ja; aux_bmat_aa = sch_ptr->aux_bmat_aa; } else { aux_bmat_ia = NULL; aux_bmat_ja = NULL; aux_bmat_aa = NULL; } /* --------------------------------------------------------- */ /* compose each block into sch_ptr */ /* --------------------------------------------------------- */ cols = (int *) malloc( max_blk_size * sizeof(int) ); vals = (double *) malloc( max_blk_size * sizeof(double) ); for ( i = 0; i < nblocks; i++ ) { nnz = aux_nnz = offset = cur_off_row = 0; if ( nblocks > 1 ) length = blk_size[i]; else length = extNrows; for ( j = 0; j < length; j++ ) { if ( nblocks > 1 ) rownum = blk_indices[i][j]; else rownum = j; if ( rownum < Nrows ) { rowleng = 0; for ( k = Amat->rowptr[rownum]; k < Amat->rowptr[rownum+1]; k++ ) cols[rowleng++] = Amat->colnum[k]; } else { for ( k = cur_off_row; k < rownum-Nrows; k++ ) offset += recv_lengths[k]; cur_off_row = rownum - Nrows; rowleng = 0; for ( k = offset; k < offset+recv_lengths[cur_off_row]; k++ ) if ( ext_ja[k] != -1 ) cols[rowleng++] = ext_ja[k]; } for ( k = 0; k < rowleng; k++ ) { if ( nblocks > 1 ) index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]); else index = cols[k]; if ( index >= 0 ) nnz++; else aux_nnz++; } } bmat_ia[i] = (int *) malloc( (length + 1) * sizeof(int)); bmat_ja[i] = (int *) malloc( nnz * sizeof(int)); bmat_aa[i] = (double *) malloc( nnz * sizeof(double)); mat_ia = bmat_ia[i]; mat_ja = bmat_ja[i]; mat_aa = bmat_aa[i]; if ( nblocks > 1 ) { aux_bmat_ia[i] = (int *) malloc( (blk_size[i] + 1) * sizeof(int)); aux_bmat_ja[i] = (int *) malloc( aux_nnz * sizeof(int)); aux_bmat_aa[i] = (double *) malloc( aux_nnz * sizeof(double)); } /* ------------------------------------------------------ */ /* load the submatrices */ /* ------------------------------------------------------ */ nnz = aux_nnz = offset = cur_off_row = 0; mat_ia[0] = 0; if ( nblocks > 1 ) aux_bmat_ia[i][0] = 0; for ( j = 0; j < blk_size[i]; j++ ) { if ( nblocks > 1 ) rownum = blk_indices[i][j]; else rownum = j; if ( rownum < Nrows ) { rowleng = 0; for ( k = Amat->rowptr[rownum]; k < Amat->rowptr[rownum+1]; k++ ) { vals[rowleng] = Amat->values[k]; cols[rowleng++] = Amat->colnum[k]; } } else { for ( k = cur_off_row; k < rownum-Nrows; k++ ) { offset += recv_lengths[k]; } cur_off_row = rownum - Nrows; rowleng = 0; for ( k = offset; k < offset+recv_lengths[cur_off_row]; k++ ) { if ( ext_ja[k] != -1 ) { cols[rowleng] = ext_ja[k]; vals[rowleng++] = ext_aa[k]; } } } for ( k = 0; k < rowleng; k++ ) { if ( nblocks > 1 ) index = HYPRE_LSI_Search( blk_indices[i], cols[k], blk_size[i]); else index = cols[k]; if ( index >= 0 ) { mat_ja[nnz] = index; mat_aa[nnz++] = vals[k]; } else { aux_bmat_ja[i][aux_nnz] = cols[k]; aux_bmat_aa[i][aux_nnz++] = vals[k]; } } mat_ia[j+1] = nnz; if ( nblocks > 1 ) aux_bmat_ia[i][j+1] = aux_nnz; } for ( j = 0; j < mat_ia[blk_size[i]]; j++ ) if ( mat_ja[j] < 0 || mat_ja[j] >= length ) printf("block %d has index %d\n", i, mat_ja[j]); } free( cols ); free( vals ); /* --------------------------------------------------------- */ /* decompose each block */ /* --------------------------------------------------------- */ HYPRE_LSI_ILUTDecompose( sch_ptr ); return 0; }
hypre_ParCSRMatrix * hypre_ParMatmul_FC( hypre_ParCSRMatrix * A, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker, HYPRE_Int * dof_func, HYPRE_Int * dof_func_offd ) /* hypre_parMatmul_FC creates and returns the "Fine"-designated rows of the matrix product A*P. A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC where nC is the number of coarse rows/columns, nF the number of fine rows/columns. The size of C=A*P is (nC+nF)*nC, even though not all rows of C are actually computed. If we were to construct a matrix consisting only of the computed rows of C, its size would be nF*nC. "Fine" is defined solely by the marker array, and for example could be a proper subset of the fine points of a multigrid hierarchy. */ { /* To compute a submatrix of C containing only the computed data, i.e. only "Fine" rows, we would have to do a lot of computational work, with a lot of communication. The communication is because such a matrix would need global information that depends on which rows are "Fine". */ MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); double *A_diag_data = hypre_CSRMatrixData(A_diag); HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); double *A_offd_data = hypre_CSRMatrixData(A_offd); HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); HYPRE_Int *row_starts_A = hypre_ParCSRMatrixRowStarts(A); HYPRE_Int num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag); HYPRE_Int num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd); hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P); double *P_diag_data = hypre_CSRMatrixData(P_diag); HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag); HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag); hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P); HYPRE_Int *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P); double *P_offd_data = hypre_CSRMatrixData(P_offd); HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd); HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd); HYPRE_Int first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P); HYPRE_Int last_col_diag_P; HYPRE_Int *col_starts_P = hypre_ParCSRMatrixColStarts(P); HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag); HYPRE_Int num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag); HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd); hypre_ParCSRMatrix *C; HYPRE_Int *col_map_offd_C; HYPRE_Int *map_P_to_C; hypre_CSRMatrix *C_diag; double *C_diag_data; HYPRE_Int *C_diag_i; HYPRE_Int *C_diag_j; hypre_CSRMatrix *C_offd; double *C_offd_data=NULL; HYPRE_Int *C_offd_i=NULL; HYPRE_Int *C_offd_j=NULL; HYPRE_Int C_diag_size; HYPRE_Int C_offd_size; HYPRE_Int num_cols_offd_C = 0; hypre_CSRMatrix *Ps_ext; double *Ps_ext_data; HYPRE_Int *Ps_ext_i; HYPRE_Int *Ps_ext_j; double *P_ext_diag_data; HYPRE_Int *P_ext_diag_i; HYPRE_Int *P_ext_diag_j; HYPRE_Int P_ext_diag_size; double *P_ext_offd_data; HYPRE_Int *P_ext_offd_i; HYPRE_Int *P_ext_offd_j; HYPRE_Int P_ext_offd_size; HYPRE_Int *P_marker; HYPRE_Int *temp; HYPRE_Int i, j; HYPRE_Int i1, i2, i3; HYPRE_Int jj2, jj3; HYPRE_Int jj_count_diag, jj_count_offd; HYPRE_Int jj_row_begin_diag, jj_row_begin_offd; HYPRE_Int start_indexing = 0; /* start indexing for C_data at 0 */ HYPRE_Int n_rows_A_global, n_cols_A_global; HYPRE_Int n_rows_P_global, n_cols_P_global; HYPRE_Int allsquare = 0; HYPRE_Int cnt, cnt_offd, cnt_diag; HYPRE_Int num_procs; HYPRE_Int value; double a_entry; double a_b_product; n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A); n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A); n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P); n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P); if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P) { hypre_printf(" Error! Incompatible matrix dimensions!\n"); return NULL; } /* if (num_rows_A==num_cols_P) allsquare = 1; */ /*----------------------------------------------------------------------- * Extract P_ext, i.e. portion of P that is stored on neighbor procs * and needed locally for matrix matrix product *-----------------------------------------------------------------------*/ hypre_MPI_Comm_size(comm, &num_procs); if (num_procs > 1) { /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings within * hypre_ParCSRMatrixExtractBExt *--------------------------------------------------------------------*/ Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1); Ps_ext_data = hypre_CSRMatrixData(Ps_ext); Ps_ext_i = hypre_CSRMatrixI(Ps_ext); Ps_ext_j = hypre_CSRMatrixJ(Ps_ext); } P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1); P_ext_diag_size = 0; P_ext_offd_size = 0; last_col_diag_P = first_col_diag_P + num_cols_diag_P -1; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) P_ext_offd_size++; else P_ext_diag_size++; P_ext_diag_i[i+1] = P_ext_diag_size; P_ext_offd_i[i+1] = P_ext_offd_size; } if (P_ext_diag_size) { P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size); P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size); } if (P_ext_offd_size) { P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size); P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size); } cnt_offd = 0; cnt_diag = 0; for (i=0; i < num_cols_offd_A; i++) { for (j=Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++) if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P) { P_ext_offd_j[cnt_offd] = Ps_ext_j[j]; P_ext_offd_data[cnt_offd++] = Ps_ext_data[j]; } else { P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P; P_ext_diag_data[cnt_diag++] = Ps_ext_data[j]; } } if (num_procs > 1) { hypre_CSRMatrixDestroy(Ps_ext); Ps_ext = NULL; } cnt = 0; if (P_ext_offd_size || num_cols_offd_P) { temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P); for (i=0; i < P_ext_offd_size; i++) temp[i] = P_ext_offd_j[i]; cnt = P_ext_offd_size; for (i=0; i < num_cols_offd_P; i++) temp[cnt++] = col_map_offd_P[i]; } if (cnt) { qsort0(temp, 0, cnt-1); num_cols_offd_C = 1; value = temp[0]; for (i=1; i < cnt; i++) { if (temp[i] > value) { value = temp[i]; temp[num_cols_offd_C++] = value; } } } if (num_cols_offd_C) col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C); for (i=0; i < num_cols_offd_C; i++) col_map_offd_C[i] = temp[i]; if (P_ext_offd_size || num_cols_offd_P) hypre_TFree(temp); for (i=0 ; i < P_ext_offd_size; i++) P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C, P_ext_offd_j[i], num_cols_offd_C); if (num_cols_offd_P) { map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P); cnt = 0; for (i=0; i < num_cols_offd_C; i++) if (col_map_offd_C[i] == col_map_offd_P[cnt]) { map_P_to_C[cnt++] = i; if (cnt == num_cols_offd_P) break; } } /*----------------------------------------------------------------------- * Allocate marker array. *-----------------------------------------------------------------------*/ P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C); /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /* no changes for the marked version above this point */ /* This function call is the first pass: */ hypre_ParMatmul_RowSizes_Marked( &C_diag_i, &C_offd_i, &P_marker, A_diag_i, A_diag_j, A_offd_i, A_offd_j, P_diag_i, P_diag_j, P_offd_i, P_offd_j, P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j, map_P_to_C, &C_diag_size, &C_offd_size, num_rows_diag_A, num_cols_offd_A, allsquare, num_cols_diag_P, num_cols_offd_P, num_cols_offd_C, CF_marker, dof_func, dof_func_offd ); /* The above call of hypre_ParMatmul_RowSizes_Marked computed two scalars: C_diag_size, C_offd_size, and two arrays: C_diag_i, C_offd_i ( P_marker is also computed, but only used internally ) */ /*----------------------------------------------------------------------- * Allocate C_diag_data and C_diag_j arrays. * Allocate C_offd_data and C_offd_j arrays. *-----------------------------------------------------------------------*/ last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1; C_diag_data = hypre_CTAlloc(double, C_diag_size); C_diag_j = hypre_CTAlloc(HYPRE_Int, C_diag_size); if (C_offd_size) { C_offd_data = hypre_CTAlloc(double, C_offd_size); C_offd_j = hypre_CTAlloc(HYPRE_Int, C_offd_size); } /*----------------------------------------------------------------------- * Second Pass: Fill in C_diag_data and C_diag_j. * Second Pass: Fill in C_offd_data and C_offd_j. *-----------------------------------------------------------------------*/ /*----------------------------------------------------------------------- * Initialize some stuff. *-----------------------------------------------------------------------*/ jj_count_diag = start_indexing; jj_count_offd = start_indexing; for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++) { P_marker[i1] = -1; } /*----------------------------------------------------------------------- * Loop over interior c-points. *-----------------------------------------------------------------------*/ for (i1 = 0; i1 < num_rows_diag_A; i1++) { if ( CF_marker[i1] < 0 ) /* i1 is a fine row */ /* ... This and the coarse row code are the only parts between first pass and near the end where hypre_ParMatmul_FC is different from the regular hypre_ParMatmul */ { /*-------------------------------------------------------------------- * Create diagonal entry, C_{i1,i1} *--------------------------------------------------------------------*/ jj_row_begin_diag = jj_count_diag; jj_row_begin_offd = jj_count_offd; /*----------------------------------------------------------------- * Loop over entries in row i1 of A_offd. *-----------------------------------------------------------------*/ if (num_cols_offd_A) { for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++) { i2 = A_offd_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] ) { /* interpolate only like "functions" */ a_entry = A_offd_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_ext. *-----------------------------------------------------------*/ for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+P_ext_offd_j[jj3]; a_b_product = a_entry * P_ext_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else C_offd_data[P_marker[i3]] += a_b_product; } for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++) { i3 = P_ext_diag_j[jj3]; a_b_product = a_entry * P_ext_diag_data[jj3]; if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else C_diag_data[P_marker[i3]] += a_b_product; } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } /*----------------------------------------------------------------- * Loop over entries in row i1 of A_diag. *-----------------------------------------------------------------*/ for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++) { i2 = A_diag_j[jj2]; if( dof_func==NULL || dof_func[i1] == dof_func[i2] ) { /* interpolate only like "functions" */ a_entry = A_diag_data[jj2]; /*----------------------------------------------------------- * Loop over entries in row i2 of P_diag. *-----------------------------------------------------------*/ for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++) { i3 = P_diag_j[jj3]; a_b_product = a_entry * P_diag_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_diag) { P_marker[i3] = jj_count_diag; C_diag_data[jj_count_diag] = a_b_product; C_diag_j[jj_count_diag] = i3; jj_count_diag++; } else { C_diag_data[P_marker[i3]] += a_b_product; } } if (num_cols_offd_P) { for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++) { i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]]; a_b_product = a_entry * P_offd_data[jj3]; /*-------------------------------------------------------- * Check P_marker to see that C_{i1,i3} has not already * been accounted for. If it has not, create a new entry. * If it has, add new contribution. *--------------------------------------------------------*/ if (P_marker[i3] < jj_row_begin_offd) { P_marker[i3] = jj_count_offd; C_offd_data[jj_count_offd] = a_b_product; C_offd_j[jj_count_offd] = i3-num_cols_diag_P; jj_count_offd++; } else { C_offd_data[P_marker[i3]] += a_b_product; } } } } else { /* Interpolation mat should be 0 where i1 and i2 correspond to different "functions". As we haven't created an entry for C(i1,i2), nothing needs to be done. */ } } } else /* i1 is a coarse row.*/ /* Copy P coarse-row values to C. This is useful if C is meant to become a replacement for P */ { if (num_cols_offd_P) { for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++) { C_offd_j[jj_count_offd] = P_offd_j[jj_count_offd]; C_offd_data[jj_count_offd] = P_offd_data[jj_count_offd]; ++jj_count_offd; } } for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++) { C_diag_j[jj_count_diag] = P_diag_j[jj2]; C_diag_data[jj_count_diag] = P_diag_data[jj2]; ++jj_count_diag; } } } C = hypre_ParCSRMatrixCreate( comm, n_rows_A_global, n_cols_P_global, row_starts_A, col_starts_P, num_cols_offd_C, C_diag_size, C_offd_size ); /* Note that C does not own the partitionings */ hypre_ParCSRMatrixSetRowStartsOwner(C,0); hypre_ParCSRMatrixSetColStartsOwner(C,0); C_diag = hypre_ParCSRMatrixDiag(C); hypre_CSRMatrixData(C_diag) = C_diag_data; hypre_CSRMatrixI(C_diag) = C_diag_i; hypre_CSRMatrixJ(C_diag) = C_diag_j; C_offd = hypre_ParCSRMatrixOffd(C); hypre_CSRMatrixI(C_offd) = C_offd_i; hypre_ParCSRMatrixOffd(C) = C_offd; if (num_cols_offd_C) { hypre_CSRMatrixData(C_offd) = C_offd_data; hypre_CSRMatrixJ(C_offd) = C_offd_j; hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C; } /*----------------------------------------------------------------------- * Free various arrays *-----------------------------------------------------------------------*/ hypre_TFree(P_marker); hypre_TFree(P_ext_diag_i); if (P_ext_diag_size) { hypre_TFree(P_ext_diag_j); hypre_TFree(P_ext_diag_data); } hypre_TFree(P_ext_offd_i); if (P_ext_offd_size) { hypre_TFree(P_ext_offd_j); hypre_TFree(P_ext_offd_data); } if (num_cols_offd_P) hypre_TFree(map_P_to_C); return C; }
/* Assume that we are given a fine and coarse topology and the coarse degrees of freedom (DOFs) have been chosen. Assume also, that the global interpolation matrix dof_DOF has a prescribed nonzero pattern. Then, the fine degrees of freedom can be split into 4 groups (here "i" stands for "interior"): NODEidof - dofs which are interpolated only from the DOF in one coarse vertex EDGEidof - dofs which are interpolated only from the DOFs in one coarse edge FACEidof - dofs which are interpolated only from the DOFs in one coarse face ELEMidof - dofs which are interpolated only from the DOFs in one coarse element The interpolation operator dof_DOF can be build in 4 steps, by consequently filling-in the rows corresponding to the above groups. The code below uses harmonic extension to extend the interpolation from one group to the next. */ HYPRE_Int hypre_ND1AMGeInterpolation (hypre_ParCSRMatrix * Aee, hypre_ParCSRMatrix * ELEM_idof, hypre_ParCSRMatrix * FACE_idof, hypre_ParCSRMatrix * EDGE_idof, hypre_ParCSRMatrix * ELEM_FACE, hypre_ParCSRMatrix * ELEM_EDGE, HYPRE_Int num_OffProcRows, hypre_MaxwellOffProcRow ** OffProcRows, hypre_IJMatrix * IJ_dof_DOF) { HYPRE_Int ierr = 0; HYPRE_Int i, j, k; HYPRE_Int *offproc_rnums, *swap; hypre_ParCSRMatrix * dof_DOF = hypre_IJMatrixObject(IJ_dof_DOF); hypre_ParCSRMatrix * ELEM_DOF = ELEM_EDGE; hypre_ParCSRMatrix * ELEM_FACEidof; hypre_ParCSRMatrix * ELEM_EDGEidof; hypre_CSRMatrix *A, *P; HYPRE_Int numELEM = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(ELEM_EDGE)); HYPRE_Int getrow_ierr; HYPRE_Int three_dimensional_problem; MPI_Comm comm= hypre_ParCSRMatrixComm(Aee); HYPRE_Int myproc; hypre_MPI_Comm_rank(comm, &myproc); #if 0 hypre_IJMatrix * ij_dof_DOF = hypre_CTAlloc(hypre_IJMatrix, 1); /* Convert dof_DOF to IJ matrix, so we can use AddToValues */ hypre_IJMatrixComm(ij_dof_DOF) = hypre_ParCSRMatrixComm(dof_DOF); hypre_IJMatrixRowPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixRowStarts(dof_DOF); hypre_IJMatrixColPartitioning(ij_dof_DOF) = hypre_ParCSRMatrixColStarts(dof_DOF); hypre_IJMatrixObject(ij_dof_DOF) = dof_DOF; hypre_IJMatrixAssembleFlag(ij_dof_DOF) = 1; #endif /* sort the offproc rows to get quicker comparison for later */ if (num_OffProcRows) { offproc_rnums= hypre_TAlloc(HYPRE_Int, num_OffProcRows); swap = hypre_TAlloc(HYPRE_Int, num_OffProcRows); for (i= 0; i< num_OffProcRows; i++) { offproc_rnums[i]=(OffProcRows[i] -> row); swap[i] = i; } } if (num_OffProcRows > 1) { hypre_qsort2i(offproc_rnums, swap, 0, num_OffProcRows-1); } if (FACE_idof == EDGE_idof) three_dimensional_problem = 0; else three_dimensional_problem = 1; /* ELEM_FACEidof = ELEM_FACE x FACE_idof */ if (three_dimensional_problem) ELEM_FACEidof = hypre_ParMatmul(ELEM_FACE, FACE_idof); /* ELEM_EDGEidof = ELEM_EDGE x EDGE_idof */ ELEM_EDGEidof = hypre_ParMatmul(ELEM_EDGE, EDGE_idof); /* Loop over local coarse elements */ k = hypre_ParCSRMatrixFirstRowIndex(ELEM_EDGE); for (i = 0; i < numELEM; i++, k++) { HYPRE_Int size1, size2; HYPRE_Int *col_ind0, *col_ind1, *col_ind2; HYPRE_Int num_DOF, *DOF0, *DOF; HYPRE_Int num_idof, *idof0, *idof; HYPRE_Int num_bdof, *bdof; double *boolean_data; /* Determine the coarse DOFs */ hypre_ParCSRMatrixGetRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); DOF= hypre_TAlloc(HYPRE_Int, num_DOF); for (j= 0; j< num_DOF; j++) { DOF[j]= DOF0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_DOF, k, &num_DOF, &DOF0, &boolean_data); qsort0(DOF,0,num_DOF-1); /* Find the fine dofs interior for the current coarse element */ hypre_ParCSRMatrixGetRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); idof= hypre_TAlloc(HYPRE_Int, num_idof); for (j= 0; j< num_idof; j++) { idof[j]= idof0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_idof, k, &num_idof, &idof0, &boolean_data); /* Sort the interior dofs according to their global number */ qsort0(idof,0,num_idof-1); /* Find the fine dofs on the boundary of the current coarse element */ if (three_dimensional_problem) { hypre_ParCSRMatrixGetRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); col_ind1= hypre_TAlloc(HYPRE_Int, size1); for (j= 0; j< size1; j++) { col_ind1[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_FACEidof, k, &size1, &col_ind0, &boolean_data); } else size1 = 0; hypre_ParCSRMatrixGetRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); col_ind2= hypre_TAlloc(HYPRE_Int, size2); for (j= 0; j< size2; j++) { col_ind2[j]= col_ind0[j]; } hypre_ParCSRMatrixRestoreRow (ELEM_EDGEidof, k, &size2, &col_ind0, &boolean_data); /* Merge and sort the boundary dofs according to their global number */ num_bdof = size1 + size2; bdof = hypre_CTAlloc(HYPRE_Int, num_bdof); if (three_dimensional_problem) memcpy(bdof, col_ind1, size1*sizeof(HYPRE_Int)); memcpy(bdof+size1, col_ind2, size2*sizeof(HYPRE_Int)); qsort0(bdof,0,num_bdof-1); /* A = extract_rows(Aee, idof) */ A = hypre_CSRMatrixCreate (num_idof, num_idof + num_bdof, num_idof * (num_idof + num_bdof)); hypre_CSRMatrixInitialize(A); { HYPRE_Int *I = hypre_CSRMatrixI(A); HYPRE_Int *J = hypre_CSRMatrixJ(A); double *data = hypre_CSRMatrixData(A); HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr <0) hypre_printf("getrow Aee off proc[%d] = \n",myproc); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (Aee, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } } /* P = extract_rows(dof_DOF, idof+bdof) */ P = hypre_CSRMatrixCreate (num_idof + num_bdof, num_DOF, (num_idof + num_bdof) * num_DOF); hypre_CSRMatrixInitialize(P); { HYPRE_Int *I = hypre_CSRMatrixI(P); HYPRE_Int *J = hypre_CSRMatrixJ(P); double *data = hypre_CSRMatrixData(P); HYPRE_Int m; HYPRE_Int *tmp_J; double *tmp_data; I[0] = 0; for (j = 0; j < num_idof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, idof[j], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == idof[j]) { break; } else { m++; } } I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } for ( ; j < num_idof + num_bdof; j++) { getrow_ierr= hypre_ParCSRMatrixGetRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); if (getrow_ierr >= 0) { memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); I[j+1] += I[j]; } else /* row offproc */ { hypre_ParCSRMatrixRestoreRow (dof_DOF, bdof[j-num_idof], &I[j+1], &tmp_J, &tmp_data); /* search for OffProcRows */ m= 0; while (m < num_OffProcRows) { if (offproc_rnums[m] == bdof[j-num_idof]) { break; } else { m++; } } if (m>= num_OffProcRows)hypre_printf("here the mistake\n"); I[j+1]= (OffProcRows[swap[m]] -> ncols); tmp_J = (OffProcRows[swap[m]] -> cols); tmp_data= (OffProcRows[swap[m]] -> data); memcpy(J, tmp_J, I[j+1]*sizeof(HYPRE_Int)); memcpy(data, tmp_data, I[j+1]*sizeof(double)); J+= I[j+1]; data+= I[j+1]; I[j+1] += I[j]; } } } /* Pi = Aii^{-1} Aib Pb */ hypre_HarmonicExtension (A, P, num_DOF, DOF, num_idof, idof, num_bdof, bdof); /* Insert Pi in dof_DOF */ { HYPRE_Int * ncols = hypre_CTAlloc(HYPRE_Int, num_idof); for (j = 0; j < num_idof; j++) ncols[j] = num_DOF; hypre_IJMatrixAddToValuesParCSR (IJ_dof_DOF, num_idof, ncols, idof, hypre_CSRMatrixJ(P), hypre_CSRMatrixData(P)); hypre_TFree(ncols); } hypre_TFree(DOF); hypre_TFree(idof); if (three_dimensional_problem) { hypre_TFree(col_ind1); } hypre_TFree(col_ind2); hypre_TFree(bdof); hypre_CSRMatrixDestroy(A); hypre_CSRMatrixDestroy(P); } #if 0 hypre_TFree(ij_dof_DOF); #endif if (three_dimensional_problem) hypre_ParCSRMatrixDestroy(ELEM_FACEidof); hypre_ParCSRMatrixDestroy(ELEM_EDGEidof); if (num_OffProcRows) { hypre_TFree(offproc_rnums); hypre_TFree(swap); } return ierr; }
hypre_Vector * hypre_ParVectorToVectorAll (hypre_ParVector *par_v) { MPI_Comm comm = hypre_ParVectorComm(par_v); HYPRE_Int global_size = hypre_ParVectorGlobalSize(par_v); #ifndef HYPRE_NO_GLOBAL_PARTITION HYPRE_Int *vec_starts = hypre_ParVectorPartitioning(par_v); #endif hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_v); HYPRE_Int num_procs, my_id; HYPRE_Int num_vectors = hypre_ParVectorNumVectors(par_v); hypre_Vector *vector; double *vector_data; double *local_data; HYPRE_Int local_size; hypre_MPI_Request *requests; hypre_MPI_Status *status; HYPRE_Int i, j; HYPRE_Int *used_procs; HYPRE_Int num_types, num_requests; HYPRE_Int vec_len, proc_id; #ifdef HYPRE_NO_GLOBAL_PARTITION HYPRE_Int *new_vec_starts; HYPRE_Int num_contacts; HYPRE_Int contact_proc_list[1]; HYPRE_Int contact_send_buf[1]; HYPRE_Int contact_send_buf_starts[2]; HYPRE_Int max_response_size; HYPRE_Int *response_recv_buf=NULL; HYPRE_Int *response_recv_buf_starts = NULL; hypre_DataExchangeResponse response_obj; hypre_ProcListElements send_proc_obj; HYPRE_Int *send_info = NULL; hypre_MPI_Status status1; HYPRE_Int count, tag1 = 112, tag2 = 223; HYPRE_Int start; #endif hypre_MPI_Comm_size(comm, &num_procs); hypre_MPI_Comm_rank(comm, &my_id); #ifdef HYPRE_NO_GLOBAL_PARTITION local_size = hypre_ParVectorLastIndex(par_v) - hypre_ParVectorFirstIndex(par_v) + 1; /* determine procs which hold data of par_v and store ids in used_procs */ /* we need to do an exchange data for this. If I own row then I will contact processor 0 with the endpoint of my local range */ if (local_size > 0) { num_contacts = 1; contact_proc_list[0] = 0; contact_send_buf[0] = hypre_ParVectorLastIndex(par_v); contact_send_buf_starts[0] = 0; contact_send_buf_starts[1] = 1; } else { num_contacts = 0; contact_send_buf_starts[0] = 0; contact_send_buf_starts[1] = 0; } /*build the response object*/ /*send_proc_obj will be for saving info from contacts */ send_proc_obj.length = 0; send_proc_obj.storage_length = 10; send_proc_obj.id = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length); send_proc_obj.vec_starts = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1); send_proc_obj.vec_starts[0] = 0; send_proc_obj.element_storage_length = 10; send_proc_obj.elements = hypre_CTAlloc(HYPRE_Int, send_proc_obj.element_storage_length); max_response_size = 0; /* each response is null */ response_obj.fill_response = hypre_FillResponseParToVectorAll; response_obj.data1 = NULL; response_obj.data2 = &send_proc_obj; /*this is where we keep info from contacts*/ hypre_DataExchangeList(num_contacts, contact_proc_list, contact_send_buf, contact_send_buf_starts, sizeof(HYPRE_Int), sizeof(HYPRE_Int), &response_obj, max_response_size, 1, comm, (void**) &response_recv_buf, &response_recv_buf_starts); /* now processor 0 should have a list of ranges for processors that have rows - these are in send_proc_obj - it needs to create the new list of processors and also an array of vec starts - and send to those who own row*/ if (my_id) { if (local_size) { /* look for a message from processor 0 */ hypre_MPI_Probe(0, tag1, comm, &status1); hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count); send_info = hypre_CTAlloc(HYPRE_Int, count); hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1); /* now unpack */ num_types = send_info[0]; used_procs = hypre_CTAlloc(HYPRE_Int, num_types); new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1); for (i=1; i<= num_types; i++) { used_procs[i-1] = send_info[i]; } for (i=num_types+1; i< count; i++) { new_vec_starts[i-num_types-1] = send_info[i] ; } } else /* clean up and exit */ { hypre_TFree(send_proc_obj.vec_starts); hypre_TFree(send_proc_obj.id); hypre_TFree(send_proc_obj.elements); if(response_recv_buf) hypre_TFree(response_recv_buf); if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts); return NULL; } } else /* my_id ==0 */ { num_types = send_proc_obj.length; used_procs = hypre_CTAlloc(HYPRE_Int, num_types); new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1); new_vec_starts[0] = 0; for (i=0; i< num_types; i++) { used_procs[i] = send_proc_obj.id[i]; new_vec_starts[i+1] = send_proc_obj.elements[i]+1; } qsort0(used_procs, 0, num_types-1); qsort0(new_vec_starts, 0, num_types); /*now we need to put into an array to send */ count = 2*num_types+2; send_info = hypre_CTAlloc(HYPRE_Int, count); send_info[0] = num_types; for (i=1; i<= num_types; i++) { send_info[i] = used_procs[i-1]; } for (i=num_types+1; i< count; i++) { send_info[i] = new_vec_starts[i-num_types-1]; } requests = hypre_CTAlloc(hypre_MPI_Request, num_types); status = hypre_CTAlloc(hypre_MPI_Status, num_types); /* don't send to myself - these are sorted so my id would be first*/ start = 0; if (used_procs[0] == 0) { start = 1; } for (i=start; i < num_types; i++) { hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1, comm, &requests[i-start]); } hypre_MPI_Waitall(num_types-start, requests, status); hypre_TFree(status); hypre_TFree(requests); } /* clean up */ hypre_TFree(send_proc_obj.vec_starts); hypre_TFree(send_proc_obj.id); hypre_TFree(send_proc_obj.elements); hypre_TFree(send_info); if(response_recv_buf) hypre_TFree(response_recv_buf); if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts); /* now proc 0 can exit if it has no rows */ if (!local_size) { hypre_TFree(used_procs); hypre_TFree(new_vec_starts); return NULL; } /* everyone left has rows and knows: new_vec_starts, num_types, and used_procs */ /* this vector should be rather small */ local_data = hypre_VectorData(local_vector); vector = hypre_SeqVectorCreate(global_size); hypre_VectorNumVectors(vector) = num_vectors; hypre_SeqVectorInitialize(vector); vector_data = hypre_VectorData(vector); num_requests = 2*num_types; requests = hypre_CTAlloc(hypre_MPI_Request, num_requests); status = hypre_CTAlloc(hypre_MPI_Status, num_requests); /* initialize data exchange among used_procs and generate vector - here we send to ourself also*/ j = 0; for (i = 0; i < num_types; i++) { proc_id = used_procs[i]; vec_len = new_vec_starts[i+1] - new_vec_starts[i]; hypre_MPI_Irecv(&vector_data[new_vec_starts[i]], num_vectors*vec_len, hypre_MPI_DOUBLE, proc_id, tag2, comm, &requests[j++]); } for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE, used_procs[i], tag2, comm, &requests[j++]); } hypre_MPI_Waitall(num_requests, requests, status); if (num_requests) { hypre_TFree(requests); hypre_TFree(status); hypre_TFree(used_procs); } hypre_TFree(new_vec_starts); #else local_size = vec_starts[my_id+1] - vec_starts[my_id]; /* if my_id contains no data, return NULL */ if (!local_size) return NULL; local_data = hypre_VectorData(local_vector); vector = hypre_SeqVectorCreate(global_size); hypre_VectorNumVectors(vector) = num_vectors; hypre_SeqVectorInitialize(vector); vector_data = hypre_VectorData(vector); /* determine procs which hold data of par_v and store ids in used_procs */ num_types = -1; for (i=0; i < num_procs; i++) if (vec_starts[i+1]-vec_starts[i]) num_types++; num_requests = 2*num_types; used_procs = hypre_CTAlloc(HYPRE_Int, num_types); j = 0; for (i=0; i < num_procs; i++) if (vec_starts[i+1]-vec_starts[i] && i-my_id) used_procs[j++] = i; requests = hypre_CTAlloc(hypre_MPI_Request, num_requests); status = hypre_CTAlloc(hypre_MPI_Status, num_requests); /* initialize data exchange among used_procs and generate vector */ j = 0; for (i = 0; i < num_types; i++) { proc_id = used_procs[i]; vec_len = vec_starts[proc_id+1] - vec_starts[proc_id]; hypre_MPI_Irecv(&vector_data[vec_starts[proc_id]], num_vectors*vec_len, hypre_MPI_DOUBLE, proc_id, 0, comm, &requests[j++]); } for (i = 0; i < num_types; i++) { hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE, used_procs[i], 0, comm, &requests[j++]); } for (i=0; i < num_vectors*local_size; i++) vector_data[vec_starts[my_id]+i] = local_data[i]; hypre_MPI_Waitall(num_requests, requests, status); if (num_requests) { hypre_TFree(used_procs); hypre_TFree(requests); hypre_TFree(status); } #endif return vector; }
HYPRE_Int HYPRE_ParCSRMLConstructMHMatrix(HYPRE_ParCSRMatrix A, MH_Matrix *mh_mat, MPI_Comm comm, HYPRE_Int *partition,MH_Context *obj) { HYPRE_Int i, j, index, my_id, nprocs, msgid, *tempCnt; HYPRE_Int sendProcCnt, *sendLeng, *sendProc, **sendList; HYPRE_Int recvProcCnt, *recvLeng, *recvProc; HYPRE_Int rowLeng, *colInd, startRow, endRow, localEqns; HYPRE_Int *diagSize, *offdiagSize, externLeng, *externList, ncnt, nnz; HYPRE_Int *rowptr, *columns, num_bdry; double *colVal, *values; hypre_MPI_Request *Request; hypre_MPI_Status status; /* -------------------------------------------------------- */ /* get machine information and local matrix information */ /* -------------------------------------------------------- */ hypre_MPI_Comm_rank(comm, &my_id); hypre_MPI_Comm_size(comm, &nprocs); startRow = partition[my_id]; endRow = partition[my_id+1] - 1; localEqns = endRow - startRow + 1; /* -------------------------------------------------------- */ /* probe A to find out about diagonal and off-diagonal */ /* block information */ /* -------------------------------------------------------- */ diagSize = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * localEqns ); offdiagSize = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * localEqns ); num_bdry = 0; for ( i = startRow; i <= endRow; i++ ) { diagSize[i-startRow] = offdiagSize[i-startRow] = 0; HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal); for (j = 0; j < rowLeng; j++) if ( colInd[j] < startRow || colInd[j] > endRow ) { //if ( colVal[j] != 0.0 ) offdiagSize[i-startRow]++; offdiagSize[i-startRow]++; } else { //if ( colVal[j] != 0.0 ) diagSize[i-startRow]++; diagSize[i-startRow]++; } HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal); if ( diagSize[i-startRow] + offdiagSize[i-startRow] == 1 ) num_bdry++; } /* -------------------------------------------------------- */ /* construct external node list in global eqn numbers */ /* -------------------------------------------------------- */ externLeng = 0; for ( i = 0; i < localEqns; i++ ) externLeng += offdiagSize[i]; if ( externLeng > 0 ) externList = (HYPRE_Int *) malloc( sizeof(HYPRE_Int) * externLeng); else externList = NULL; externLeng = 0; for ( i = startRow; i <= endRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal); for (j = 0; j < rowLeng; j++) { if ( colInd[j] < startRow || colInd[j] > endRow ) //if ( colVal[j] != 0.0 ) externList[externLeng++] = colInd[j]; externList[externLeng++] = colInd[j]; } HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal); } qsort0( externList, 0, externLeng-1 ); ncnt = 0; for ( i = 1; i < externLeng; i++ ) { if ( externList[i] != externList[ncnt] ) externList[++ncnt] = externList[i]; } externLeng = ncnt + 1; /* -------------------------------------------------------- */ /* allocate the CSR matrix */ /* -------------------------------------------------------- */ nnz = 0; for ( i = 0; i < localEqns; i++ ) nnz += diagSize[i] + offdiagSize[i]; rowptr = (HYPRE_Int *) malloc( (localEqns + 1) * sizeof(HYPRE_Int) ); columns = (HYPRE_Int *) malloc( nnz * sizeof(HYPRE_Int) ); values = (double *) malloc( nnz * sizeof(double) ); rowptr[0] = 0; for ( i = 1; i <= localEqns; i++ ) rowptr[i] = rowptr[i-1] + diagSize[i-1] + offdiagSize[i-1]; free( diagSize ); free( offdiagSize ); /* -------------------------------------------------------- */ /* put the matrix data in the CSR matrix */ /* -------------------------------------------------------- */ rowptr[0] = 0; ncnt = 0; for ( i = startRow; i <= endRow; i++ ) { HYPRE_ParCSRMatrixGetRow(A, i, &rowLeng, &colInd, &colVal); for (j = 0; j < rowLeng; j++) { index = colInd[j]; //if ( colVal[j] != 0.0 ) { if ( index < startRow || index > endRow ) { columns[ncnt] = hypre_BinarySearch(externList,index, externLeng ); columns[ncnt] += localEqns; values [ncnt++] = colVal[j]; } else { columns[ncnt] = index - startRow; values[ncnt++] = colVal[j]; } } } rowptr[i-startRow+1] = ncnt; HYPRE_ParCSRMatrixRestoreRow(A, i, &rowLeng, &colInd, &colVal); } assert( ncnt == nnz ); /* -------------------------------------------------------- */ /* initialize the MH_Matrix data structure */ /* -------------------------------------------------------- */ mh_mat->Nrows = localEqns; mh_mat->rowptr = rowptr; mh_mat->colnum = columns; mh_mat->values = values; mh_mat->sendProcCnt = 0; mh_mat->recvProcCnt = 0; mh_mat->sendLeng = NULL; mh_mat->recvLeng = NULL; mh_mat->sendProc = NULL; mh_mat->recvProc = NULL; mh_mat->sendList = NULL; mh_mat->map = externList; /* -------------------------------------------------------- */ /* form the remote portion of the matrix */ /* -------------------------------------------------------- */ if ( nprocs > 1 ) { /* ----------------------------------------------------- */ /* count number of elements to be received from each */ /* remote processor (assume sequential mapping) */ /* ----------------------------------------------------- */ tempCnt = (HYPRE_Int *) malloc( sizeof(HYPRE_Int) * nprocs ); for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; for ( i = 0; i < externLeng; i++ ) { for ( j = 0; j < nprocs; j++ ) { if ( externList[i] >= partition[j] && externList[i] < partition[j+1] ) { tempCnt[j]++; break; } } } /* ----------------------------------------------------- */ /* compile a list processors data is to be received from */ /* ----------------------------------------------------- */ recvProcCnt = 0; for ( i = 0; i < nprocs; i++ ) if ( tempCnt[i] > 0 ) recvProcCnt++; recvLeng = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * recvProcCnt ); recvProc = (HYPRE_Int*) malloc( sizeof(HYPRE_Int) * recvProcCnt ); recvProcCnt = 0; for ( i = 0; i < nprocs; i++ ) { if ( tempCnt[i] > 0 ) { recvProc[recvProcCnt] = i; recvLeng[recvProcCnt++] = tempCnt[i]; } } /* ----------------------------------------------------- */ /* each processor has to find out how many processors it */ /* has to send data to */ /* ----------------------------------------------------- */ sendLeng = (HYPRE_Int *) malloc( nprocs * sizeof(HYPRE_Int) ); for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; for ( i = 0; i < recvProcCnt; i++ ) tempCnt[recvProc[i]] = 1; hypre_MPI_Allreduce(tempCnt, sendLeng, nprocs, HYPRE_MPI_INT, hypre_MPI_SUM, comm ); sendProcCnt = sendLeng[my_id]; free( sendLeng ); if ( sendProcCnt > 0 ) { sendLeng = (HYPRE_Int *) malloc( sendProcCnt * sizeof(HYPRE_Int) ); sendProc = (HYPRE_Int *) malloc( sendProcCnt * sizeof(HYPRE_Int) ); sendList = (HYPRE_Int **) malloc( sendProcCnt * sizeof(HYPRE_Int*) ); } else { sendLeng = sendProc = NULL; sendList = NULL; } /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the lengths of data expected */ /* ----------------------------------------------------- */ msgid = 539; for ( i = 0; i < recvProcCnt; i++ ) { hypre_MPI_Send((void*) &recvLeng[i],1,HYPRE_MPI_INT,recvProc[i],msgid,comm); } for ( i = 0; i < sendProcCnt; i++ ) { hypre_MPI_Recv((void*) &sendLeng[i],1,HYPRE_MPI_INT,hypre_MPI_ANY_SOURCE,msgid, comm,&status); sendProc[i] = status.hypre_MPI_SOURCE; sendList[i] = (HYPRE_Int *) malloc( sendLeng[i] * sizeof(HYPRE_Int) ); if ( sendList[i] == NULL ) hypre_printf("allocate problem %d \n", sendLeng[i]); } /* ----------------------------------------------------- */ /* each processor sends to all processors it expects to */ /* receive data about the equation numbers */ /* ----------------------------------------------------- */ for ( i = 0; i < nprocs; i++ ) tempCnt[i] = 0; ncnt = 1; for ( i = 0; i < externLeng; i++ ) { if ( externList[i] >= partition[ncnt] ) { tempCnt[ncnt-1] = i; i--; ncnt++; } } for ( i = ncnt-1; i < nprocs; i++ ) tempCnt[i] = externLeng; /* ----------------------------------------------------- */ /* send the global equation numbers */ /* ----------------------------------------------------- */ msgid = 540; for ( i = 0; i < recvProcCnt; i++ ) { if ( recvProc[i] == 0 ) j = 0; else j = tempCnt[recvProc[i]-1]; rowLeng = recvLeng[i]; hypre_MPI_Send((void*) &externList[j],rowLeng,HYPRE_MPI_INT,recvProc[i], msgid,comm); } for ( i = 0; i < sendProcCnt; i++ ) { rowLeng = sendLeng[i]; hypre_MPI_Recv((void*)sendList[i],rowLeng,HYPRE_MPI_INT,sendProc[i], msgid,comm,&status); } /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ /* ----------------------------------------------------- */ for ( i = 0; i < sendProcCnt; i++ ) { for ( j = 0; j < sendLeng[i]; j++ ) { index = sendList[i][j] - startRow; if ( index < 0 || index >= localEqns ) { hypre_printf("%d : Construct MH matrix Error - index out "); hypre_printf("of range%d\n", my_id, index); } sendList[i][j] = index; } } /* ----------------------------------------------------- */ /* convert the send list from global to local numbers */ /* ----------------------------------------------------- */ mh_mat->sendProcCnt = sendProcCnt; mh_mat->recvProcCnt = recvProcCnt; mh_mat->sendLeng = sendLeng; mh_mat->recvLeng = recvLeng; mh_mat->sendProc = sendProc; mh_mat->recvProc = recvProc; mh_mat->sendList = sendList; /* ----------------------------------------------------- */ /* clean up */ /* ----------------------------------------------------- */ free( tempCnt ); } return 0; }
HYPRE_Int hypre_CreateLambda(void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ MPI_Comm comm; hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParCSRMatrix *A_tmp; hypre_ParCSRMatrix *Lambda; hypre_CSRMatrix *L_diag; hypre_CSRMatrix *L_offd; hypre_CSRMatrix *A_tmp_diag; hypre_CSRMatrix *A_tmp_offd; hypre_ParVector *Xtilde; hypre_ParVector *Rtilde; hypre_Vector *Xtilde_local; hypre_Vector *Rtilde_local; hypre_ParCSRCommPkg *comm_pkg; hypre_ParCSRCommPkg *L_comm_pkg = NULL; hypre_ParCSRCommHandle *comm_handle; HYPRE_Real *L_diag_data; HYPRE_Real *L_offd_data; HYPRE_Real *buf_data = NULL; HYPRE_Real *tmp_data; HYPRE_Real *x_data; HYPRE_Real *r_data; HYPRE_Real *l1_norms; HYPRE_Real *A_tmp_diag_data; HYPRE_Real *A_tmp_offd_data; HYPRE_Real *D_data = NULL; HYPRE_Real *D_data_offd = NULL; HYPRE_Int *L_diag_i; HYPRE_Int *L_diag_j; HYPRE_Int *L_offd_i; HYPRE_Int *L_offd_j; HYPRE_Int *A_tmp_diag_i; HYPRE_Int *A_tmp_offd_i; HYPRE_Int *A_tmp_diag_j; HYPRE_Int *A_tmp_offd_j; HYPRE_Int *L_recv_ptr = NULL; HYPRE_Int *L_send_ptr = NULL; HYPRE_Int *L_recv_procs = NULL; HYPRE_Int *L_send_procs = NULL; HYPRE_Int *L_send_map_elmts = NULL; HYPRE_Int *recv_procs; HYPRE_Int *send_procs; HYPRE_Int *send_map_elmts; HYPRE_Int *send_map_starts; HYPRE_Int *recv_vec_starts; HYPRE_Int *all_send_procs = NULL; HYPRE_Int *all_recv_procs = NULL; HYPRE_Int *remap = NULL; HYPRE_Int *level_start; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int num_levels; HYPRE_Int num_add_lvls; HYPRE_Int num_procs; HYPRE_Int num_sends, num_recvs; HYPRE_Int num_sends_L = 0; HYPRE_Int num_recvs_L = 0; HYPRE_Int send_data_L = 0; HYPRE_Int num_rows_L = 0; HYPRE_Int num_rows_tmp = 0; HYPRE_Int num_cols_offd_L = 0; HYPRE_Int num_cols_offd = 0; HYPRE_Int level, i, j, k; HYPRE_Int this_proc, cnt, cnt_diag, cnt_offd; HYPRE_Int cnt_recv, cnt_send, cnt_row, row_start; HYPRE_Int start_diag, start_offd, indx, cnt_map; HYPRE_Int start, j_indx, index, cnt_level; HYPRE_Int max_sends, max_recvs; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_threads; HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd; HYPRE_Real **l1_norms_ptr = NULL; HYPRE_Real *relax_weight = NULL; HYPRE_Real relax_type; /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_type = hypre_ParAMGDataGridRelaxType(amg_data)[1]; comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_size(comm,&num_procs); l1_norms_ptr = hypre_ParAMGDataL1Norms(amg_data); addlvl = hypre_max(additive, mult_additive); num_add_lvls = num_levels+1-addlvl; level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1); send_data_L = 0; num_rows_L = 0; num_cols_offd_L = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; level_start[0] = 0; cnt = 1; max_sends = 0; max_recvs = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd); num_rows_L += num_rows_tmp; level_start[cnt] = level_start[cnt-1] + num_rows_tmp; cnt++; num_cols_offd_L += num_cols_offd; num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp]; num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); max_sends += num_sends; if (num_sends) send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends); max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg); } } if (max_sends >= num_procs ||max_recvs >= num_procs) { max_sends = num_procs; max_recvs = num_procs; } if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends); if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs); cnt_send = 0; cnt_recv = 0; if (max_sends || max_recvs) { if (max_sends < num_procs && max_recvs < num_procs) { for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); for (j = 0; j < num_sends; j++) all_send_procs[cnt_send++] = send_procs[j]; for (j = 0; j < num_recvs; j++) all_recv_procs[cnt_recv++] = recv_procs[j]; } } if (max_sends) { qsort0(all_send_procs, 0, max_sends-1); num_sends_L = 1; this_proc = all_send_procs[0]; for (i=1; i < max_sends; i++) { if (all_send_procs[i] > this_proc) { this_proc = all_send_procs[i]; all_send_procs[num_sends_L++] = this_proc; } } L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); for (j=0; j < num_sends_L; j++) L_send_procs[j] = all_send_procs[j]; hypre_TFree(all_send_procs); } if (max_recvs) { qsort0(all_recv_procs, 0, max_recvs-1); num_recvs_L = 1; this_proc = all_recv_procs[0]; for (i=1; i < max_recvs; i++) { if (all_recv_procs[i] > this_proc) { this_proc = all_recv_procs[i]; all_recv_procs[num_recvs_L++] = this_proc; } } L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); for (j=0; j < num_recvs_L; j++) L_recv_procs[j] = all_recv_procs[j]; hypre_TFree(all_recv_procs); } L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } for (k = 0; k < num_sends; k++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L); L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k]; } for (k = 0; k < num_recvs; k++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L); L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k]; } } L_recv_ptr[0] = 0; for (i=1; i < num_recvs_L; i++) L_recv_ptr[i+1] += L_recv_ptr[i]; L_send_ptr[0] = 0; for (i=1; i < num_sends_L; i++) L_send_ptr[i+1] += L_send_ptr[i]; } else { num_recvs_L = 0; num_sends_L = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); for (j = 0; j < num_sends; j++) { this_proc = send_procs[j]; if (all_send_procs[this_proc] == 0) num_sends_L++; all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j]; } for (j = 0; j < num_recvs; j++) { this_proc = recv_procs[j]; if (all_recv_procs[this_proc] == 0) num_recvs_L++; all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j]; } } } if (max_sends) { L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); num_sends_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_send_procs[j]; if (this_proc) { L_send_procs[num_sends_L++] = j; L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1]; } } } if (max_recvs) { L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); num_recvs_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_recv_procs[j]; if (this_proc) { L_recv_procs[num_recvs_L++] = j; L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1]; } } } } } if (max_sends) hypre_TFree(all_send_procs); if (max_recvs) hypre_TFree(all_recv_procs); L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag); L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd); hypre_CSRMatrixInitialize(L_diag); hypre_CSRMatrixInitialize(L_offd); if (num_nonzeros_diag) { L_diag_data = hypre_CSRMatrixData(L_diag); L_diag_j = hypre_CSRMatrixJ(L_diag); } L_diag_i = hypre_CSRMatrixI(L_diag); if (num_nonzeros_offd) { L_offd_data = hypre_CSRMatrixData(L_offd); L_offd_j = hypre_CSRMatrixJ(L_offd); } L_offd_i = hypre_CSRMatrixI(L_offd); if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L); if (send_data_L) { L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L); buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L); } if (num_cols_offd_L) { D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L); /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/ remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L); } Rtilde = hypre_CTAlloc(hypre_ParVector, 1); Rtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Rtilde_local); hypre_ParVectorLocalVector(Rtilde) = Rtilde_local; hypre_ParVectorOwnsData(Rtilde) = 1; Xtilde = hypre_CTAlloc(hypre_ParVector, 1); Xtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Xtilde_local); hypre_ParVectorLocalVector(Xtilde) = Xtilde_local; hypre_ParVectorOwnsData(Xtilde) = 1; x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); cnt = 0; cnt_level = 0; cnt_diag = 0; cnt_offd = 0; cnt_row = 1; L_diag_i[0] = 0; L_offd_i[0] = 0; for (level=addlvl; level < num_levels; level++) { row_start = level_start[cnt_level]; if (level != 0) { tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0; tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0; } cnt_level++; start_diag = L_diag_i[cnt_row-1]; start_offd = L_offd_i[cnt_row-1]; A_tmp = A_array[level]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag); A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd); A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag); A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } /* Compute new combined communication package */ for (i=0; i < num_sends; i++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L); indx = L_send_ptr[this_proc]; for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++) { L_send_map_elmts[indx++] = row_start + send_map_elmts[j]; } L_send_ptr[this_proc] = indx; } cnt_map = 0; for (i = 0; i < num_recvs; i++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L); indx = L_recv_ptr[this_proc]; for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { remap[cnt_map++] = indx++; } L_recv_ptr[this_proc] = indx; } /* Compute Lambda */ if (relax_type == 0) { HYPRE_Real rlx_wt = relax_weight[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } else { l1_norms = l1_norms_ptr[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = 1.0/l1_norms[i]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } if (num_procs > 1) { index = 0; for (i=0; i < num_sends; i++) { start = send_map_starts[i]; for (j=start; j < send_map_starts[i+1]; j++) buf_data[index++] = D_data[send_map_elmts[j]]; } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, D_data_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } for (i = 0; i < num_rows_tmp; i++) { j_indx = A_tmp_diag_i[i]; L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i]; L_diag_j[cnt_diag++] = i+row_start; for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++) { j_indx = A_tmp_diag_j[j]; L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i]; L_diag_j[cnt_diag++] = j_indx+row_start; } for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++) { j_indx = A_tmp_offd_j[j]; L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i]; L_offd_j[cnt_offd++] = remap[j_indx]; } } cnt_row += num_rows_tmp; } if (L_send_ptr) { for (i=num_sends_L-1; i > 0; i--) L_send_ptr[i] = L_send_ptr[i-1]; L_send_ptr[0] = 0; } else L_send_ptr = hypre_CTAlloc(HYPRE_Int,1); if (L_recv_ptr) { for (i=num_recvs_L-1; i > 0; i--) L_recv_ptr[i] = L_recv_ptr[i-1]; L_recv_ptr[0] = 0; } else L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1); L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L; hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L; hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs; hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs; hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr; hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr; hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts; hypre_ParCSRCommPkgComm(L_comm_pkg) = comm; Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1); hypre_ParCSRMatrixDiag(Lambda) = L_diag; hypre_ParCSRMatrixOffd(Lambda) = L_offd; hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg; hypre_ParCSRMatrixComm(Lambda) = comm; hypre_ParCSRMatrixOwnsData(Lambda) = 1; hypre_ParAMGDataLambda(amg_data) = Lambda; hypre_ParAMGDataRtilde(amg_data) = Rtilde; hypre_ParAMGDataXtilde(amg_data) = Xtilde; hypre_TFree(D_data_offd); hypre_TFree(D_data); if (num_procs > 1) hypre_TFree(buf_data); hypre_TFree(remap); hypre_TFree(buf_data); hypre_TFree(level_start); return Solve_err_flag; }
HYPRE_ParCSRMatrix GenerateDifConv( MPI_Comm comm, HYPRE_Int nx, HYPRE_Int ny, HYPRE_Int nz, HYPRE_Int P, HYPRE_Int Q, HYPRE_Int R, HYPRE_Int p, HYPRE_Int q, HYPRE_Int r, HYPRE_Real *value ) { hypre_ParCSRMatrix *A; hypre_CSRMatrix *diag; hypre_CSRMatrix *offd; HYPRE_Int *diag_i; HYPRE_Int *diag_j; HYPRE_Real *diag_data; HYPRE_Int *offd_i; HYPRE_Int *offd_j; HYPRE_Real *offd_data; HYPRE_Int *global_part; HYPRE_Int ix, iy, iz; HYPRE_Int cnt, o_cnt; HYPRE_Int local_num_rows; HYPRE_Int *col_map_offd; HYPRE_Int row_index; HYPRE_Int i,j; HYPRE_Int nx_local, ny_local, nz_local; HYPRE_Int nx_size, ny_size, nz_size; HYPRE_Int num_cols_offd; HYPRE_Int grid_size; HYPRE_Int *nx_part; HYPRE_Int *ny_part; HYPRE_Int *nz_part; HYPRE_Int num_procs, my_id; HYPRE_Int P_busy, Q_busy, R_busy; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); grid_size = nx*ny*nz; hypre_GeneratePartitioning(nx,P,&nx_part); hypre_GeneratePartitioning(ny,Q,&ny_part); hypre_GeneratePartitioning(nz,R,&nz_part); global_part = hypre_CTAlloc(HYPRE_Int,P*Q*R+1); global_part[0] = 0; cnt = 1; for (iz = 0; iz < R; iz++) { nz_size = nz_part[iz+1]-nz_part[iz]; for (iy = 0; iy < Q; iy++) { ny_size = ny_part[iy+1]-ny_part[iy]; for (ix = 0; ix < P; ix++) { nx_size = nx_part[ix+1] - nx_part[ix]; global_part[cnt] = global_part[cnt-1]; global_part[cnt++] += nx_size*ny_size*nz_size; } } } nx_local = nx_part[p+1] - nx_part[p]; ny_local = ny_part[q+1] - ny_part[q]; nz_local = nz_part[r+1] - nz_part[r]; my_id = r*(P*Q) + q*P + p; num_procs = P*Q*R; local_num_rows = nx_local*ny_local*nz_local; diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1); P_busy = hypre_min(nx,P); Q_busy = hypre_min(ny,Q); R_busy = hypre_min(nz,R); num_cols_offd = 0; if (p) num_cols_offd += ny_local*nz_local; if (p < P_busy-1) num_cols_offd += ny_local*nz_local; if (q) num_cols_offd += nx_local*nz_local; if (q < Q_busy-1) num_cols_offd += nx_local*nz_local; if (r) num_cols_offd += nx_local*ny_local; if (r < R_busy-1) num_cols_offd += nx_local*ny_local; if (!local_num_rows) num_cols_offd = 0; col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); cnt = 1; o_cnt = 1; diag_i[0] = 0; offd_i[0] = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_i[cnt] = diag_i[cnt-1]; offd_i[o_cnt] = offd_i[o_cnt-1]; diag_i[cnt]++; if (iz > nz_part[r]) diag_i[cnt]++; else { if (iz) { offd_i[o_cnt]++; } } if (iy > ny_part[q]) diag_i[cnt]++; else { if (iy) { offd_i[o_cnt]++; } } if (ix > nx_part[p]) diag_i[cnt]++; else { if (ix) { offd_i[o_cnt]++; } } if (ix+1 < nx_part[p+1]) diag_i[cnt]++; else { if (ix+1 < nx) { offd_i[o_cnt]++; } } if (iy+1 < ny_part[q+1]) diag_i[cnt]++; else { if (iy+1 < ny) { offd_i[o_cnt]++; } } if (iz+1 < nz_part[r+1]) diag_i[cnt]++; else { if (iz+1 < nz) { offd_i[o_cnt]++; } } cnt++; o_cnt++; } } } diag_j = hypre_CTAlloc(HYPRE_Int, diag_i[local_num_rows]); diag_data = hypre_CTAlloc(HYPRE_Real, diag_i[local_num_rows]); if (num_procs > 1) { offd_j = hypre_CTAlloc(HYPRE_Int, offd_i[local_num_rows]); offd_data = hypre_CTAlloc(HYPRE_Real, offd_i[local_num_rows]); } row_index = 0; cnt = 0; o_cnt = 0; for (iz = nz_part[r]; iz < nz_part[r+1]; iz++) { for (iy = ny_part[q]; iy < ny_part[q+1]; iy++) { for (ix = nx_part[p]; ix < nx_part[p+1]; ix++) { diag_j[cnt] = row_index; diag_data[cnt++] = value[0]; if (iz > nz_part[r]) { diag_j[cnt] = row_index-nx_local*ny_local; diag_data[cnt++] = value[3]; } else { if (iz) { offd_j[o_cnt] = hypre_map(ix,iy,iz-1,p,q,r-1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[3]; } } if (iy > ny_part[q]) { diag_j[cnt] = row_index-nx_local; diag_data[cnt++] = value[2]; } else { if (iy) { offd_j[o_cnt] = hypre_map(ix,iy-1,iz,p,q-1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[2]; } } if (ix > nx_part[p]) { diag_j[cnt] = row_index-1; diag_data[cnt++] = value[1]; } else { if (ix) { offd_j[o_cnt] = hypre_map(ix-1,iy,iz,p-1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[1]; } } if (ix+1 < nx_part[p+1]) { diag_j[cnt] = row_index+1; diag_data[cnt++] = value[4]; } else { if (ix+1 < nx) { offd_j[o_cnt] = hypre_map(ix+1,iy,iz,p+1,q,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[4]; } } if (iy+1 < ny_part[q+1]) { diag_j[cnt] = row_index+nx_local; diag_data[cnt++] = value[5]; } else { if (iy+1 < ny) { offd_j[o_cnt] = hypre_map(ix,iy+1,iz,p,q+1,r,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[5]; } } if (iz+1 < nz_part[r+1]) { diag_j[cnt] = row_index+nx_local*ny_local; diag_data[cnt++] = value[6]; } else { if (iz+1 < nz) { offd_j[o_cnt] = hypre_map(ix,iy,iz+1,p,q,r+1,P,Q,R, nx_part,ny_part,nz_part,global_part); offd_data[o_cnt++] = value[6]; } } row_index++; } } } if (num_procs > 1) { for (i=0; i < num_cols_offd; i++) col_map_offd[i] = offd_j[i]; qsort0(col_map_offd, 0, num_cols_offd-1); for (i=0; i < num_cols_offd; i++) for (j=0; j < num_cols_offd; j++) if (offd_j[i] == col_map_offd[j]) { offd_j[i] = j; break; } } A = hypre_ParCSRMatrixCreate(comm, grid_size, grid_size, global_part, global_part, num_cols_offd, diag_i[local_num_rows], offd_i[local_num_rows]); hypre_ParCSRMatrixColMapOffd(A) = col_map_offd; diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrixI(diag) = diag_i; hypre_CSRMatrixJ(diag) = diag_j; hypre_CSRMatrixData(diag) = diag_data; offd = hypre_ParCSRMatrixOffd(A); hypre_CSRMatrixI(offd) = offd_i; if (num_cols_offd) { hypre_CSRMatrixJ(offd) = offd_j; hypre_CSRMatrixData(offd) = offd_data; } hypre_TFree(nx_part); hypre_TFree(ny_part); hypre_TFree(nz_part); return (HYPRE_ParCSRMatrix) A; }