HYPRE_Int hypre_ParCSRMatrix_dof_func_offd( hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int **dof_func_offd ) { hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int num_cols_offd = 0; HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_sends; HYPRE_Int *int_buf_data; HYPRE_Int index, start, i, j; num_cols_offd = hypre_CSRMatrixNumCols(A_offd); *dof_func_offd = NULL; if (num_cols_offd) { if (num_functions > 1) *dof_func_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); } /*------------------------------------------------------------------- * Get the dof_func data for the off-processor columns *-------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_functions > 1) { int_buf_data = hypre_CTAlloc(HYPRE_Int,hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j=start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, int_buf_data, *dof_func_offd); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } return(Solve_err_flag); }
/*-------------------------------------------------------------------------- * hypre_ParCSRMatrixMatvec_FF *--------------------------------------------------------------------------*/ HYPRE_Int hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int ierr = 0; HYPRE_Int num_sends, i, j, index, start, num_procs; HYPRE_Int *int_buf_data = NULL; HYPRE_Int *CF_marker_offd = NULL; HYPRE_Complex *x_tmp_data = NULL; HYPRE_Complex *x_buf_data = NULL; HYPRE_Complex *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_MPI_Comm_size(comm,&num_procs); if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; if (num_procs > 1) { if (num_cols_offd) { x_tmp = hypre_SeqVectorCreate( num_cols_offd ); hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); } /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_sends) x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data ); } hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker, CF_marker, fpt); if (num_procs > 1) { hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_sends) int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); if (num_cols_offd) CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd ); hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (num_cols_offd) hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local, CF_marker, CF_marker_offd, fpt); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; hypre_TFree(x_buf_data); hypre_TFree(int_buf_data); hypre_TFree(CF_marker_offd); } return ierr; }
HYPRE_Int hypre_ParCSRMatrixMatvec( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); hypre_Vector *x_tmp; HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_vectors = hypre_VectorNumVectors(x_local); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int ierr = 0; HYPRE_Int num_sends, i, j, jv, index, start; HYPRE_Int vecstride = hypre_VectorVectorStride( x_local ); HYPRE_Int idxstride = hypre_VectorIndexStride( x_local ); HYPRE_Complex *x_tmp_data, **x_buf_data; HYPRE_Complex *x_local_data = hypre_VectorData(x_local); /*--------------------------------------------------------------------- * Check for size compatibility. ParMatvec returns ierr = 11 if * length of X doesn't equal the number of columns of A, * ierr = 12 if the length of Y doesn't equal the number of rows * of A, and ierr = 13 if both are true. * * Because temporary vectors are often used in ParMatvec, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( idxstride>0 ); if (num_cols != x_size) ierr = 11; if (num_rows != y_size) ierr = 12; if (num_cols != x_size && num_rows != y_size) ierr = 13; hypre_assert( hypre_VectorNumVectors(y_local)==num_vectors ); if ( num_vectors==1 ) x_tmp = hypre_SeqVectorCreate( num_cols_offd ); else { hypre_assert( num_vectors>1 ); x_tmp = hypre_SeqMultiVectorCreate( num_cols_offd, num_vectors ); } hypre_SeqVectorInitialize(x_tmp); x_tmp_data = hypre_VectorData(x_tmp); comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); x_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) x_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); if ( num_vectors==1 ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[0][index++] = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } } else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) x_buf_data[jv][index++] = x_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ]; } } hypre_assert( idxstride==1 ); /* ... The assert is because the following loop only works for 'column' storage of a multivector. This needs to be fixed to work more generally, at least for 'row' storage. This in turn, means either change CommPkg so num_sends is no.zones*no.vectors (not no.zones) or, less dangerously, put a stride in the logic of CommHandleCreate (stride either from a new arg or a new variable inside CommPkg). Or put the num_vector iteration inside CommHandleCreate (perhaps a new multivector variant of it). */ for ( jv=0; jv<num_vectors; ++jv ) { comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data[jv], &(x_tmp_data[jv*num_cols_offd]) ); } hypre_CSRMatrixMatvec( alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if (num_cols_offd) hypre_CSRMatrixMatvec( alpha, offd, x_tmp, 1.0, y_local); hypre_SeqVectorDestroy(x_tmp); x_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(x_buf_data[jv]); hypre_TFree(x_buf_data); return ierr; }
HYPRE_Int hypre_ParCSRMatrixMatvecT( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y ) { hypre_ParCSRCommHandle **comm_handle; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); hypre_Vector *x_local = hypre_ParVectorLocalVector(x); hypre_Vector *y_local = hypre_ParVectorLocalVector(y); hypre_Vector *y_tmp; HYPRE_Int vecstride = hypre_VectorVectorStride( y_local ); HYPRE_Int idxstride = hypre_VectorIndexStride( y_local ); HYPRE_Complex *y_tmp_data, **y_buf_data; HYPRE_Complex *y_local_data = hypre_VectorData(y_local); HYPRE_Int num_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int num_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd); HYPRE_Int x_size = hypre_ParVectorGlobalSize(x); HYPRE_Int y_size = hypre_ParVectorGlobalSize(y); HYPRE_Int num_vectors = hypre_VectorNumVectors(y_local); HYPRE_Int i, j, jv, index, start, num_sends; HYPRE_Int ierr = 0; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- *-----------------------------------------------------------------------*/ comm_handle = hypre_CTAlloc(hypre_ParCSRCommHandle*,num_vectors); if ( num_vectors==1 ) { y_tmp = hypre_SeqVectorCreate(num_cols_offd); } else { y_tmp = hypre_SeqMultiVectorCreate(num_cols_offd,num_vectors); } hypre_SeqVectorInitialize(y_tmp); /*--------------------------------------------------------------------- * If there exists no CommPkg for A, a CommPkg is generated using * equally load balanced partitionings *--------------------------------------------------------------------*/ if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); y_buf_data = hypre_CTAlloc( HYPRE_Complex*, num_vectors ); for ( jv=0; jv<num_vectors; ++jv ) y_buf_data[jv] = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart (comm_pkg, num_sends)); y_tmp_data = hypre_VectorData(y_tmp); y_local_data = hypre_VectorData(y_local); hypre_assert( idxstride==1 ); /* only 'column' storage of multivectors * implemented so far */ if (num_cols_offd) hypre_CSRMatrixMatvecT(alpha, offd, x_local, 0.0, y_tmp); for ( jv=0; jv<num_vectors; ++jv ) { /* this is where we assume multivectors are 'column' storage */ comm_handle[jv] = hypre_ParCSRCommHandleCreate ( 2, comm_pkg, &(y_tmp_data[jv*num_cols_offd]), y_buf_data[jv] ); } hypre_CSRMatrixMatvecT(alpha, diag, x_local, beta, y_local); for ( jv=0; jv<num_vectors; ++jv ) { hypre_ParCSRCommHandleDestroy(comm_handle[jv]); comm_handle[jv] = NULL; } hypre_TFree(comm_handle); if ( num_vectors==1 ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)] += y_buf_data[0][index++]; } } else for ( jv=0; jv<num_vectors; ++jv ) { index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) y_local_data[ jv*vecstride + idxstride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j) ] += y_buf_data[jv][index++]; } } hypre_SeqVectorDestroy(y_tmp); y_tmp = NULL; for ( jv=0; jv<num_vectors; ++jv ) hypre_TFree(y_buf_data[jv]); hypre_TFree(y_buf_data); return ierr; }
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange) /* assemble a graph representing the connections between the grids * ================================================================================================ * S : the strength matrix * nlocal : the number of locally created coarse grids * CF_marker, CF_marker_offd : the coare/fine markers * coarsen_type : the coarsening type * vrange : the ranges of the vertices representing coarse grids * ================================================================================================*/ { HYPRE_Int ierr=0; HYPRE_Int mpisize,mpirank; HYPRE_Int num_sends; HYPRE_Int *vertexrange=NULL; HYPRE_Int vstart,vend; HYPRE_Int *int_buf_data; HYPRE_Int start; HYPRE_Int i,ii,j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S)); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S)); MPI_Comm comm = hypre_ParCSRMatrixComm(S); /* hypre_MPI_Status status; */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S); hypre_ParCSRCommHandle *comm_handle; hypre_MPI_Comm_size (comm,&mpisize); hypre_MPI_Comm_rank (comm,&mpirank); if (!comm_pkg) { hypre_MatvecCommPkgCreate (S); comm_pkg = hypre_ParCSRMatrixCommPkg (S); } num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg); if (coarsen_type % 2 == 0) nlocal++; /* even coarsen_type means allow_emptygrids */ #ifdef HYPRE_NO_GLOBAL_PARTITION { HYPRE_Int scan_recv; vertexrange = hypre_CTAlloc(HYPRE_Int,2); hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm); /* first point in my range */ vertexrange[0] = scan_recv - nlocal; /* first point in next proc's range */ vertexrange[1] = scan_recv; vstart = vertexrange[0]; vend = vertexrange[1]; } #else vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1); hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm); vertexrange[0]=0; for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1]; vstart = vertexrange[mpirank]; vend = vertexrange[mpirank+1]; #endif /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing */ if (coarsen_type % 2 == 1) { /* see above */ for (i=0;i<num_variables;i++) if (CF_marker[i]>0) CF_marker[i]+=vstart; } else { /* hypre_printf ("processor %d: empty grid allowed\n",mpirank); */ for (i=0;i<num_variables;i++) { if (CF_marker[i]>0) CF_marker[i]+=vstart+1; /* add one because vertexrange[mpirank]+1 denotes the empty grid. Hence, vertexrange[mpirank]+2 is the first coarse grid denoted in global indices, ... */ } } /* exchange data */ *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd); int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends)); for (i=0,ii=0;i<num_sends;i++) { start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i); for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++) int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (mpisize>1) { comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd); hypre_ParCSRCommHandleDestroy (comm_handle); } hypre_TFree (int_buf_data); *vrange=vertexrange; return (ierr); }
/************************************************************** * * CGC Coarsening routine * **************************************************************/ HYPRE_Int hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int measure_type, HYPRE_Int coarsen_type, HYPRE_Int cgc_its, HYPRE_Int debug_flag, HYPRE_Int **CF_marker_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(S); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S); HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows(S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(S_offd); hypre_CSRMatrix *S_ext; HYPRE_Int *S_ext_i; HYPRE_Int *S_ext_j; hypre_CSRMatrix *ST; HYPRE_Int *ST_i; HYPRE_Int *ST_j; HYPRE_Int *CF_marker; HYPRE_Int *CF_marker_offd=NULL; HYPRE_Int ci_tilde = -1; HYPRE_Int ci_tilde_mark = -1; HYPRE_Int *measure_array; HYPRE_Int *measure_array_master; HYPRE_Int *graph_array; HYPRE_Int *int_buf_data=NULL; /*HYPRE_Int *ci_array=NULL;*/ HYPRE_Int i, j, k, l, jS; HYPRE_Int ji, jj, index; HYPRE_Int set_empty = 1; HYPRE_Int C_i_nonempty = 0; HYPRE_Int num_nonzeros; HYPRE_Int num_procs, my_id; HYPRE_Int num_sends = 0; HYPRE_Int first_col, start; HYPRE_Int col_0, col_n; hypre_LinkList LoL_head; hypre_LinkList LoL_tail; HYPRE_Int *lists, *where; HYPRE_Int measure, new_meas; HYPRE_Int num_left; HYPRE_Int nabor, nabor_two; HYPRE_Int ierr = 0; HYPRE_Int use_commpkg_A = 0; HYPRE_Real wall_time; HYPRE_Int measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */ if (coarsen_type < 0) coarsen_type = -coarsen_type; /*------------------------------------------------------- * Initialize the C/F marker, LoL_head, LoL_tail arrays *-------------------------------------------------------*/ LoL_head = NULL; LoL_tail = NULL; lists = hypre_CTAlloc(HYPRE_Int, num_variables); where = hypre_CTAlloc(HYPRE_Int, num_variables); #if 0 /* debugging */ char filename[256]; FILE *fp; HYPRE_Int iter = 0; #endif /*-------------------------------------------------------------- * Compute a CSR strength matrix, S. * * For now, the "strength" of dependence/influence is defined in * the following way: i depends on j if * aij > hypre_max (k != i) aik, aii < 0 * or * aij < hypre_min (k != i) aik, aii >= 0 * Then S_ij = 1, else S_ij = 0. * * NOTE: the entries are negative initially, corresponding * to "unaccounted-for" dependence. *----------------------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); if (!comm_pkg) { use_commpkg_A = 1; comm_pkg = hypre_ParCSRMatrixCommPkg(A); } if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd); jS = S_i[num_variables]; ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS); ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1); ST_j = hypre_CTAlloc(HYPRE_Int,jS); hypre_CSRMatrixI(ST) = ST_i; hypre_CSRMatrixJ(ST) = ST_j; /*---------------------------------------------------------- * generate transpose of S, ST *----------------------------------------------------------*/ for (i=0; i <= num_variables; i++) ST_i[i] = 0; for (i=0; i < jS; i++) { ST_i[S_j[i]+1]++; } for (i=0; i < num_variables; i++) { ST_i[i+1] += ST_i[i]; } for (i=0; i < num_variables; i++) { for (j=S_i[i]; j < S_i[i+1]; j++) { index = S_j[j]; ST_j[ST_i[index]] = i; ST_i[index]++; } } for (i = num_variables; i > 0; i--) { ST_i[i] = ST_i[i-1]; } ST_i[0] = 0; /*---------------------------------------------------------- * Compute the measures * * The measures are given by the row sums of ST. * Hence, measure_array[i] is the number of influences * of variable i. * correct actual measures through adding influences from * neighbor processors *----------------------------------------------------------*/ measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables); measure_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { measure_array_master[i] = ST_i[i+1]-ST_i[i]; } if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) { if (use_commpkg_A) S_ext = hypre_ParCSRMatrixExtractBExt(S,A,0); else S_ext = hypre_ParCSRMatrixExtractBExt(S,S,0); S_ext_i = hypre_CSRMatrixI(S_ext); S_ext_j = hypre_CSRMatrixJ(S_ext); num_nonzeros = S_ext_i[num_cols_offd]; first_col = hypre_ParCSRMatrixFirstColDiag(S); col_0 = first_col-1; col_n = col_0+num_variables; if (measure_type) { for (i=0; i < num_nonzeros; i++) { index = S_ext_j[i] - first_col; if (index > -1 && index < num_variables) measure_array_master[index]++; } } } /*--------------------------------------------------- * Loop until all points are either fine or coarse. *---------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); /* first coarsening phase */ /************************************************************* * * Initialize the lists * *************************************************************/ CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); num_left = 0; for (j = 0; j < num_variables; j++) { if ((S_i[j+1]-S_i[j])== 0 && (S_offd_i[j+1]-S_offd_i[j]) == 0) { CF_marker[j] = SF_PT; measure_array_master[j] = 0; } else { CF_marker[j] = UNDECIDED; /* num_left++; */ /* BM May 19, 2006: see below*/ } } if (coarsen_type==22) { /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: (a) the point has no strong connections at all, OR (b) the point has a strong connection across a boundary */ for (j=0;j<num_variables;j++) if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;} } for (l = 1; l <= cgc_its; l++) { LoL_head = NULL; LoL_tail = NULL; num_left = 0; /* compute num_left before each RS coarsening loop */ memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int)); memset (lists,0,sizeof(HYPRE_Int)*num_variables); memset (where,0,sizeof(HYPRE_Int)*num_variables); for (j = 0; j < num_variables; j++) { measure = measure_array[j]; if (CF_marker[j] != SF_PT) { if (measure > 0) { enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where); num_left++; /* compute num_left before each RS coarsening loop */ } else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only if j is not conained in a previously constructed coarse grid. Reason: these neighbors should start with the same initial weight in each CGC iteration. BM Aug 30, 2006 */ { if (measure < 0) hypre_printf("negative measure!\n"); /* CF_marker[j] = f_pnt; */ for (k = S_i[j]; k < S_i[j+1]; k++) { nabor = S_j[k]; /* if (CF_marker[nabor] != SF_PT) */ if (CF_marker[nabor] == 0) /* BM Aug 30, 2006: don't alter weights of points contained in other candidate coarse grids */ { if (nabor < j) { new_meas = measure_array[nabor]; if (new_meas > 0) remove_point(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); else num_left++; /* BM Aug 29, 2006 */ new_meas = ++(measure_array[nabor]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); } else { new_meas = ++(measure_array[nabor]); } } } /* --num_left; */ /* BM May 19, 2006 */ } } } /* BM Aug 30, 2006: first iteration: determine maximal weight */ if (num_left && l==1) measure_max = measure_array[LoL_head->head]; /* BM Aug 30, 2006: break CGC iteration if no suitable starting point is available any more */ if (!num_left || measure_array[LoL_head->head]<measure_max) { while (LoL_head) { hypre_LinkList list_ptr = LoL_head; LoL_head = LoL_head->next_elt; dispose_elt (list_ptr); } break; } /**************************************************************** * * Main loop of Ruge-Stueben first coloring pass. * * WHILE there are still points to classify DO: * 1) find first point, i, on list with max_measure * make i a C-point, remove it from the lists * 2) For each point, j, in S_i^T, * a) Set j to be an F-point * b) For each point, k, in S_j * move k to the list in LoL with measure one * greater than it occupies (creating new LoL * entry if necessary) * 3) For each point, j, in S_i, * move j to the list in LoL with measure one * smaller than it occupies (creating new LoL * entry if necessary) * ****************************************************************/ while (num_left > 0) { index = LoL_head -> head; /* index = LoL_head -> tail; */ /* CF_marker[index] = C_PT; */ CF_marker[index] = l; /* BM Aug 18, 2006 */ measure = measure_array[index]; measure_array[index] = 0; measure_array_master[index] = 0; /* BM May 19: for CGC */ --num_left; remove_point(&LoL_head, &LoL_tail, measure, index, lists, where); for (j = ST_i[index]; j < ST_i[index+1]; j++) { nabor = ST_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ measure = measure_array[nabor]; measure_array[nabor]=0; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) /* undecided point */ { measure = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, measure, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } for (j = S_i[index]; j < S_i[index+1]; j++) { nabor = S_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { measure = measure_array[nabor]; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); measure_array[nabor] = --measure; if (measure > 0) enter_on_lists(&LoL_head, &LoL_tail, measure, nabor, lists, where); else { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) { new_meas = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } } } if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head); } l--; /* BM Aug 15, 2006 */ hypre_TFree(measure_array); hypre_TFree(measure_array_master); hypre_CSRMatrixDestroy(ST); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 1st pass = %f\n", my_id, wall_time); } hypre_TFree(lists); hypre_TFree(where); if (num_procs>1) { if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen CGC = %f\n", my_id, wall_time); } } else { /* the first candiate coarse grid is the coarse grid */ for (j=0;j<num_variables;j++) { if (CF_marker[j]==1) CF_marker[j]=C_PT; else CF_marker[j]=F_PT; } } /* BM May 19, 2006: Set all undecided points to be fine grid points. */ for (j=0;j<num_variables;j++) if (!CF_marker[j]) CF_marker[j]=F_PT; /*--------------------------------------------------- * Initialize the graph array *---------------------------------------------------*/ graph_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { graph_array[i] = -1; } if (debug_flag == 3) wall_time = time_getWallclockSeconds(); for (i=0; i < num_variables; i++) { if (ci_tilde_mark != i) ci_tilde = -1; if (CF_marker[i] == -1) { for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] > 0) graph_array[j] = i; } for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] == -1) { set_empty = 1; for (jj = S_i[j]; jj < S_i[j+1]; jj++) { index = S_j[jj]; if (graph_array[index] == i) { set_empty = 0; break; } } if (set_empty) { if (C_i_nonempty) { CF_marker[i] = 1; if (ci_tilde > -1) { CF_marker[ci_tilde] = -1; ci_tilde = -1; } C_i_nonempty = 0; break; } else { ci_tilde = j; ci_tilde_mark = i; CF_marker[j] = 1; C_i_nonempty = 1; i--; break; } } } } } } if (debug_flag == 3 && coarsen_type != 2) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 2nd pass = %f\n", my_id, wall_time); } /* third pass, check boundary fine points for coarse neighbors */ /*------------------------------------------------ * Exchange boundary data for CF_marker *------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (num_procs > 1) { comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, CF_marker_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d CGC boundary fix = %f\n", my_id, wall_time); } /*--------------------------------------------------- * Clean up and return *---------------------------------------------------*/ /*if (coarsen_type != 1) { */ if (CF_marker_offd) hypre_TFree(CF_marker_offd); /* BM Aug 21, 2006 */ if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */ /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */ /*} */ hypre_TFree(graph_array); if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) hypre_CSRMatrixDestroy(S_ext); *CF_marker_ptr = CF_marker; return (ierr); }
hypre_CSRBlockMatrix * hypre_ParCSRBlockMatrixExtractBExt(hypre_ParCSRBlockMatrix *B, hypre_ParCSRBlockMatrix *A, HYPRE_Int data) { MPI_Comm comm = hypre_ParCSRBlockMatrixComm(B); HYPRE_Int first_col_diag = hypre_ParCSRBlockMatrixFirstColDiag(B); HYPRE_Int *col_map_offd = hypre_ParCSRBlockMatrixColMapOffd(B); HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(B); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRBlockMatrixCommPkg(A); HYPRE_Int num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); HYPRE_Int *recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); HYPRE_Int *send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); HYPRE_Int *send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *tmp_comm_pkg; hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(B); HYPRE_Int *diag_i = hypre_CSRBlockMatrixI(diag); HYPRE_Int *diag_j = hypre_CSRBlockMatrixJ(diag); HYPRE_Complex *diag_data = hypre_CSRBlockMatrixData(diag); hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(B); HYPRE_Int *offd_i = hypre_CSRBlockMatrixI(offd); HYPRE_Int *offd_j = hypre_CSRBlockMatrixJ(offd); HYPRE_Complex *offd_data = hypre_CSRBlockMatrixData(offd); HYPRE_Int *B_int_i; HYPRE_Int *B_int_j; HYPRE_Complex *B_int_data; HYPRE_Int num_cols_B, num_nonzeros; HYPRE_Int num_rows_B_ext; HYPRE_Int num_procs, my_id; hypre_CSRBlockMatrix *B_ext; HYPRE_Int *B_ext_i; HYPRE_Int *B_ext_j; HYPRE_Complex *B_ext_data; HYPRE_Int *jdata_recv_vec_starts; HYPRE_Int *jdata_send_map_starts; HYPRE_Int i, j, k, l, counter, bnnz; HYPRE_Int start_index; HYPRE_Int j_cnt, jrow; hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); bnnz = block_size * block_size; num_cols_B = hypre_ParCSRMatrixGlobalNumCols(B); num_rows_B_ext = recv_vec_starts[num_recvs]; B_int_i = hypre_CTAlloc(HYPRE_Int, send_map_starts[num_sends]+1); B_ext_i = hypre_CTAlloc(HYPRE_Int, num_rows_B_ext+1); /*-------------------------------------------------------------------------- * generate B_int_i through adding number of row-elements of offd and diag * for corresponding rows. B_int_i[j+1] contains the number of elements of * a row j (which is determined through send_map_elmts) *--------------------------------------------------------------------------*/ B_int_i[0] = 0; j_cnt = 0; num_nonzeros = 0; for (i=0; i < num_sends; i++) { for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++) { jrow = send_map_elmts[j]; B_int_i[++j_cnt] = offd_i[jrow+1] - offd_i[jrow] + diag_i[jrow+1] - diag_i[jrow]; num_nonzeros += B_int_i[j_cnt]; } } /*-------------------------------------------------------------------------- * initialize communication *--------------------------------------------------------------------------*/ comm_handle = hypre_ParCSRCommHandleCreate(11,comm_pkg, &B_int_i[1],&B_ext_i[1]); B_int_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros); if (data) B_int_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz); jdata_send_map_starts = hypre_CTAlloc(HYPRE_Int, num_sends+1); jdata_recv_vec_starts = hypre_CTAlloc(HYPRE_Int, num_recvs+1); start_index = B_int_i[0]; jdata_send_map_starts[0] = start_index; counter = 0; for (i=0; i < num_sends; i++) { num_nonzeros = counter; for (j = send_map_starts[i]; j < send_map_starts[i+1]; j++) { jrow = send_map_elmts[j]; for (k=diag_i[jrow]; k < diag_i[jrow+1]; k++) { B_int_j[counter] = diag_j[k]+first_col_diag; if (data) { for(l = 0; l < bnnz; l++) B_int_data[counter*bnnz+ l] = diag_data[k*bnnz+ l]; } counter++; } for (k=offd_i[jrow]; k < offd_i[jrow+1]; k++) { B_int_j[counter] = col_map_offd[offd_j[k]]; if (data) { for(l = 0; l < bnnz; l++) B_int_data[counter*bnnz+ l] = offd_data[k*bnnz+ l]; } counter++; } } num_nonzeros = counter - num_nonzeros; start_index += num_nonzeros; jdata_send_map_starts[i+1] = start_index; } tmp_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgComm(tmp_comm_pkg) = comm; hypre_ParCSRCommPkgNumSends(tmp_comm_pkg) = num_sends; hypre_ParCSRCommPkgNumRecvs(tmp_comm_pkg) = num_recvs; hypre_ParCSRCommPkgSendProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgSendProcs(comm_pkg); hypre_ParCSRCommPkgRecvProcs(tmp_comm_pkg) = hypre_ParCSRCommPkgRecvProcs(comm_pkg); hypre_ParCSRCommPkgSendMapStarts(tmp_comm_pkg) = jdata_send_map_starts; hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; /*-------------------------------------------------------------------------- * after communication exchange B_ext_i[j+1] contains the number of elements * of a row j ! * evaluate B_ext_i and compute num_nonzeros for B_ext *--------------------------------------------------------------------------*/ for (i=0; i < num_recvs; i++) for (j = recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) B_ext_i[j+1] += B_ext_i[j]; num_nonzeros = B_ext_i[num_rows_B_ext]; B_ext = hypre_CSRBlockMatrixCreate(block_size, num_rows_B_ext, num_cols_B, num_nonzeros); B_ext_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros); if (data) B_ext_data = hypre_CTAlloc(HYPRE_Complex, num_nonzeros*bnnz); for (i=0; i < num_recvs; i++) { start_index = B_ext_i[recv_vec_starts[i]]; num_nonzeros = B_ext_i[recv_vec_starts[i+1]]-start_index; jdata_recv_vec_starts[i+1] = B_ext_i[recv_vec_starts[i+1]]; } hypre_ParCSRCommPkgRecvVecStarts(tmp_comm_pkg) = jdata_recv_vec_starts; comm_handle = hypre_ParCSRCommHandleCreate(11,tmp_comm_pkg,B_int_j,B_ext_j); hypre_ParCSRCommHandleDestroy(comm_handle); comm_handle = NULL; if (data) { comm_handle = hypre_ParCSRBlockCommHandleCreate(1, bnnz,tmp_comm_pkg, B_int_data, B_ext_data); hypre_ParCSRBlockCommHandleDestroy(comm_handle); comm_handle = NULL; } hypre_CSRBlockMatrixI(B_ext) = B_ext_i; hypre_CSRBlockMatrixJ(B_ext) = B_ext_j; if (data) hypre_CSRBlockMatrixData(B_ext) = B_ext_data; hypre_TFree(B_int_i); hypre_TFree(B_int_j); if (data) hypre_TFree(B_int_data); hypre_TFree(jdata_send_map_starts); hypre_TFree(jdata_recv_vec_starts); hypre_TFree(tmp_comm_pkg); return B_ext; }
HYPRE_Int hypre_AMESetup(void *esolver) { HYPRE_Int ne, *edge_bc; hypre_AMEData *ame_data = esolver; hypre_AMSData *ams_data = ame_data -> precond; if (ams_data -> beta_is_zero) { ame_data -> t1 = hypre_ParVectorInDomainOf(ams_data -> G); ame_data -> t2 = hypre_ParVectorInDomainOf(ams_data -> G); } else { ame_data -> t1 = ams_data -> r1; ame_data -> t2 = ams_data -> g1; } ame_data -> t3 = ams_data -> r0; /* Eliminate boundary conditions in G = [Gii, Gib; 0, Gbb], i.e., compute [Gii, 0; 0, 0] */ { HYPRE_Int i, j, k, nv; HYPRE_Int *offd_edge_bc; hypre_ParCSRMatrix *Gt; nv = hypre_ParCSRMatrixNumCols(ams_data -> G); ne = hypre_ParCSRMatrixNumRows(ams_data -> G); edge_bc = hypre_TAlloc(HYPRE_Int, ne); for (i = 0; i < ne; i++) edge_bc[i] = 0; /* Find boundary (eliminated) edges */ { hypre_CSRMatrix *Ad = hypre_ParCSRMatrixDiag(ams_data -> A); HYPRE_Int *AdI = hypre_CSRMatrixI(Ad); HYPRE_Int *AdJ = hypre_CSRMatrixJ(Ad); HYPRE_Real *AdA = hypre_CSRMatrixData(Ad); hypre_CSRMatrix *Ao = hypre_ParCSRMatrixOffd(ams_data -> A); HYPRE_Int *AoI = hypre_CSRMatrixI(Ao); HYPRE_Real *AoA = hypre_CSRMatrixData(Ao); HYPRE_Real l1_norm; /* A row (edge) is boundary if its off-diag l1 norm is less than eps */ HYPRE_Real eps = DBL_EPSILON * 1e+4; for (i = 0; i < ne; i++) { l1_norm = 0.0; for (j = AdI[i]; j < AdI[i+1]; j++) if (AdJ[j] != i) l1_norm += fabs(AdA[j]); if (AoI) for (j = AoI[i]; j < AoI[i+1]; j++) l1_norm += fabs(AoA[j]); if (l1_norm < eps) edge_bc[i] = 1; } } hypre_ParCSRMatrixTranspose(ams_data -> G, &Gt, 1); /* Use a Matvec communication to find which of the edges connected to local vertices are on the boundary */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; offd_edge_bc = hypre_CTAlloc(HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(Gt))); hypre_MatvecCommPkgCreate(Gt); comm_pkg = hypre_ParCSRMatrixCommPkg(Gt); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); int_buf_data[index++] = edge_bc[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_edge_bc); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } /* Eliminate boundary vertex entries in G^t */ { hypre_CSRMatrix *Gtd = hypre_ParCSRMatrixDiag(Gt); HYPRE_Int *GtdI = hypre_CSRMatrixI(Gtd); HYPRE_Int *GtdJ = hypre_CSRMatrixJ(Gtd); HYPRE_Real *GtdA = hypre_CSRMatrixData(Gtd); hypre_CSRMatrix *Gto = hypre_ParCSRMatrixOffd(Gt); HYPRE_Int *GtoI = hypre_CSRMatrixI(Gto); HYPRE_Int *GtoJ = hypre_CSRMatrixJ(Gto); HYPRE_Real *GtoA = hypre_CSRMatrixData(Gto); HYPRE_Int bdr; for (i = 0; i < nv; i++) { bdr = 0; /* A vertex is boundary if it belongs to a boundary edge */ for (j = GtdI[i]; j < GtdI[i+1]; j++) if (edge_bc[GtdJ[j]]) { bdr = 1; break; } if (!bdr && GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) if (offd_edge_bc[GtoJ[j]]) { bdr = 1; break; } if (bdr) { for (j = GtdI[i]; j < GtdI[i+1]; j++) /* if (!edge_bc[GtdJ[j]]) */ GtdA[j] = 0.0; if (GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) /* if (!offd_edge_bc[GtoJ[j]]) */ GtoA[j] = 0.0; } } } hypre_ParCSRMatrixTranspose(Gt, &ame_data -> G, 1); hypre_ParCSRMatrixDestroy(Gt); hypre_TFree(offd_edge_bc); } /* Compute G^t M G */ { if (!hypre_ParCSRMatrixCommPkg(ame_data -> G)) hypre_MatvecCommPkgCreate(ame_data -> G); if (!hypre_ParCSRMatrixCommPkg(ame_data -> M)) hypre_MatvecCommPkgCreate(ame_data -> M); hypre_BoomerAMGBuildCoarseOperator(ame_data -> G, ame_data -> M, ame_data -> G, &ame_data -> A_G); hypre_ParCSRMatrixFixZeroRows(ame_data -> A_G); } /* Create AMG preconditioner and PCG-AMG solver for G^tMG */ { HYPRE_BoomerAMGCreate(&ame_data -> B1_G); HYPRE_BoomerAMGSetCoarsenType(ame_data -> B1_G, ams_data -> B_G_coarsen_type); HYPRE_BoomerAMGSetAggNumLevels(ame_data -> B1_G, ams_data -> B_G_agg_levels); HYPRE_BoomerAMGSetRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type); HYPRE_BoomerAMGSetNumSweeps(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetMaxLevels(ame_data -> B1_G, 25); HYPRE_BoomerAMGSetTol(ame_data -> B1_G, 0.0); HYPRE_BoomerAMGSetMaxIter(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetStrongThreshold(ame_data -> B1_G, ams_data -> B_G_theta); /* don't use exact solve on the coarsest level (matrix may be singular) */ HYPRE_BoomerAMGSetCycleRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type, 3); HYPRE_ParCSRPCGCreate(hypre_ParCSRMatrixComm(ame_data->A_G), &ame_data -> B2_G); HYPRE_PCGSetPrintLevel(ame_data -> B2_G, 0); HYPRE_PCGSetTol(ame_data -> B2_G, 1e-12); HYPRE_PCGSetMaxIter(ame_data -> B2_G, 20); HYPRE_PCGSetPrecond(ame_data -> B2_G, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, ame_data -> B1_G); HYPRE_ParCSRPCGSetup(ame_data -> B2_G, (HYPRE_ParCSRMatrix)ame_data->A_G, (HYPRE_ParVector)ame_data->t1, (HYPRE_ParVector)ame_data->t2); } /* Setup LOBPCG */ { HYPRE_Int seed = 75; mv_InterfaceInterpreter* interpreter; mv_MultiVectorPtr eigenvectors; ame_data -> interpreter = hypre_CTAlloc(mv_InterfaceInterpreter,1); interpreter = (mv_InterfaceInterpreter*) ame_data -> interpreter; HYPRE_ParCSRSetupInterpreter(interpreter); ame_data -> eigenvalues = hypre_CTAlloc(HYPRE_Real, ame_data -> block_size); ame_data -> eigenvectors = mv_MultiVectorCreateFromSampleVector(interpreter, ame_data -> block_size, ame_data -> t3); eigenvectors = (mv_MultiVectorPtr) ame_data -> eigenvectors; mv_MultiVectorSetRandom (eigenvectors, seed); /* Make the initial vectors discretely divergence free */ { HYPRE_Int i, j; HYPRE_Real *data; mv_TempMultiVector* tmp = mv_MultiVectorGetData(eigenvectors); HYPRE_ParVector *v = (HYPRE_ParVector*)(tmp -> vector); hypre_ParVector *vi; for (i = 0; i < ame_data -> block_size; i++) { vi = (hypre_ParVector*) v[i]; data = hypre_VectorData(hypre_ParVectorLocalVector(vi)); for (j = 0; j < ne; j++) if (edge_bc[j]) data[j] = 0.0; hypre_AMEDiscrDivFreeComponent(esolver, vi); } } } hypre_TFree(edge_bc); return hypre_error_flag; }
void hypre_ParCSRMatrixSplit(hypre_ParCSRMatrix *A, HYPRE_Int nr, HYPRE_Int nc, hypre_ParCSRMatrix **blocks, int interleaved_rows, int interleaved_cols) { HYPRE_Int i, j, k; MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_CSRMatrix *Adiag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *Aoffd = hypre_ParCSRMatrixOffd(A); HYPRE_Int global_rows = hypre_ParCSRMatrixGlobalNumRows(A); HYPRE_Int global_cols = hypre_ParCSRMatrixGlobalNumCols(A); HYPRE_Int local_rows = hypre_CSRMatrixNumRows(Adiag); HYPRE_Int local_cols = hypre_CSRMatrixNumCols(Adiag); HYPRE_Int offd_cols = hypre_CSRMatrixNumCols(Aoffd); hypre_assert(local_rows % nr == 0 && local_cols % nc == 0); hypre_assert(global_rows % nr == 0 && global_cols % nc == 0); HYPRE_Int block_rows = local_rows / nr; HYPRE_Int block_cols = local_cols / nc; HYPRE_Int num_blocks = nr * nc; /* mark local rows and columns with block number */ HYPRE_Int *row_block_num = hypre_TAlloc(HYPRE_Int, local_rows); HYPRE_Int *col_block_num = hypre_TAlloc(HYPRE_Int, local_cols); for (i = 0; i < local_rows; i++) { row_block_num[i] = interleaved_rows ? (i % nr) : (i / block_rows); } for (i = 0; i < local_cols; i++) { col_block_num[i] = interleaved_cols ? (i % nc) : (i / block_cols); } /* determine the block numbers for offd columns */ HYPRE_Int* offd_col_block_num = hypre_TAlloc(HYPRE_Int, offd_cols); hypre_ParCSRCommHandle *comm_handle; HYPRE_Int *int_buf_data; { /* make sure A has a communication package */ hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* calculate the final global column numbers for each block */ HYPRE_Int *count = hypre_CTAlloc(HYPRE_Int, nc); HYPRE_Int *block_global_col = hypre_TAlloc(HYPRE_Int, local_cols); HYPRE_Int first_col = hypre_ParCSRMatrixFirstColDiag(A) / nc; for (i = 0; i < local_cols; i++) { block_global_col[i] = first_col + count[col_block_num[i]]++; } hypre_TFree(count); /* use a Matvec communication pattern to determine offd_col_block_num */ HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); HYPRE_Int start, index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = col_block_num[k] + nc*block_global_col[k]; } } hypre_TFree(block_global_col); comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_col_block_num); } /* create the block matrices */ HYPRE_Int num_procs = 1; if (!hypre_ParCSRMatrixAssumedPartition(A)) { hypre_MPI_Comm_size(comm, &num_procs); } HYPRE_Int *row_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); HYPRE_Int *col_starts = hypre_TAlloc(HYPRE_Int, num_procs+1); for (i = 0; i <= num_procs; i++) { row_starts[i] = hypre_ParCSRMatrixRowStarts(A)[i] / nr; col_starts[i] = hypre_ParCSRMatrixColStarts(A)[i] / nc; } for (i = 0; i < num_blocks; i++) { blocks[i] = hypre_ParCSRMatrixCreate(comm, global_rows/nr, global_cols/nc, row_starts, col_starts, 0, 0, 0); } /* split diag part */ hypre_CSRMatrix **csr_blocks = hypre_TAlloc(hypre_CSRMatrix*, nr*nc); hypre_CSRMatrixSplit(Adiag, nr, nc, row_block_num, col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixDiag(blocks[i])); hypre_ParCSRMatrixDiag(blocks[i]) = csr_blocks[i]; } /* finish communication, receive offd_col_block_num */ hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); /* decode global offd column numbers */ HYPRE_Int* offd_global_col = hypre_TAlloc(HYPRE_Int, offd_cols); for (i = 0; i < offd_cols; i++) { offd_global_col[i] = offd_col_block_num[i] / nc; offd_col_block_num[i] %= nc; } /* split offd part */ hypre_CSRMatrixSplit(Aoffd, nr, nc, row_block_num, offd_col_block_num, csr_blocks); for (i = 0; i < num_blocks; i++) { hypre_TFree(hypre_ParCSRMatrixOffd(blocks[i])); hypre_ParCSRMatrixOffd(blocks[i]) = csr_blocks[i]; } hypre_TFree(csr_blocks); hypre_TFree(col_block_num); hypre_TFree(row_block_num); /* update block col-maps */ for (int bi = 0; bi < nr; bi++) { for (int bj = 0; bj < nc; bj++) { hypre_ParCSRMatrix *block = blocks[bi*nc + bj]; hypre_CSRMatrix *block_offd = hypre_ParCSRMatrixOffd(block); HYPRE_Int block_offd_cols = hypre_CSRMatrixNumCols(block_offd); HYPRE_Int *block_col_map = hypre_TAlloc(HYPRE_Int, block_offd_cols); for (i = j = 0; i < offd_cols; i++) { HYPRE_Int bn = offd_col_block_num[i]; if (bn == bj) { block_col_map[j++] = offd_global_col[i]; } } hypre_assert(j == block_offd_cols); hypre_ParCSRMatrixColMapOffd(block) = block_col_map; } } hypre_TFree(offd_global_col); hypre_TFree(offd_col_block_num); /* finish the new matrices, make them own all the stuff */ for (i = 0; i < num_blocks; i++) { hypre_ParCSRMatrixSetNumNonzeros(blocks[i]); hypre_MatvecCommPkgCreate(blocks[i]); hypre_ParCSRMatrixOwnsData(blocks[i]) = 1; /* only the first block will own the row/col_starts */ hypre_ParCSRMatrixOwnsRowStarts(blocks[i]) = !i; hypre_ParCSRMatrixOwnsColStarts(blocks[i]) = !i; } }
/* Function: hypre_ParCSRMatrixEliminateAAe (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / 0 | A_ib \ Ae = | -----+--------- | \ A_bi | A_bb - I / */ void hypre_ParCSRMatrixEliminateAAe(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix **Ae, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim) { HYPRE_Int i, j, k; hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int A_diag_nrows = hypre_CSRMatrixNumRows(A_diag); HYPRE_Int A_offd_ncols = hypre_CSRMatrixNumCols(A_offd); *Ae = hypre_ParCSRMatrixCreate(hypre_ParCSRMatrixComm(A), hypre_ParCSRMatrixGlobalNumRows(A), hypre_ParCSRMatrixGlobalNumCols(A), hypre_ParCSRMatrixRowStarts(A), hypre_ParCSRMatrixColStarts(A), 0, 0, 0); hypre_ParCSRMatrixSetRowStartsOwner(*Ae, 0); hypre_ParCSRMatrixSetColStartsOwner(*Ae, 0); hypre_CSRMatrix *Ae_diag = hypre_ParCSRMatrixDiag(*Ae); hypre_CSRMatrix *Ae_offd = hypre_ParCSRMatrixOffd(*Ae); HYPRE_Int Ae_offd_ncols; HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Int *A_col_map_offd = hypre_ParCSRMatrixColMapOffd(A); HYPRE_Int *Ae_col_map_offd; HYPRE_Int *col_mark; HYPRE_Int *col_remap; /* figure out which offd cols should be eliminated */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; HYPRE_Int *eliminate_row = hypre_CTAlloc(HYPRE_Int, A_diag_nrows); HYPRE_Int *eliminate_col = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* which of the local rows are to be eliminated */ for (i = 0; i < A_diag_nrows; i++) { eliminate_row[i] = 0; } for (i = 0; i < num_rowscols_to_elim; i++) { eliminate_row[rowscols_to_elim[i]] = 1; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); int_buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); /* eliminate diagonal part, overlapping it with communication */ hypre_CSRMatrixElimCreate(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, NULL); hypre_CSRMatrixEliminateRowsCols(A_diag, Ae_diag, num_rowscols_to_elim, rowscols_to_elim, num_rowscols_to_elim, rowscols_to_elim, 1, NULL); hypre_CSRMatrixReorder(Ae_diag); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < A_offd_ncols; i++) { if (eliminate_col[i]) { offd_cols_to_elim[num_offd_cols_to_elim++] = i; } } hypre_TFree(int_buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); } /* eliminate the off-diagonal part */ col_mark = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); col_remap = hypre_CTAlloc(HYPRE_Int, A_offd_ncols); hypre_CSRMatrixElimCreate(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, col_mark); for (i = k = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { col_remap[i] = k++; } } hypre_CSRMatrixEliminateRowsCols(A_offd, Ae_offd, num_rowscols_to_elim, rowscols_to_elim, num_offd_cols_to_elim, offd_cols_to_elim, 0, col_remap); /* create col_map_offd for Ae */ Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_offd_ncols++; } } Ae_col_map_offd = hypre_CTAlloc(HYPRE_Int, Ae_offd_ncols); Ae_offd_ncols = 0; for (i = 0; i < A_offd_ncols; i++) { if (col_mark[i]) { Ae_col_map_offd[Ae_offd_ncols++] = A_col_map_offd[i]; } } hypre_ParCSRMatrixColMapOffd(*Ae) = Ae_col_map_offd; hypre_CSRMatrixNumCols(Ae_offd) = Ae_offd_ncols; hypre_TFree(col_remap); hypre_TFree(col_mark); hypre_TFree(offd_cols_to_elim); hypre_ParCSRMatrixSetNumNonzeros(*Ae); hypre_MatvecCommPkgCreate(*Ae); }
/* Function: hypre_ParCSRMatrixEliminateAXB This function eliminates the global rows and columns of a matrix A corresponding to given lists of sorted (!) local row numbers, so that the solution to the system A*X = B is X_b for the given rows. The elimination is done as follows: (input) (output) / A_ii | A_ib \ / A_ii | 0 \ A = | -----+----- | ---> | -----+----- | \ A_bi | A_bb / \ 0 | I / / X_i \ / X_i \ X = | --- | ---> | --- | (no change) \ X_b / \ X_b / / B_i \ / B_i - A_ib * X_b \ B = | --- | ---> | ---------------- | \ B_b / \ X_b / */ void hypre_ParCSRMatrixEliminateAXB(hypre_ParCSRMatrix *A, HYPRE_Int num_rowscols_to_elim, HYPRE_Int *rowscols_to_elim, hypre_ParVector *X, hypre_ParVector *B) { hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(A); hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(A); HYPRE_Int diag_nrows = hypre_CSRMatrixNumRows(diag); HYPRE_Int offd_ncols = hypre_CSRMatrixNumCols(offd); hypre_Vector *Xlocal = hypre_ParVectorLocalVector(X); hypre_Vector *Blocal = hypre_ParVectorLocalVector(B); HYPRE_Real *Bdata = hypre_VectorData(Blocal); HYPRE_Real *Xdata = hypre_VectorData(Xlocal); HYPRE_Int num_offd_cols_to_elim; HYPRE_Int *offd_cols_to_elim; HYPRE_Real *eliminate_coefs; /* figure out which offd cols should be eliminated and with what coef */ hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends; HYPRE_Int index, start; HYPRE_Int i, j, k, irow; HYPRE_Real *eliminate_row = hypre_CTAlloc(HYPRE_Real, diag_nrows); HYPRE_Real *eliminate_col = hypre_CTAlloc(HYPRE_Real, offd_ncols); HYPRE_Real *buf_data, coef; /* make sure A has a communication package */ comm_pkg = hypre_ParCSRMatrixCommPkg(A); if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } /* HACK: rows that shouldn't be eliminated are marked with quiet NaN; those that should are set to the boundary value from X; this is to avoid sending complex type (int+double) or communicating twice. */ for (i = 0; i < diag_nrows; i++) { eliminate_row[i] = std::numeric_limits<HYPRE_Real>::quiet_NaN(); } for (i = 0; i < num_rowscols_to_elim; i++) { irow = rowscols_to_elim[i]; eliminate_row[irow] = Xdata[irow]; } /* use a Matvec communication pattern to find (in eliminate_col) which of the local offd columns are to be eliminated */ num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); buf_data = hypre_CTAlloc(HYPRE_Real, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); buf_data[index++] = eliminate_row[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, eliminate_col); /* do sequential part of the elimination while stuff is getting sent */ hypre_CSRMatrixEliminateAXB(diag, num_rowscols_to_elim, rowscols_to_elim, Xlocal, Blocal); /* finish the communication */ hypre_ParCSRCommHandleDestroy(comm_handle); /* received eliminate_col[], count offd columns to eliminate */ num_offd_cols_to_elim = 0; for (i = 0; i < offd_ncols; i++) { coef = eliminate_col[i]; if (coef == coef) // test for NaN { num_offd_cols_to_elim++; } } offd_cols_to_elim = hypre_CTAlloc(HYPRE_Int, num_offd_cols_to_elim); eliminate_coefs = hypre_CTAlloc(HYPRE_Real, num_offd_cols_to_elim); /* get a list of offd column indices and coefs */ num_offd_cols_to_elim = 0; for (i = 0; i < offd_ncols; i++) { coef = eliminate_col[i]; if (coef == coef) // test for NaN { offd_cols_to_elim[num_offd_cols_to_elim] = i; eliminate_coefs[num_offd_cols_to_elim] = coef; num_offd_cols_to_elim++; } } hypre_TFree(buf_data); hypre_TFree(eliminate_row); hypre_TFree(eliminate_col); /* eliminate the off-diagonal part */ hypre_CSRMatrixEliminateOffdColsAXB(offd, num_offd_cols_to_elim, offd_cols_to_elim, eliminate_coefs, Blocal); hypre_CSRMatrixEliminateOffdRowsAXB(offd, num_rowscols_to_elim, rowscols_to_elim); /* set boundary values in the rhs */ for (int i = 0; i < num_rowscols_to_elim; i++) { irow = rowscols_to_elim[i]; Bdata[irow] = Xdata[irow]; } hypre_TFree(offd_cols_to_elim); hypre_TFree(eliminate_coefs); }
HYPRE_Int hypre_CreateLambda(void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ MPI_Comm comm; hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParCSRMatrix *A_tmp; hypre_ParCSRMatrix *Lambda; hypre_CSRMatrix *L_diag; hypre_CSRMatrix *L_offd; hypre_CSRMatrix *A_tmp_diag; hypre_CSRMatrix *A_tmp_offd; hypre_ParVector *Xtilde; hypre_ParVector *Rtilde; hypre_Vector *Xtilde_local; hypre_Vector *Rtilde_local; hypre_ParCSRCommPkg *comm_pkg; hypre_ParCSRCommPkg *L_comm_pkg = NULL; hypre_ParCSRCommHandle *comm_handle; HYPRE_Real *L_diag_data; HYPRE_Real *L_offd_data; HYPRE_Real *buf_data = NULL; HYPRE_Real *tmp_data; HYPRE_Real *x_data; HYPRE_Real *r_data; HYPRE_Real *l1_norms; HYPRE_Real *A_tmp_diag_data; HYPRE_Real *A_tmp_offd_data; HYPRE_Real *D_data = NULL; HYPRE_Real *D_data_offd = NULL; HYPRE_Int *L_diag_i; HYPRE_Int *L_diag_j; HYPRE_Int *L_offd_i; HYPRE_Int *L_offd_j; HYPRE_Int *A_tmp_diag_i; HYPRE_Int *A_tmp_offd_i; HYPRE_Int *A_tmp_diag_j; HYPRE_Int *A_tmp_offd_j; HYPRE_Int *L_recv_ptr = NULL; HYPRE_Int *L_send_ptr = NULL; HYPRE_Int *L_recv_procs = NULL; HYPRE_Int *L_send_procs = NULL; HYPRE_Int *L_send_map_elmts = NULL; HYPRE_Int *recv_procs; HYPRE_Int *send_procs; HYPRE_Int *send_map_elmts; HYPRE_Int *send_map_starts; HYPRE_Int *recv_vec_starts; HYPRE_Int *all_send_procs = NULL; HYPRE_Int *all_recv_procs = NULL; HYPRE_Int *remap = NULL; HYPRE_Int *level_start; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int num_levels; HYPRE_Int num_add_lvls; HYPRE_Int num_procs; HYPRE_Int num_sends, num_recvs; HYPRE_Int num_sends_L = 0; HYPRE_Int num_recvs_L = 0; HYPRE_Int send_data_L = 0; HYPRE_Int num_rows_L = 0; HYPRE_Int num_rows_tmp = 0; HYPRE_Int num_cols_offd_L = 0; HYPRE_Int num_cols_offd = 0; HYPRE_Int level, i, j, k; HYPRE_Int this_proc, cnt, cnt_diag, cnt_offd; HYPRE_Int cnt_recv, cnt_send, cnt_row, row_start; HYPRE_Int start_diag, start_offd, indx, cnt_map; HYPRE_Int start, j_indx, index, cnt_level; HYPRE_Int max_sends, max_recvs; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_threads; HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd; HYPRE_Real **l1_norms_ptr = NULL; HYPRE_Real *relax_weight = NULL; HYPRE_Real relax_type; /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_type = hypre_ParAMGDataGridRelaxType(amg_data)[1]; comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_size(comm,&num_procs); l1_norms_ptr = hypre_ParAMGDataL1Norms(amg_data); addlvl = hypre_max(additive, mult_additive); num_add_lvls = num_levels+1-addlvl; level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1); send_data_L = 0; num_rows_L = 0; num_cols_offd_L = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; level_start[0] = 0; cnt = 1; max_sends = 0; max_recvs = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd); num_rows_L += num_rows_tmp; level_start[cnt] = level_start[cnt-1] + num_rows_tmp; cnt++; num_cols_offd_L += num_cols_offd; num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp]; num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); max_sends += num_sends; if (num_sends) send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends); max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg); } } if (max_sends >= num_procs ||max_recvs >= num_procs) { max_sends = num_procs; max_recvs = num_procs; } if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends); if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs); cnt_send = 0; cnt_recv = 0; if (max_sends || max_recvs) { if (max_sends < num_procs && max_recvs < num_procs) { for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); for (j = 0; j < num_sends; j++) all_send_procs[cnt_send++] = send_procs[j]; for (j = 0; j < num_recvs; j++) all_recv_procs[cnt_recv++] = recv_procs[j]; } } if (max_sends) { qsort0(all_send_procs, 0, max_sends-1); num_sends_L = 1; this_proc = all_send_procs[0]; for (i=1; i < max_sends; i++) { if (all_send_procs[i] > this_proc) { this_proc = all_send_procs[i]; all_send_procs[num_sends_L++] = this_proc; } } L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); for (j=0; j < num_sends_L; j++) L_send_procs[j] = all_send_procs[j]; hypre_TFree(all_send_procs); } if (max_recvs) { qsort0(all_recv_procs, 0, max_recvs-1); num_recvs_L = 1; this_proc = all_recv_procs[0]; for (i=1; i < max_recvs; i++) { if (all_recv_procs[i] > this_proc) { this_proc = all_recv_procs[i]; all_recv_procs[num_recvs_L++] = this_proc; } } L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); for (j=0; j < num_recvs_L; j++) L_recv_procs[j] = all_recv_procs[j]; hypre_TFree(all_recv_procs); } L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } for (k = 0; k < num_sends; k++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L); L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k]; } for (k = 0; k < num_recvs; k++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L); L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k]; } } L_recv_ptr[0] = 0; for (i=1; i < num_recvs_L; i++) L_recv_ptr[i+1] += L_recv_ptr[i]; L_send_ptr[0] = 0; for (i=1; i < num_sends_L; i++) L_send_ptr[i+1] += L_send_ptr[i]; } else { num_recvs_L = 0; num_sends_L = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); for (j = 0; j < num_sends; j++) { this_proc = send_procs[j]; if (all_send_procs[this_proc] == 0) num_sends_L++; all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j]; } for (j = 0; j < num_recvs; j++) { this_proc = recv_procs[j]; if (all_recv_procs[this_proc] == 0) num_recvs_L++; all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j]; } } } if (max_sends) { L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); num_sends_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_send_procs[j]; if (this_proc) { L_send_procs[num_sends_L++] = j; L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1]; } } } if (max_recvs) { L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); num_recvs_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_recv_procs[j]; if (this_proc) { L_recv_procs[num_recvs_L++] = j; L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1]; } } } } } if (max_sends) hypre_TFree(all_send_procs); if (max_recvs) hypre_TFree(all_recv_procs); L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag); L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd); hypre_CSRMatrixInitialize(L_diag); hypre_CSRMatrixInitialize(L_offd); if (num_nonzeros_diag) { L_diag_data = hypre_CSRMatrixData(L_diag); L_diag_j = hypre_CSRMatrixJ(L_diag); } L_diag_i = hypre_CSRMatrixI(L_diag); if (num_nonzeros_offd) { L_offd_data = hypre_CSRMatrixData(L_offd); L_offd_j = hypre_CSRMatrixJ(L_offd); } L_offd_i = hypre_CSRMatrixI(L_offd); if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L); if (send_data_L) { L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L); buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L); } if (num_cols_offd_L) { D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L); /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/ remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L); } Rtilde = hypre_CTAlloc(hypre_ParVector, 1); Rtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Rtilde_local); hypre_ParVectorLocalVector(Rtilde) = Rtilde_local; hypre_ParVectorOwnsData(Rtilde) = 1; Xtilde = hypre_CTAlloc(hypre_ParVector, 1); Xtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Xtilde_local); hypre_ParVectorLocalVector(Xtilde) = Xtilde_local; hypre_ParVectorOwnsData(Xtilde) = 1; x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); cnt = 0; cnt_level = 0; cnt_diag = 0; cnt_offd = 0; cnt_row = 1; L_diag_i[0] = 0; L_offd_i[0] = 0; for (level=addlvl; level < num_levels; level++) { row_start = level_start[cnt_level]; if (level != 0) { tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0; tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0; } cnt_level++; start_diag = L_diag_i[cnt_row-1]; start_offd = L_offd_i[cnt_row-1]; A_tmp = A_array[level]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag); A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd); A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag); A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } /* Compute new combined communication package */ for (i=0; i < num_sends; i++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L); indx = L_send_ptr[this_proc]; for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++) { L_send_map_elmts[indx++] = row_start + send_map_elmts[j]; } L_send_ptr[this_proc] = indx; } cnt_map = 0; for (i = 0; i < num_recvs; i++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L); indx = L_recv_ptr[this_proc]; for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { remap[cnt_map++] = indx++; } L_recv_ptr[this_proc] = indx; } /* Compute Lambda */ if (relax_type == 0) { HYPRE_Real rlx_wt = relax_weight[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } else { l1_norms = l1_norms_ptr[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = 1.0/l1_norms[i]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } if (num_procs > 1) { index = 0; for (i=0; i < num_sends; i++) { start = send_map_starts[i]; for (j=start; j < send_map_starts[i+1]; j++) buf_data[index++] = D_data[send_map_elmts[j]]; } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, D_data_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } for (i = 0; i < num_rows_tmp; i++) { j_indx = A_tmp_diag_i[i]; L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i]; L_diag_j[cnt_diag++] = i+row_start; for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++) { j_indx = A_tmp_diag_j[j]; L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i]; L_diag_j[cnt_diag++] = j_indx+row_start; } for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++) { j_indx = A_tmp_offd_j[j]; L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i]; L_offd_j[cnt_offd++] = remap[j_indx]; } } cnt_row += num_rows_tmp; } if (L_send_ptr) { for (i=num_sends_L-1; i > 0; i--) L_send_ptr[i] = L_send_ptr[i-1]; L_send_ptr[0] = 0; } else L_send_ptr = hypre_CTAlloc(HYPRE_Int,1); if (L_recv_ptr) { for (i=num_recvs_L-1; i > 0; i--) L_recv_ptr[i] = L_recv_ptr[i-1]; L_recv_ptr[0] = 0; } else L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1); L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L; hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L; hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs; hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs; hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr; hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr; hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts; hypre_ParCSRCommPkgComm(L_comm_pkg) = comm; Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1); hypre_ParCSRMatrixDiag(Lambda) = L_diag; hypre_ParCSRMatrixOffd(Lambda) = L_offd; hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg; hypre_ParCSRMatrixComm(Lambda) = comm; hypre_ParCSRMatrixOwnsData(Lambda) = 1; hypre_ParAMGDataLambda(amg_data) = Lambda; hypre_ParAMGDataRtilde(amg_data) = Rtilde; hypre_ParAMGDataXtilde(amg_data) = Xtilde; hypre_TFree(D_data_offd); hypre_TFree(D_data); if (num_procs > 1) hypre_TFree(buf_data); hypre_TFree(remap); hypre_TFree(buf_data); hypre_TFree(level_start); return Solve_err_flag; }