/*--------------------------------------------------------------------------
 * hypre_MatTCommPkgCreate
 *
 * Builds a communication package for A by handing A's column map,
 * partitioning arrays and the I/J index arrays of its diag and offd blocks
 * to hypre_MatTCommPkgCreate_core, then stores the resulting package in the
 * CommPkgT slot of A.
 *
 * The value returned by the core routine is not inspected; this function
 * always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_MatTCommPkgCreate( hypre_ParCSRMatrix *A )
{
   MPI_Comm             comm   = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix     *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix     *A_offd = hypre_ParCSRMatrixOffd(A);
   hypre_ParCSRCommPkg *pkg;

   /* inputs forwarded to the core routine */
   HYPRE_Int *offd_col_map = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int  first_col    = hypre_ParCSRMatrixFirstColDiag(A);
   HYPRE_Int *col_part     = hypre_ParCSRMatrixColStarts(A);
   HYPRE_Int *row_part     = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int  diag_rows    = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int  diag_cols    = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int  offd_cols    = hypre_CSRMatrixNumCols(A_offd);

   /* outputs filled in by the core routine */
   HYPRE_Int  n_recv;
   HYPRE_Int *recv_ranks;
   HYPRE_Int *recv_starts;
   HYPRE_Int  n_send;
   HYPRE_Int *send_ranks;
   HYPRE_Int *send_starts;
   HYPRE_Int *send_elmts;

   hypre_MatTCommPkgCreate_core(
      comm,
      offd_col_map, first_col, col_part,
      diag_rows, diag_cols, offd_cols,
      row_part,
      hypre_ParCSRMatrixFirstColDiag(A),
      hypre_ParCSRMatrixColMapOffd(A),
      hypre_CSRMatrixI(A_diag), hypre_CSRMatrixJ(A_diag),
      hypre_CSRMatrixI(A_offd), hypre_CSRMatrixJ(A_offd),
      1,
      &n_recv, &recv_ranks, &recv_starts,
      &n_send, &send_ranks, &send_starts, &send_elmts );

   /* wrap the arrays in a freshly allocated package and attach it to A */
   pkg = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);

   hypre_ParCSRCommPkgComm(pkg)          = comm;

   hypre_ParCSRCommPkgNumRecvs(pkg)      = n_recv;
   hypre_ParCSRCommPkgRecvProcs(pkg)     = recv_ranks;
   hypre_ParCSRCommPkgRecvVecStarts(pkg) = recv_starts;

   hypre_ParCSRCommPkgNumSends(pkg)      = n_send;
   hypre_ParCSRCommPkgSendProcs(pkg)     = send_ranks;
   hypre_ParCSRCommPkgSendMapStarts(pkg) = send_starts;
   hypre_ParCSRCommPkgSendMapElmts(pkg)  = send_elmts;

   hypre_ParCSRMatrixCommPkgT(A) = pkg;

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_IJMatrixInitializePETSc
 *
 * Initializes the auxiliary (translator) matrix and the local ParCSR
 * storage of an IJ matrix.
 *
 * If the auxiliary matrix carries a negative local row or column count,
 * that count is first replaced by the corresponding dimension of the diag
 * block of the ParCSR matrix.
 *
 * Returns the sum of the codes returned by hypre_AuxParCSRMatrixInitialize
 * and hypre_ParCSRMatrixInitialize.
 *
 * Note: an earlier revision computed an unused nonzero estimate as
 * (num_nonzeros / global_num_rows + 1) * local_num_rows.  That expression
 * divided by zero for a matrix with no global rows and its result was never
 * read, so it has been removed together with the locals (and the MPI
 * size/rank queries) that only served it.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_IJMatrixInitializePETSc(hypre_IJMatrix *matrix)
{
   HYPRE_Int              ierr = 0;
   hypre_ParCSRMatrix    *par_matrix = hypre_IJMatrixLocalStorage(matrix);
   hypre_AuxParCSRMatrix *aux_matrix = hypre_IJMatrixTranslator(matrix);
   HYPRE_Int              local_num_rows = hypre_AuxParCSRMatrixLocalNumRows(aux_matrix);
   HYPRE_Int              local_num_cols = hypre_AuxParCSRMatrixLocalNumCols(aux_matrix);

   /* a negative count means "not set yet": take it from the diag block */
   if (local_num_rows < 0)
   {
      hypre_AuxParCSRMatrixLocalNumRows(aux_matrix) =
         hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(par_matrix));
   }
   if (local_num_cols < 0)
   {
      hypre_AuxParCSRMatrixLocalNumCols(aux_matrix) =
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixDiag(par_matrix));
   }

   ierr  = hypre_AuxParCSRMatrixInitialize(aux_matrix);
   ierr += hypre_ParCSRMatrixInitialize(par_matrix);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrix_dof_func_offd
 *
 * Gathers the dof_func values that belong to the off-processor columns
 * of A.
 *
 *   A              matrix whose CommPkg drives the exchange; a CommPkg is
 *                  created with hypre_MatvecCommPkgCreate if A has none
 *   num_functions  the exchange is only performed when this is > 1
 *   dof_func       local values, indexed through the send map of the CommPkg
 *   dof_func_offd  output; set to NULL unless num_functions > 1 and A has
 *                  off-processor columns, in which case it receives a newly
 *                  allocated array of num_cols_offd entries
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ParCSRMatrix_dof_func_offd( hypre_ParCSRMatrix *A, HYPRE_Int num_functions, HYPRE_Int *dof_func, HYPRE_Int **dof_func_offd )
{
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle *handle;
   HYPRE_Int               n_offd = hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A));
   HYPRE_Int               n_sends;
   HYPRE_Int              *send_buf;
   HYPRE_Int               s, k, stop, fill;

   *dof_func_offd = NULL;
   if (n_offd && num_functions > 1)
   {
      *dof_func_offd = hypre_CTAlloc(HYPRE_Int, n_offd);
   }

   /* make sure a communication package exists, even if no exchange follows */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   n_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   if (num_functions <= 1)
   {
      return 0;
   }

   /* pack the entries named by the send map, one destination after another */
   send_buf = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, n_sends));
   fill = 0;
   for (s = 0; s < n_sends; s++)
   {
      stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s+1);
      for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s); k < stop; k++)
      {
         send_buf[fill] = dof_func[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, k)];
         fill++;
      }
   }

   /* job code 11 is passed through unchanged; destroying the handle
    * directly after creation completes the exchange before the buffer is
    * released (presumably Destroy waits on the requests - confirm) */
   handle = hypre_ParCSRCommHandleCreate( 11, comm_pkg, send_buf, *dof_func_offd);
   hypre_ParCSRCommHandleDestroy(handle);

   hypre_TFree(send_buf);

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixBooleanMatvec
 *
 * Boolean matrix-vector product; modelled on hypre_ParCSRMatrixMatvec in
 * par_csr_matvec.c.
 *
 * The diag block is applied to x with the caller's beta while the
 * off-processor entries of x are being exchanged; the offd block is then
 * applied to the received values with beta = 1 so that its contribution is
 * accumulated into y.
 *
 * x and y are raw HYPRE_Bool arrays holding the local parts of the vectors.
 *--------------------------------------------------------------------------*/

void
hypre_ParCSRMatrixBooleanMatvec(hypre_ParCSRMatrix *A, HYPRE_Bool alpha, HYPRE_Bool *x, HYPRE_Bool beta, HYPRE_Bool *y)
{
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle *handle;
   hypre_CSRMatrix        *A_diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int               n_offd = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int               n_sends, s, k, stop, fill;
   HYPRE_Bool             *recv_vals;
   HYPRE_Bool             *send_vals;

   /* receive area for the off-processor part of x */
   recv_vals = hypre_CTAlloc(HYPRE_Bool, n_offd);

   /* If A has no CommPkg yet, one is generated now */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   n_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);

   /* gather the local entries of x that other processes need */
   send_vals = hypre_CTAlloc(HYPRE_Bool, hypre_ParCSRCommPkgSendMapStart(comm_pkg, n_sends));
   fill = 0;
   for (s = 0; s < n_sends; s++)
   {
      stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s+1);
      k    = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s);
      while (k < stop)
      {
         send_vals[fill++] = x[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, k)];
         k++;
      }
   }

   /* start the exchange, do the local product, then finish the exchange */
   handle = hypre_ParCSRCommHandleCreate_bool(1, comm_pkg, send_vals, recv_vals);

   hypre_CSRMatrixBooleanMatvec(A_diag, alpha, x, beta, y);

   hypre_ParCSRCommHandleDestroy(handle);

   /* add the off-processor contribution on top of the diag result */
   if (n_offd)
   {
      hypre_CSRMatrixBooleanMatvec(A_offd, alpha, recv_vals, 1, y);
   }

   hypre_TFree(send_vals);
   hypre_TFree(recv_vals);
}
/*--------------------------------------------------------------------------
 * AmgCGCBoundaryFix
 *
 * Checks whether an interpolation is possible for a fine grid point with
 * strong couplings across the processor boundary.  Required after CGC
 * coarsening.
 *
 *   S                          the strength matrix
 *   CF_marker, CF_marker_offd  the coarse/fine markers (local / off-processor)
 *
 * A local row is examined only if it has at least one entry in the offd
 * block of S and is not already marked C_PT.  If none of its diag
 * neighbours and none of its offd neighbours is marked C_PT, the row itself
 * is promoted to C_PT in CF_marker.
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
AmgCGCBoundaryFix (hypre_ParCSRMatrix *S,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd)
{
   hypre_CSRMatrix *diag_blk = hypre_ParCSRMatrixDiag (S);
   hypre_CSRMatrix *offd_blk = hypre_ParCSRMatrixOffd (S);
   HYPRE_Int       *diag_ptr = hypre_CSRMatrixI(diag_blk);
   HYPRE_Int       *diag_col = hypre_CSRMatrixJ(diag_blk);
   HYPRE_Int       *offd_ptr = hypre_CSRMatrixI(offd_blk);
   HYPRE_Int       *offd_col = NULL;
   HYPRE_Int        n_local  = hypre_CSRMatrixNumRows (diag_blk);
   HYPRE_Int        n_offd   = hypre_CSRMatrixNumCols (offd_blk);
   HYPRE_Int        n_added  = 0;
   HYPRE_Int        mpirank, row, k, coarse_nbr;
   MPI_Comm         comm     = hypre_ParCSRMatrixComm(S);

   hypre_MPI_Comm_rank (comm,&mpirank);

   /* the offd column array is only fetched when offd columns exist */
   if (n_offd)
   {
      offd_col = hypre_CSRMatrixJ(offd_blk);
   }

   for (row = 0; row < n_local; row++)
   {
      /* skip rows without boundary couplings and rows that are coarse already */
      if (offd_ptr[row] == offd_ptr[row+1] || CF_marker[row] == C_PT)
      {
         continue;
      }

      /* fine grid point with strong connections across the boundary:
       * look for a coarse neighbour, local ones first */
      coarse_nbr = 0;
      for (k = diag_ptr[row]; !coarse_nbr && k < diag_ptr[row+1]; k++)
      {
         if (CF_marker[diag_col[k]] == C_PT)
         {
            coarse_nbr = 1;
         }
      }
      for (k = offd_ptr[row]; !coarse_nbr && k < offd_ptr[row+1]; k++)
      {
         if (CF_marker_offd[offd_col[k]] == C_PT)
         {
            coarse_nbr = 1;
         }
      }

      if (!coarse_nbr)
      {
         /* all points this row is strongly coupled to are fine: make it C_PT */
         CF_marker[row] = C_PT;
#if 0
         hypre_printf ("Processor %d: added point %d in AmgCGCBoundaryFix\n",mpirank,row);
#endif
         n_added++;
      }
   }

#if 0
   if (n_added) hypre_printf ("Processor %d: added %d points in AmgCGCBoundaryFix\n",mpirank,n_added);
   fflush(stdout);
#endif

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixCopy_C
 *
 * Copies the values of the coarse rows (CF_marker >= 0) of C into P; rows
 * with a negative marker are left untouched.  Both matrices have the same
 * sizes: the entry positions are taken from the row pointers of C and are
 * used unchanged to index the data arrays of P.
 *
 * The offd block is processed only when C has off-processor columns.
 *--------------------------------------------------------------------------*/

void
hypre_ParCSRMatrixCopy_C( hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *src_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *src_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *dst_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *dst_offd = hypre_ParCSRMatrixOffd(P);

   double    *src;
   double    *dst;
   HYPRE_Int *row_ptr;
   HYPRE_Int  n_rows, row, k;

   /* diag block */
   src     = hypre_CSRMatrixData(src_diag);
   dst     = hypre_CSRMatrixData(dst_diag);
   row_ptr = hypre_CSRMatrixI(src_diag);
   n_rows  = hypre_CSRMatrixNumRows(src_diag);

   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker[row] < 0)
      {
         continue;   /* fine row: nothing to copy */
      }
      for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
      {
         dst[k] = src[k];
      }
   }

   /* offd block, skipped entirely when C has no off-processor columns */
   if (!hypre_CSRMatrixNumCols(src_offd))
   {
      return;
   }

   src     = hypre_CSRMatrixData(src_offd);
   dst     = hypre_CSRMatrixData(dst_offd);
   row_ptr = hypre_CSRMatrixI(src_offd);
   n_rows  = hypre_CSRMatrixNumRows(src_offd);

   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker[row] < 0)
      {
         continue;
      }
      for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
      {
         dst[k] = src[k];
      }
   }
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixZero_F
 *
 * Sets every stored value in the fine rows (CF_marker < 0) of P to 0.
 * Rows with a non-negative marker are left untouched, and the sparsity
 * pattern of P is not changed.
 *
 * The offd block is processed only when P has off-processor columns.
 *--------------------------------------------------------------------------*/

void
hypre_ParCSRMatrixZero_F( hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *diag_blk = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *offd_blk = hypre_ParCSRMatrixOffd(P);

   double    *vals;
   HYPRE_Int *row_ptr;
   HYPRE_Int  n_rows, row, k;

   /* diag block */
   vals    = hypre_CSRMatrixData(diag_blk);
   row_ptr = hypre_CSRMatrixI(diag_blk);
   n_rows  = hypre_CSRMatrixNumRows(diag_blk);

   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker[row] >= 0)
      {
         continue;   /* coarse row: keep as is */
      }
      for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
      {
         vals[k] = 0;
      }
   }

   /* offd block, skipped entirely when P has no off-processor columns */
   if (!hypre_CSRMatrixNumCols(offd_blk))
   {
      return;
   }

   vals    = hypre_CSRMatrixData(offd_blk);
   row_ptr = hypre_CSRMatrixI(offd_blk);
   n_rows  = hypre_CSRMatrixNumRows(offd_blk);

   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker[row] >= 0)
      {
         continue;
      }
      for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
      {
         vals[k] = 0;
      }
   }
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *
 * Parallel matrix-vector product restricted through CF_marker / fpt; the
 * restriction itself is implemented by hypre_CSRMatrixMatvec_FF, to which
 * the markers and fpt are forwarded unchanged.
 *
 *   alpha, beta  scalars forwarded to the sequential kernel (the offd
 *                contribution is accumulated with beta = 1.0)
 *   A            matrix; a CommPkg is created for it if it has none and
 *                more than one process is involved
 *   x, y         input / output vectors
 *   CF_marker    local marker array
 *   fpt          marker value forwarded to the sequential kernel
 *                (presumably the value identifying the points to operate
 *                on - confirm against hypre_CSRMatrixMatvec_FF)
 *
 * Returns 0, or an informational code: 11 if the length of x does not match
 * the number of columns of A, 12 if the length of y does not match the
 * number of rows of A, 13 if both are true.  None of these conditions
 * terminates processing.
 *
 * Fix over the previous revision: x_tmp was left uninitialized when
 * num_procs > 1 and A had no off-processor columns, and was then passed to
 * hypre_SeqVectorDestroy.  x_tmp, comm_handle and num_sends are now
 * initialized and the destroy call is guarded.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ParCSRMatrixMatvec_FF( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y, HYPRE_Int *CF_marker, HYPRE_Int fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle = NULL;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local = hypre_ParVectorLocalVector(y);
   HYPRE_Int               num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int               num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   hypre_Vector           *x_tmp = NULL;
   HYPRE_Int               x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_Int               y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_Int               num_cols_offd = hypre_CSRMatrixNumCols(offd);
   HYPRE_Int               ierr = 0;
   HYPRE_Int               num_sends = 0;
   HYPRE_Int               i, j, index, start, num_procs;
   HYPRE_Int              *int_buf_data = NULL;
   HYPRE_Int              *CF_marker_offd = NULL;

   HYPRE_Complex          *x_tmp_data = NULL;
   HYPRE_Complex          *x_buf_data = NULL;
   HYPRE_Complex          *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Because temporary vectors are often
    *  used in ParMatvec, none of these conditions terminates processing,
    *  and the ierr flag is informational only.
    *--------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      /* receive area for the off-processor part of x; stays NULL when A
       * has no off-processor columns */
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated
       *-----------------------------------------------------------------*/
      if (!comm_pkg)
      {
         hypre_MatvecCommPkgCreate(A);
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(HYPRE_Complex, hypre_ParCSRCommPkgSendMapStart
                                    (comm_pkg, num_sends));

      /* pack the local entries of x that other processes need */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }

   /* local product, issued while the exchange of x is outstanding */
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      /* second exchange: the markers of the off-processor columns */
      if (num_sends)
         int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart
                                      (comm_pkg, num_sends));
      if (num_cols_offd)
         CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      /* off-processor contribution, accumulated into y (beta = 1.0) */
      if (num_cols_offd)
         hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local, CF_marker,
                                   CF_marker_offd, fpt);

      /* x_tmp only exists when there were off-processor columns */
      if (x_tmp)
      {
         hypre_SeqVectorDestroy(x_tmp);
         x_tmp = NULL;
      }
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec
 *
 * Parallel matrix-vector product: the diag block is applied to the local
 * part of x with the caller's beta while the off-processor entries of x are
 * exchanged; the offd block is then applied to the received entries with
 * beta = 1.0 so that its contribution is accumulated into y.
 *
 * x may be a multivector; one send buffer and one communication handle are
 * used per component vector.
 *
 * Returns 0, or an informational code: 11 if the length of x does not match
 * the number of columns of A, 12 if the length of y does not match the
 * number of rows of A, 13 if both are true.  Because temporary vectors are
 * often used in ParMatvec, none of these conditions terminates processing.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_ParCSRMatrixMatvec( HYPRE_Complex alpha, hypre_ParCSRMatrix *A, hypre_ParVector *x, HYPRE_Complex beta, hypre_ParVector *y )
{
   hypre_ParCSRCommPkg     *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_ParCSRCommHandle **handles;
   hypre_CSRMatrix         *A_diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *A_offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_loc    = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_loc    = hypre_ParVectorLocalVector(y);
   hypre_Vector            *x_recv;

   HYPRE_Int      glob_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_Int      glob_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_Int      x_len     = hypre_ParVectorGlobalSize(x);
   HYPRE_Int      y_len     = hypre_ParVectorGlobalSize(y);
   HYPRE_Int      n_vec     = hypre_VectorNumVectors(x_loc);
   HYPRE_Int      n_offd    = hypre_CSRMatrixNumCols(A_offd);
   HYPRE_Int      vstride   = hypre_VectorVectorStride( x_loc );
   HYPRE_Int      istride   = hypre_VectorIndexStride( x_loc );
   HYPRE_Int      n_sends, send_len;
   HYPRE_Int      s, k, v, stop, fill;
   HYPRE_Int      ierr = 0;

   HYPRE_Complex  *x_recv_data;
   HYPRE_Complex **send_bufs;
   HYPRE_Complex  *x_data = hypre_VectorData(x_loc);

   hypre_assert( istride>0 );

   /* informational size check; processing continues in every case */
   if (glob_cols != x_len)
   {
      ierr = (glob_rows != y_len) ? 13 : 11;
   }
   else if (glob_rows != y_len)
   {
      ierr = 12;
   }

   hypre_assert( hypre_VectorNumVectors(y_loc)==n_vec );

   /* receive area for the off-processor part of x */
   if (n_vec == 1)
   {
      x_recv = hypre_SeqVectorCreate( n_offd );
   }
   else
   {
      hypre_assert( n_vec>1 );
      x_recv = hypre_SeqMultiVectorCreate( n_offd, n_vec );
   }
   hypre_SeqVectorInitialize(x_recv);
   x_recv_data = hypre_VectorData(x_recv);

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*, n_vec);

   /* If there exists no CommPkg for A, a CommPkg is generated */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   n_sends  = hypre_ParCSRCommPkgNumSends(comm_pkg);
   send_len = hypre_ParCSRCommPkgSendMapStart(comm_pkg, n_sends);

   send_bufs = hypre_CTAlloc( HYPRE_Complex*, n_vec );
   for (v = 0; v < n_vec; v++)
   {
      send_bufs[v] = hypre_CTAlloc(HYPRE_Complex, send_len);
   }

   /* pack the entries named by the send map */
   if (n_vec == 1)
   {
      /* single vector: plain indexing, strides are not consulted */
      fill = 0;
      for (s = 0; s < n_sends; s++)
      {
         stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s+1);
         for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s); k < stop; k++)
         {
            send_bufs[0][fill++] = x_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg, k)];
         }
      }
   }
   else
   {
      /* multivector: one buffer per component, addressed through the strides */
      for (v = 0; v < n_vec; v++)
      {
         fill = 0;
         for (s = 0; s < n_sends; s++)
         {
            stop = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s+1);
            for (k = hypre_ParCSRCommPkgSendMapStart(comm_pkg, s); k < stop; k++)
            {
               send_bufs[v][fill++] =
                  x_data[ v*vstride + istride*hypre_ParCSRCommPkgSendMapElmt(comm_pkg, k) ];
            }
         }
      }
   }

   /* The receive side below places component v at offset v*n_offd, which
    * only works for 'column' storage of a multivector, hence the assert.
    * Supporting 'row' storage would need either a CommPkg whose num_sends
    * counts zones*vectors, a stride in the logic of CommHandleCreate, or a
    * multivector variant of CommHandleCreate that iterates over the
    * component vectors itself. */
   hypre_assert( istride==1 );

   for (v = 0; v < n_vec; v++)
   {
      handles[v] = hypre_ParCSRCommHandleCreate
         ( 1, comm_pkg, send_bufs[v], &(x_recv_data[v*n_offd]) );
   }

   /* local product, issued while the exchange is outstanding */
   hypre_CSRMatrixMatvec( alpha, A_diag, x_loc, beta, y_loc);

   for (v = 0; v < n_vec; v++)
   {
      hypre_ParCSRCommHandleDestroy(handles[v]);
      handles[v] = NULL;
   }
   hypre_TFree(handles);

   /* off-processor contribution, accumulated into y */
   if (n_offd)
   {
      hypre_CSRMatrixMatvec( alpha, A_offd, x_recv, 1.0, y_loc);
   }

   hypre_SeqVectorDestroy(x_recv);
   for (v = 0; v < n_vec; v++)
   {
      hypre_TFree(send_bufs[v]);
   }
   hypre_TFree(send_bufs);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvecT
 *
 * Performs y <- alpha * A^T * x + beta * y.
 *
 * The computation is organised as
 *    y  = (beta/alpha) * y
 *    y += A^T * x
 *    y  = alpha * y
 * with a short cut for alpha == 0.0 (y is only scaled by beta).
 *
 * Returns 0, or an informational code: 1 if the length of x does not equal
 * the number of rows of A, 2 if the length of y does not equal the number
 * of columns of A, 3 if both are true.  Because temporary vectors are often
 * used in MatvecT, none of these conditions terminates processing.
 *
 * NOTE(review): the previous text of this function was incomplete - the
 * body was not closed, the main path had no return statement, nothing was
 * accumulated when hypre_NumThreads() returned 1, the final scaling by
 * alpha (which the beta/alpha pre-scaling relies on) was absent, and
 * y_data_expand was released twice on the threaded single-vector path.
 * The serial path and the final scaling below were reconstructed from the
 * pre-scaling logic; please confirm against the upstream implementation.
 *--------------------------------------------------------------------------*/

int
hypre_CSRMatrixMatvecT( double alpha, hypre_CSRMatrix *A, hypre_Vector *x, double beta, hypre_Vector *y )
{
   double    *A_data    = hypre_CSRMatrixData(A);
   int       *A_i       = hypre_CSRMatrixI(A);
   int       *A_j       = hypre_CSRMatrixJ(A);
   int        num_rows  = hypre_CSRMatrixNumRows(A);
   int        num_cols  = hypre_CSRMatrixNumCols(A);

   double    *x_data = hypre_VectorData(x);
   double    *y_data = hypre_VectorData(y);
   int        x_size = hypre_VectorSize(x);
   int        y_size = hypre_VectorSize(y);
   int        num_vectors = hypre_VectorNumVectors(x);
   int        idxstride_y = hypre_VectorIndexStride(y);
   int        vecstride_y = hypre_VectorVectorStride(y);
   int        idxstride_x = hypre_VectorIndexStride(x);
   int        vecstride_x = hypre_VectorVectorStride(x);

   double     temp;

   double    *y_data_expand = NULL;
   int        offset = 0;
#ifdef HYPRE_USING_OPENMP
   int        my_thread_num = 0;
#endif

   int        i, j, jv, jj;
   int        num_threads;

   int        ierr  = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility (informational only).
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_rows != x_size)
      ierr = 1;

   if (num_cols != y_size)
      ierr = 2;

   if (num_rows != x_size && num_cols != y_size)
      ierr = 3;

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= beta;

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] = 0.0;
      }
      else
      {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
         for (i = 0; i < num_cols*num_vectors; i++)
            y_data[i] *= temp;
      }
   }

   /*-----------------------------------------------------------------
    * y += A^T*x
    *-----------------------------------------------------------------*/

   num_threads = hypre_NumThreads();

   if (num_threads > 1 && num_vectors == 1)
   {
      /* Threaded single-vector case: every thread scatters into its own
       * zero-initialized slice of y_data_expand (so that no two threads
       * write the same entry), then the slices are summed into y. */
      y_data_expand = hypre_CTAlloc(double, num_threads*y_size);

#ifdef HYPRE_USING_OPENMP
#pragma omp parallel private(i,jj,j, my_thread_num, offset)
      {
         my_thread_num = omp_get_thread_num();
         offset = y_size*my_thread_num;
#pragma omp for schedule(static)
#endif
         for (i = 0; i < num_rows; i++)
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data_expand[offset + j] += A_data[jj] * x_data[i];
            }
         }
#ifdef HYPRE_USING_OPENMP
         /* implied barrier */
#pragma omp for schedule(static)
#endif
         for (i = 0; i < y_size; i++)
         {
            for (j = 0; j < num_threads; j++)
            {
               y_data[i] += y_data_expand[j*y_size + i];
            }
         }
#ifdef HYPRE_USING_OPENMP
      } /* end parallel region */
#endif

      hypre_TFree(y_data_expand);
   }
   else if (num_vectors == 1)
   {
      /* serial single-vector case: plain indexing, strides not consulted */
      for (i = 0; i < num_rows; i++)
      {
         for (jj = A_i[i]; jj < A_i[i+1]; jj++)
         {
            j = A_j[jj];
            y_data[j] += A_data[jj] * x_data[i];
         }
      }
   }
   else
   {
      /* MULTIPLE VECTORS NOT THREADED YET */
      for (i = 0; i < num_rows; i++)
      {
         for ( jv=0; jv<num_vectors; ++jv )
         {
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               j = A_j[jj];
               y_data[ j*idxstride_y + jv*vecstride_y ] +=
                  A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x];
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y  (undoes the division by alpha applied to beta above)
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(i) schedule(static)
#endif
      for (i = 0; i < num_cols*num_vectors; i++)
         y_data[i] *= alpha;
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateScalarCFS
 *
 * Expands the nodal matrix SN and the nodal marker array CFN_marker to
 * scalar (per-unknown) form, with num_functions unknowns per node.
 *
 * Every node row of SN becomes num_functions consecutive rows of S; an
 * entry in node column c becomes, in the row of function f, an entry in
 * column c*num_functions + f.  Values are copied only when data is nonzero.
 *
 *   SN                   nodal matrix
 *   CFN_marker           nodal markers, replicated num_functions times
 *   col_offd_SN_to_AN    optional (may be NULL) nodal offd column map that
 *                        is expanded the same way as the columns
 *   num_functions        unknowns per node
 *   nodal                if < 0, a dof_func array is additionally created
 *                        for the nodes whose marker equals 1
 *   data                 nonzero: copy matrix values as well as structure
 *   dof_func_ptr         output, written only when nodal < 0
 *   CF_marker_ptr        output, expanded marker array
 *   col_offd_S_to_A_ptr  output, written only when col_offd_SN_to_AN is set
 *   S_ptr                output, the expanded matrix
 *
 * If SN carries a CommPkg, a matching expanded CommPkg is attached to S.
 * Always returns 0.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_BoomerAMGCreateScalarCFS(hypre_ParCSRMatrix *SN, HYPRE_Int *CFN_marker, HYPRE_Int *col_offd_SN_to_AN, HYPRE_Int num_functions, HYPRE_Int nodal, HYPRE_Int data, HYPRE_Int **dof_func_ptr, HYPRE_Int **CF_marker_ptr, HYPRE_Int **col_offd_S_to_A_ptr, hypre_ParCSRMatrix **S_ptr)
{
   MPI_Comm             comm   = hypre_ParCSRMatrixComm(SN);
   hypre_ParCSRCommPkg *pkg_SN = hypre_ParCSRMatrixCommPkg(SN);
   hypre_ParCSRCommPkg *pkg_S;

   /* nodal input matrix */
   hypre_CSRMatrix *SN_diag   = hypre_ParCSRMatrixDiag(SN);
   HYPRE_Int       *SN_diag_i = hypre_CSRMatrixI(SN_diag);
   HYPRE_Int       *SN_diag_j = hypre_CSRMatrixJ(SN_diag);
   double          *SN_diag_data;
   hypre_CSRMatrix *SN_offd   = hypre_ParCSRMatrixOffd(SN);
   HYPRE_Int       *SN_offd_i = hypre_CSRMatrixI(SN_offd);
   HYPRE_Int       *SN_offd_j = hypre_CSRMatrixJ(SN_offd);
   double          *SN_offd_data;
   HYPRE_Int       *SN_row_starts = hypre_ParCSRMatrixRowStarts(SN);
   HYPRE_Int       *SN_col_starts = hypre_ParCSRMatrixColStarts(SN);
   HYPRE_Int       *SN_col_map    = hypre_ParCSRMatrixColMapOffd(SN);
   HYPRE_Int        n_nodes       = hypre_CSRMatrixNumRows(SN_diag);
   HYPRE_Int        SN_n_offd     = hypre_CSRMatrixNumCols(SN_offd);

   /* scalar output matrix */
   hypre_ParCSRMatrix *S;
   hypre_CSRMatrix    *S_diag;
   hypre_CSRMatrix    *S_offd;
   HYPRE_Int          *S_diag_i;
   HYPRE_Int          *S_diag_j;
   double             *S_diag_data;
   HYPRE_Int          *S_offd_i;
   HYPRE_Int          *S_offd_j;
   double             *S_offd_data;
   HYPRE_Int          *S_row_starts;
   HYPRE_Int          *S_col_starts;
   HYPRE_Int          *S_col_map;
   HYPRE_Int           S_n_offd;
   HYPRE_Int           S_nnz_diag;
   HYPRE_Int           S_nnz_offd;

   /* communication package pieces */
   HYPRE_Int  n_sends, n_recvs;
   HYPRE_Int *send_procs, *send_starts, *send_elmts;
   HYPRE_Int *recv_procs, *recv_starts;
   HYPRE_Int *S_send_procs, *S_send_starts, *S_send_elmts;
   HYPRE_Int *S_recv_procs, *S_recv_starts;

   HYPRE_Int *CF_marker;
   HYPRE_Int *dof_func;
   HYPRE_Int *col_offd_S_to_A = NULL;

   HYPRE_Int  n_vars, n_coarse_nodes, n_starts;
   HYPRE_Int  glob_vars, glob_cols, glob_nodes;
   HYPRE_Int  num_procs;
   HYPRE_Int  node, fn, p, e, pos, row, base;
   HYPRE_Int  first, last;

   hypre_MPI_Comm_size(comm, &num_procs);

   /*------------------------------------------------------------------
    * Expand the marker array: every node contributes num_functions
    * copies of its marker.
    *------------------------------------------------------------------*/
   n_vars    = num_functions*n_nodes;
   CF_marker = hypre_CTAlloc(HYPRE_Int, n_vars);

   pos = 0;
   for (node = 0; node < n_nodes; node++)
   {
      for (fn = 0; fn < num_functions; fn++)
      {
         CF_marker[pos++] = CFN_marker[node];
      }
   }

   if (nodal < 0)
   {
      /* one dof_func entry per function of every node marked 1 */
      n_coarse_nodes = 0;
      for (node = 0; node < n_nodes; node++)
      {
         if (CFN_marker[node] == 1)
         {
            n_coarse_nodes++;
         }
      }

      dof_func = hypre_CTAlloc(HYPRE_Int, n_coarse_nodes*num_functions);
      pos = 0;
      for (node = 0; node < n_nodes; node++)
      {
         if (CFN_marker[node] != 1)
         {
            continue;
         }
         for (fn = 0; fn < num_functions; fn++)
         {
            dof_func[pos++] = fn;
         }
      }
      *dof_func_ptr = dof_func;
   }

   *CF_marker_ptr = CF_marker;

   /*------------------------------------------------------------------
    * Scale the partitioning arrays.  Their length depends on the
    * partitioning mode selected at build time.
    *------------------------------------------------------------------*/
#ifdef HYPRE_NO_GLOBAL_PARTITION
   n_starts = 2;
#else
   n_starts = num_procs+1;
#endif

   S_row_starts = hypre_CTAlloc(HYPRE_Int, n_starts);
   for (p = 0; p < n_starts; p++)
   {
      S_row_starts[p] = num_functions*SN_row_starts[p];
   }

   if (SN_row_starts == SN_col_starts)
   {
      /* SN shares one array for rows and columns; S does the same */
      S_col_starts = S_row_starts;
   }
   else
   {
      S_col_starts = hypre_CTAlloc(HYPRE_Int, n_starts);
      for (p = 0; p < n_starts; p++)
      {
         S_col_starts[p] = num_functions*SN_col_starts[p];
      }
   }

   /*------------------------------------------------------------------
    * Create S and its index/value arrays.
    *------------------------------------------------------------------*/
   S_nnz_diag = num_functions*SN_diag_i[n_nodes];
   S_nnz_offd = num_functions*SN_offd_i[n_nodes];
   S_n_offd   = num_functions*SN_n_offd;

   glob_nodes = hypre_ParCSRMatrixGlobalNumRows(SN);
   glob_cols  = hypre_ParCSRMatrixGlobalNumCols(SN)*num_functions;
   glob_vars  = glob_nodes*num_functions;

   S = hypre_ParCSRMatrixCreate(comm, glob_vars, glob_cols,
                                S_row_starts, S_col_starts,
                                S_n_offd, S_nnz_diag, S_nnz_offd);

   S_diag = hypre_ParCSRMatrixDiag(S);
   S_offd = hypre_ParCSRMatrixOffd(S);

   S_diag_i = hypre_CTAlloc(HYPRE_Int, n_vars+1);
   S_offd_i = hypre_CTAlloc(HYPRE_Int, n_vars+1);
   S_diag_j = hypre_CTAlloc(HYPRE_Int, S_nnz_diag);

   hypre_CSRMatrixI(S_diag) = S_diag_i;
   hypre_CSRMatrixJ(S_diag) = S_diag_j;

   if (data)
   {
      SN_diag_data = hypre_CSRMatrixData(SN_diag);
      S_diag_data  = hypre_CTAlloc(double, S_nnz_diag);
      hypre_CSRMatrixData(S_diag) = S_diag_data;

      if (S_n_offd)
      {
         SN_offd_data = hypre_CSRMatrixData(SN_offd);
         S_offd_data  = hypre_CTAlloc(double, S_nnz_offd);
         hypre_CSRMatrixData(S_offd) = S_offd_data;
      }
   }

   hypre_CSRMatrixI(S_offd) = S_offd_i;

   /*------------------------------------------------------------------
    * Expand the communication package, if SN has one: same partners,
    * every exchanged node index replaced by its num_functions unknowns.
    *------------------------------------------------------------------*/
   if (pkg_SN)
   {
      pkg_S = hypre_CTAlloc(hypre_ParCSRCommPkg, 1);
      hypre_ParCSRCommPkgComm(pkg_S) = comm;

      n_sends = hypre_ParCSRCommPkgNumSends(pkg_SN);
      hypre_ParCSRCommPkgNumSends(pkg_S) = n_sends;
      n_recvs = hypre_ParCSRCommPkgNumRecvs(pkg_SN);
      hypre_ParCSRCommPkgNumRecvs(pkg_S) = n_recvs;

      send_procs  = hypre_ParCSRCommPkgSendProcs(pkg_SN);
      send_starts = hypre_ParCSRCommPkgSendMapStarts(pkg_SN);
      send_elmts  = hypre_ParCSRCommPkgSendMapElmts(pkg_SN);
      recv_procs  = hypre_ParCSRCommPkgRecvProcs(pkg_SN);
      recv_starts = hypre_ParCSRCommPkgRecvVecStarts(pkg_SN);

      S_send_procs = NULL;
      S_send_elmts = NULL;
      if (n_sends)
      {
         S_send_procs = hypre_CTAlloc(HYPRE_Int, n_sends);
         S_send_elmts = hypre_CTAlloc(HYPRE_Int, num_functions*send_starts[n_sends]);
      }
      S_send_starts = hypre_CTAlloc(HYPRE_Int, n_sends+1);
      S_recv_starts = hypre_CTAlloc(HYPRE_Int, n_recvs+1);
      S_recv_procs  = NULL;
      if (n_recvs)
      {
         S_recv_procs = hypre_CTAlloc(HYPRE_Int, n_recvs);
      }

      S_send_starts[0] = 0;
      for (p = 0; p < n_sends; p++)
      {
         S_send_procs[p]    = send_procs[p];
         S_send_starts[p+1] = num_functions*send_starts[p+1];
      }

      S_recv_starts[0] = 0;
      for (p = 0; p < n_recvs; p++)
      {
         S_recv_procs[p]    = recv_procs[p];
         S_recv_starts[p+1] = num_functions*recv_starts[p+1];
      }

      pos = 0;
      for (e = 0; e < send_starts[n_sends]; e++)
      {
         base = num_functions*send_elmts[e];
         for (fn = 0; fn < num_functions; fn++)
         {
            S_send_elmts[pos++] = base+fn;
         }
      }

      hypre_ParCSRCommPkgSendProcs(pkg_S)     = S_send_procs;
      hypre_ParCSRCommPkgSendMapStarts(pkg_S) = S_send_starts;
      hypre_ParCSRCommPkgSendMapElmts(pkg_S)  = S_send_elmts;
      hypre_ParCSRCommPkgRecvProcs(pkg_S)     = S_recv_procs;
      hypre_ParCSRCommPkgRecvVecStarts(pkg_S) = S_recv_starts;
      hypre_ParCSRMatrixCommPkg(S) = pkg_S;
   }

   /*------------------------------------------------------------------
    * Off-processor column index array and expanded column map
    * (only when S has off-processor columns).
    *------------------------------------------------------------------*/
   if (S_n_offd)
   {
      S_offd_j = hypre_CTAlloc(HYPRE_Int, S_nnz_offd);
      hypre_CSRMatrixJ(S_offd) = S_offd_j;

      S_col_map = hypre_CTAlloc(HYPRE_Int, S_n_offd);
      pos = 0;
      for (e = 0; e < SN_n_offd; e++)
      {
         base = SN_col_map[e]*num_functions;
         for (fn = 0; fn < num_functions; fn++)
         {
            S_col_map[pos++] = base+fn;
         }
      }
      hypre_ParCSRMatrixColMapOffd(S) = S_col_map;
   }

   /* optional nodal offd column translation table, expanded likewise */
   if (col_offd_SN_to_AN)
   {
      col_offd_S_to_A = hypre_CTAlloc(HYPRE_Int, S_n_offd);
      pos = 0;
      for (e = 0; e < SN_n_offd; e++)
      {
         base = col_offd_SN_to_AN[e]*num_functions;
         for (fn = 0; fn < num_functions; fn++)
         {
            col_offd_S_to_A[pos++] = base+fn;
         }
      }
      *col_offd_S_to_A_ptr = col_offd_S_to_A;
   }

   /*------------------------------------------------------------------
    * Expand the diag block.  The row of function 0 is built from the
    * node row of SN; the rows of the remaining functions are copies of
    * it with the column index shifted by the function number.  Row
    * pointer entry 0 keeps the zero it received at allocation.
    *------------------------------------------------------------------*/
   pos = 0;
   row = 0;
   for (node = 0; node < n_nodes; node++)
   {
      row++;
      first = pos;
      for (e = SN_diag_i[node]; e < SN_diag_i[node+1]; e++)
      {
         if (data)
         {
            S_diag_data[pos] = SN_diag_data[e];
         }
         S_diag_j[pos] = SN_diag_j[e]*num_functions;
         pos++;
      }
      last = pos;
      S_diag_i[row] = pos;

      for (fn = 1; fn < num_functions; fn++)
      {
         row++;
         for (e = first; e < last; e++)
         {
            if (data)
            {
               S_diag_data[pos] = S_diag_data[e];
            }
            S_diag_j[pos] = S_diag_j[e]+fn;
            pos++;
         }
         S_diag_i[row] = pos;
      }
   }

   /*------------------------------------------------------------------
    * Expand the offd block in exactly the same way.
    *------------------------------------------------------------------*/
   pos = 0;
   row = 0;
   for (node = 0; node < n_nodes; node++)
   {
      row++;
      first = pos;
      for (e = SN_offd_i[node]; e < SN_offd_i[node+1]; e++)
      {
         if (data)
         {
            S_offd_data[pos] = SN_offd_data[e];
         }
         S_offd_j[pos] = SN_offd_j[e]*num_functions;
         pos++;
      }
      last = pos;
      S_offd_i[row] = pos;

      for (fn = 1; fn < num_functions; fn++)
      {
         row++;
         for (e = first; e < last; e++)
         {
            if (data)
            {
               S_offd_data[pos] = S_offd_data[e];
            }
            S_offd_j[pos] = S_offd_j[e]+fn;
            pos++;
         }
         S_offd_i[row] = pos;
      }
   }

   *S_ptr = S;

   return 0;
}
HYPRE_Int main( HYPRE_Int argc, char *argv[] ) { hypre_ParCSRMatrix *par_matrix, *g_matrix, **submatrices; hypre_CSRMatrix *A_diag, *A_offd; hypre_CSRBlockMatrix *diag; hypre_CSRBlockMatrix *offd; hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix; hypre_Vector *x_local; hypre_Vector *y_local; hypre_ParVector *x; hypre_ParVector *y; HYPRE_Solver gmres_solver, precon; HYPRE_Int *diag_i, *diag_j, *offd_i, *offd_j; HYPRE_Int *diag_i2, *diag_j2, *offd_i2, *offd_j2; double *diag_d, *diag_d2, *offd_d, *offd_d2; HYPRE_Int mypid, local_size, nprocs; HYPRE_Int global_num_rows, global_num_cols, num_cols_offd; HYPRE_Int num_nonzeros_diag, num_nonzeros_offd, *colMap; HYPRE_Int ii, jj, kk, row, col, nnz, *indices, *colMap2; double *data, ddata, *y_data; HYPRE_Int *row_starts, *col_starts, *rstarts, *cstarts; HYPRE_Int *row_starts2, *col_starts2; HYPRE_Int block_size=2, bnnz=4, *index_set; FILE *fp; /* --------------------------------------------- */ /* Initialize MPI */ /* --------------------------------------------- */ hypre_MPI_Init(&argc, &argv); hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid); hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs); /* build and fetch matrix */ MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix); global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix); global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix); row_starts = hypre_ParCSRMatrixRowStarts(par_matrix); col_starts = hypre_ParCSRMatrixColStarts(par_matrix); A_diag = hypre_ParCSRMatrixDiag(par_matrix); A_offd = hypre_ParCSRMatrixOffd(par_matrix); num_cols_offd = hypre_CSRMatrixNumCols(A_offd); num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag); num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd); /* --------------------------------------------- */ /* build vector and apply matvec */ /* --------------------------------------------- */ x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_cols,col_starts); 
hypre_ParVectorSetPartitioningOwner(x,0); hypre_ParVectorInitialize(x); x_local = hypre_ParVectorLocalVector(x); data = hypre_VectorData(x_local); local_size = col_starts[mypid+1] - col_starts[mypid]; for (ii = 0; ii < local_size; ii++) data[ii] = 1.0; y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD,global_num_rows,row_starts); hypre_ParVectorSetPartitioningOwner(y,0); hypre_ParVectorInitialize(y); hypre_ParCSRMatrixMatvec (1.0, par_matrix, x, 0.0, y); ddata = hypre_ParVectorInnerProd(y, y); if (mypid == 0) hypre_printf("y inner product = %e\n", ddata); hypre_ParVectorDestroy(x); hypre_ParVectorDestroy(y); /* --------------------------------------------- */ /* build block matrix */ /* --------------------------------------------- */ rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii]; cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1); for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii]; par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,block_size, global_num_rows, global_num_cols, rstarts, cstarts, num_cols_offd, num_nonzeros_diag, num_nonzeros_offd); colMap = hypre_ParCSRMatrixColMapOffd(par_matrix); if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd); else colMap2 = NULL; for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii]; hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2; diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix)); diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix)); diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix)); diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix); diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag); diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz); for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii]; for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii]; hypre_CSRBlockMatrixI(diag) = diag_i2; 
hypre_CSRBlockMatrixJ(diag) = diag_j2; for (ii = 0; ii < num_nonzeros_diag; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii]; else diag_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(diag) = diag_d2; offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix)); offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix)); offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix)); offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix); offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1); for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii]; hypre_CSRBlockMatrixI(offd) = offd_i2; if (num_cols_offd) { offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd); for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii]; hypre_CSRBlockMatrixJ(offd) = offd_j2; offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz); for (ii = 0; ii < num_nonzeros_offd; ii++) { for (jj = 0; jj < block_size; jj++) for (kk = 0; kk < block_size; kk++) { if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii]; else offd_d2[ii*bnnz+jj*block_size+kk] = 0.0; } } hypre_CSRBlockMatrixData(offd) = offd_d2; } else {
/*--------------------------------------------------------------------------
 * hypre_ParMatScaleDiagInv_F
 *
 * Scales selected rows of C in place.  For every local row i with
 * CF_marker[i] < 0 (a "fine" row), each stored entry of row i of C - in both
 * the diag and the offd block - is divided by (a_ii * weight), where a_ii is
 * the diagonal entry of A found in row i of A's diag block.
 *
 *   C         : matrix whose rows are scaled (modified in place)
 *   A         : matrix supplying the diagonal entries
 *   weight    : extra scalar factor applied to a_ii before dividing
 *   CF_marker : per-row marker; only rows with a negative marker are touched
 *
 * The computation is purely local: only A's diag block is read, and no
 * communication takes place.  A row of A with no stored diagonal entry
 * leaves the matching row of C unchanged.
 *--------------------------------------------------------------------------*/
void hypre_ParMatScaleDiagInv_F( hypre_ParCSRMatrix * C, hypre_ParCSRMatrix * A,
                                 double weight, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *diag_of_A = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix *diag_of_C = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *offd_of_C = hypre_ParCSRMatrixOffd(C);

   double     *a_val  = hypre_CSRMatrixData(diag_of_A);
   HYPRE_Int  *a_ptr  = hypre_CSRMatrixI(diag_of_A);
   HYPRE_Int  *a_col  = hypre_CSRMatrixJ(diag_of_A);

   double     *cd_val = hypre_CSRMatrixData(diag_of_C);
   HYPRE_Int  *cd_ptr = hypre_CSRMatrixI(diag_of_C);
   double     *co_val = hypre_CSRMatrixData(offd_of_C);
   HYPRE_Int  *co_ptr = hypre_CSRMatrixI(offd_of_C);

   HYPRE_Int   n_rows   = hypre_CSRMatrixNumRows(diag_of_C);
   HYPRE_Int   has_offd = hypre_CSRMatrixNumCols(offd_of_C);

   HYPRE_Int   row, ka, kc;
   double      divisor;

   for (row = 0; row < n_rows; row++)
   {
      /* coarse rows (marker >= 0) are left untouched */
      if ( CF_marker[row] >= 0 )
      {
         continue;
      }

      /* scan row 'row' of A_diag for its diagonal entry */
      for (ka = a_ptr[row]; ka < a_ptr[row+1]; ka++)
      {
         if ( a_col[ka] != row )
         {
            continue;
         }

         divisor = a_val[ka] * weight;

         /* row 'row' of C_diag */
         for (kc = cd_ptr[row]; kc < cd_ptr[row+1]; kc++)
         {
            cd_val[kc] /= divisor;
         }

         /* row 'row' of C_offd, only when an offd block exists */
         if ( has_offd )
         {
            for (kc = co_ptr[row]; kc < co_ptr[row+1]; kc++)
            {
               co_val[kc] /= divisor;
            }
         }
      }
   }
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixDropEntries
 *
 * Delete any matrix entry C(i,j) for which the corresponding entry P(i,j)
 * doesn't exist - but only for "fine" rows, i.e. rows with CF_marker[i]<0.
 * Entries of coarse rows (CF_marker[i]>=0) are always kept.
 *
 *   C         : matrix to be thinned (modified in place)
 *   P         : matrix whose sparsity pattern decides which entries survive
 *   CF_marker : per-row coarse/fine marker
 *
 * This is done as a purely local computation - C and P must have the same
 * data distribution (among processors).  Diag-block columns are compared by
 * their local index; offd-block columns are compared by global index through
 * each matrix's own offd column map, so C and P need not share that map.
 *
 * The i, j and data arrays of C are repacked in place and are not
 * re-allocated, so there will generally be unused space at their ends.
 * After dropping, the surviving entries of each row are rescaled so that the
 * row sum (diag + offd) is unchanged; a row whose surviving entries sum to
 * zero is left unscaled.
 *
 * The local nonzero counts of C_diag and C_offd are updated.  The global
 * counts of C are reset to 0 rather than recomputed, since recomputing them
 * would require a global reduction.
 *
 * Note (from the original source, May 2006): this function is no longer
 * called.  The pattern lookup is a linear search of P's row for every entry
 * of C; if it is revived for large rows, a sorted or indexed lookup would be
 * worth adding.
 *--------------------------------------------------------------------------*/
void hypre_ParCSRMatrixDropEntries( hypre_ParCSRMatrix * C, hypre_ParCSRMatrix * P, HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   double     *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int  *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int  *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double     *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int  *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int  *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int  *C_col_map_offd = hypre_ParCSRMatrixColMapOffd(C);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int  *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int  *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Int  *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int  *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int  *P_col_map_offd = hypre_ParCSRMatrixColMapOffd(P);

   HYPRE_Int  *new_C_diag_i;
   HYPRE_Int  *new_C_offd_i;
   /* the offd block has the same number of rows as the diag block */
   HYPRE_Int   num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int   num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(C_diag);
   HYPRE_Int   num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(C_offd);

   double      v, old_sum, new_sum, scale;
   HYPRE_Int   i1, m, m1d, m1o, jC, mP, keep, is_coarse;

   /* New row pointers are built in scratch arrays because the old ones are
      still needed to walk the not-yet-processed rows.  Both arrays have one
      entry per row plus one. */
   new_C_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   new_C_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   m1d = C_diag_i[0];
   m1o = C_offd_i[0];
   new_C_diag_i[0] = m1d;
   new_C_offd_i[0] = m1o;

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      is_coarse = ( CF_marker[i1] >= 0 );
      old_sum = 0;
      new_sum = 0;

      /* ---- diag block: keep C(i1,jC) iff the row is coarse or P has (i1,jC) ---- */
      for ( m = C_diag_i[i1]; m < C_diag_i[i1+1]; ++m )
      {
         v = C_diag_data[m];
         jC = C_diag_j[m];
         old_sum += v;

         keep = is_coarse;
         if ( !keep )
         {
            for ( mP = P_diag_i[i1]; mP < P_diag_i[i1+1]; ++mP )
            {
               if ( jC == P_diag_j[mP] )
               {
                  keep = 1;
                  break;
               }
            }
         }

         if ( keep )
         {
            /* keep v in C; the write index never passes the read index */
            new_sum += v;
            C_diag_j[m1d] = jC;
            C_diag_data[m1d] = v;
            ++m1d;
         }
         else
         {
            /* discard v */
            --num_nonzeros_diag;
         }
      }

      /* ---- offd block: same rule, columns compared by global index ---- */
      for ( m = C_offd_i[i1]; m < C_offd_i[i1+1]; ++m )
      {
         v = C_offd_data[m];
         old_sum += v;

         keep = is_coarse;
         if ( !keep )
         {
            jC = C_col_map_offd[ C_offd_j[m] ];
            for ( mP = P_offd_i[i1]; mP < P_offd_i[i1+1]; ++mP )
            {
               if ( jC == P_col_map_offd[ P_offd_j[mP] ] )
               {
                  keep = 1;
                  break;
               }
            }
         }

         if ( keep )
         {
            /* keep v in C */
            new_sum += v;
            C_offd_j[m1o] = C_offd_j[m];
            C_offd_data[m1o] = v;
            ++m1o;
         }
         else
         {
            /* discard v */
            --num_nonzeros_offd;
         }
      }

      new_C_diag_i[i1+1] = m1d;
      new_C_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if ( new_sum != 0 )
      {
         scale = old_sum/new_sum;
      }
      else
      {
         scale = 1.0;
      }
      for ( m = new_C_diag_i[i1]; m < new_C_diag_i[i1+1]; ++m )
      {
         C_diag_data[m] *= scale;
      }
      for ( m = new_C_offd_i[i1]; m < new_C_offd_i[i1+1]; ++m )
      {
         C_offd_data[m] *= scale;
      }
   }

   /* install the new row pointers (entry 0 is unchanged) */
   for ( i1 = 1; i1 <= num_rows_diag_C; i1++ )
   {
      C_diag_i[i1] = new_C_diag_i[i1];
      C_offd_i[i1] = new_C_offd_i[i1];
   }

   hypre_TFree( new_C_diag_i );
   hypre_TFree( new_C_offd_i );

   hypre_CSRMatrixNumNonzeros(C_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(C_offd) = num_nonzeros_offd;
   /*  SetNumNonzeros, SetDNumNonzeros are global, need hypre_MPI_Allreduce.
       The original author suspected, but did not know, that other parts of
       hypre do not assume that the correct values have been set, so the
       global counts are simply reset here. */
   hypre_ParCSRMatrixNumNonzeros( C ) = 0;
   hypre_ParCSRMatrixDNumNonzeros( C ) = 0.0;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec_FF
 *
 * Marker-restricted matrix-vector product  y <- alpha*A*x + beta*y.
 *
 * Only rows i with CF_marker_x[i] == fpt are updated, and within such a row
 * only entries whose column j satisfies CF_marker_y[j] == fpt contribute.
 * All other entries of y are left untouched.
 *
 * Returns an informational code describing size mismatches:
 *   1 - length of x differs from the number of columns of A
 *   2 - length of y differs from the number of rows of A
 *   3 - both of the above
 * A mismatch does not stop the computation, because temporary vectors are
 * often passed in.
 *--------------------------------------------------------------------------*/
int hypre_CSRMatrixMatvec_FF( double alpha, hypre_CSRMatrix *A, hypre_Vector *x,
                              double beta, hypre_Vector *y,
                              int *CF_marker_x, int *CF_marker_y, int fpt )
{
   double *mat_val = hypre_CSRMatrixData(A);
   int    *row_ptr = hypre_CSRMatrixI(A);
   int    *col_idx = hypre_CSRMatrixJ(A);
   int     n_rows  = hypre_CSRMatrixNumRows(A);
   int     n_cols  = hypre_CSRMatrixNumCols(A);

   double *xv = hypre_VectorData(x);
   double *yv = hypre_VectorData(y);

   double  ratio, acc;
   int     row, k, col;
   int     status = 0;

   /* size compatibility: bit 0 flags x, bit 1 flags y */
   if (n_cols != hypre_VectorSize(x))
   {
      status += 1;
   }
   if (n_rows != hypre_VectorSize(y))
   {
      status += 2;
   }

   /* alpha == 0: the product vanishes, only y = beta*y remains */
   if (alpha == 0.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            yv[row] *= beta;
         }
      }
      return status;
   }

   /* y = (beta/alpha)*y; skipped when the ratio is exactly one */
   ratio = beta / alpha;
   if (ratio == 0.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            yv[row] = 0.0;
         }
      }
   }
   else if (ratio != 1.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            yv[row] *= ratio;
         }
      }
   }

   /* y += A*x over the marked rows and columns */
   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker_x[row] != fpt)
      {
         continue;
      }

      acc = yv[row];
      for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
      {
         col = col_idx[k];
         if (CF_marker_y[col] == fpt)
         {
            acc += mat_val[k] * xv[col];
         }
      }
      yv[row] = acc;
   }

   /* y = alpha*y */
   if (alpha != 1.0)
   {
      for (row = 0; row < n_rows; row++)
      {
         if (CF_marker_x[row] == fpt)
         {
            yv[row] *= alpha;
         }
      }
   }

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvecT
 *
 * Transposed matrix-vector product  y <- alpha*A^T*x + beta*y, for one or
 * several vectors stored in x and y (addressed through their index and
 * vector strides).
 *
 * Returns an informational code describing size mismatches:
 *   1 - length of x differs from the number of rows of A
 *   2 - length of y differs from the number of columns of A
 *   3 - both of the above
 * A mismatch does not stop the computation, because temporary vectors are
 * often passed in.
 *
 * When hypre_NumThreads() reports more than one thread, the columns of A are
 * split into one contiguous chunk per thread and the matrix is swept once
 * per chunk, each sweep updating only the entries of y inside its chunk.
 *--------------------------------------------------------------------------*/
int hypre_CSRMatrixMatvecT( double alpha, hypre_CSRMatrix *A, hypre_Vector *x,
                            double beta, hypre_Vector *y )
{
   double *mat_val = hypre_CSRMatrixData(A);
   int    *row_ptr = hypre_CSRMatrixI(A);
   int    *col_idx = hypre_CSRMatrixJ(A);
   int     n_rows  = hypre_CSRMatrixNumRows(A);
   int     n_cols  = hypre_CSRMatrixNumCols(A);

   double *xv = hypre_VectorData(x);
   double *yv = hypre_VectorData(y);

   int     n_vecs   = hypre_VectorNumVectors(x);
   int     y_istr   = hypre_VectorIndexStride(y);
   int     y_vstr   = hypre_VectorVectorStride(y);
   int     x_istr   = hypre_VectorIndexStride(x);
   int     x_vstr   = hypre_VectorVectorStride(x);
   int     y_length = n_cols*n_vecs;

   double  ratio;
   int     row, col, k, v, t;
   int     chunk, extra, first, past;
   int     n_threads;
   int     status = 0;

   hypre_assert( n_vecs == hypre_VectorNumVectors(y) );

   /* size compatibility: bit 0 flags x, bit 1 flags y */
   if (n_rows != hypre_VectorSize(x))
   {
      status += 1;
   }
   if (n_cols != hypre_VectorSize(y))
   {
      status += 2;
   }

   /* alpha == 0: the product vanishes, only y = beta*y remains */
   if (alpha == 0.0)
   {
      for (k = 0; k < y_length; k++)
      {
         yv[k] *= beta;
      }
      return status;
   }

   /* y = (beta/alpha)*y; skipped when the ratio is exactly one */
   ratio = beta / alpha;
   if (ratio == 0.0)
   {
      for (k = 0; k < y_length; k++)
      {
         yv[k] = 0.0;
      }
   }
   else if (ratio != 1.0)
   {
      for (k = 0; k < y_length; k++)
      {
         yv[k] *= ratio;
      }
   }

   /* y += A^T*x */
   n_threads = hypre_NumThreads();

   if (n_threads > 1)
   {
      /* every chunk gets 'chunk' columns; the first 'extra' chunks get one more */
      chunk = n_cols/n_threads;
      extra = n_cols - chunk*n_threads;

      for (t = 0; t < n_threads; t++)
      {
         /* chunk t owns the columns first <= col < past */
         first = t*chunk + (t < extra ? t : extra);
         past  = first + chunk + (t < extra ? 1 : 0);

         for (row = 0; row < n_rows; row++)
         {
            if ( n_vecs == 1 )
            {
               for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
               {
                  col = col_idx[k];
                  if (col >= first && col < past)
                  {
                     yv[col] += mat_val[k] * xv[row];
                  }
               }
            }
            else
            {
               for (v = 0; v < n_vecs; ++v)
               {
                  for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
                  {
                     col = col_idx[k];
                     if (col >= first && col < past)
                     {
                        yv[ col*y_istr + v*y_vstr ] +=
                           mat_val[k] * xv[ row*x_istr + v*x_vstr ];
                     }
                  }
               }
            }
         }
      }
   }
   else if ( n_vecs == 1 )
   {
      for (row = 0; row < n_rows; row++)
      {
         for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
         {
            yv[ col_idx[k] ] += mat_val[k] * xv[row];
         }
      }
   }
   else
   {
      for (row = 0; row < n_rows; row++)
      {
         for (v = 0; v < n_vecs; ++v)
         {
            for (k = row_ptr[row]; k < row_ptr[row+1]; k++)
            {
               col = col_idx[k];
               yv[ col*y_istr + v*y_vstr ] +=
                  mat_val[k] * xv[ row*x_istr + v*x_vstr ];
            }
         }
      }
   }

   /* y = alpha*y */
   if (alpha != 1.0)
   {
      for (k = 0; k < y_length; k++)
      {
         yv[k] *= alpha;
      }
   }

   return status;
}
HYPRE_Int AmgCGCGraphAssemble (hypre_ParCSRMatrix *S,HYPRE_Int *vertexrange,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd,HYPRE_Int coarsen_type, HYPRE_IJMatrix *ijG) /* assemble a graph representing the connections between the grids * ================================================================================================ * S : the strength matrix * vertexrange : the parallel layout of the candidate coarse grid vertices * CF_marker, CF_marker_offd : the coarse/fine markers * coarsen_type : the coarsening type * ijG : the created graph * ================================================================================================*/ { HYPRE_Int ierr=0; HYPRE_Int i,/* ii,*/ip,j,jj,m,n,p; HYPRE_Int mpisize,mpirank; HYPRE_Real weight; MPI_Comm comm = hypre_ParCSRMatrixComm(S); /* hypre_MPI_Status status; */ HYPRE_IJMatrix ijmatrix; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S); /* HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); */ /* HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); */ HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j = NULL; HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd); HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S); HYPRE_Int pointrange_start,pointrange_end; HYPRE_Int *pointrange,*pointrange_nonlocal,*pointrange_strong=NULL; HYPRE_Int vertexrange_start,vertexrange_end; HYPRE_Int *vertexrange_strong= NULL; HYPRE_Int *vertexrange_nonlocal; HYPRE_Int num_recvs,num_recvs_strong; HYPRE_Int *recv_procs,*recv_procs_strong=NULL; HYPRE_Int /* *zeros,*rownz,*/*rownz_diag,*rownz_offd; HYPRE_Int nz; HYPRE_Int nlocal; HYPRE_Int one=1; hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S); hypre_MPI_Comm_size (comm,&mpisize); hypre_MPI_Comm_rank (comm,&mpirank); /* determine neighbor processors */ num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs 
(comm_pkg); pointrange = hypre_ParCSRMatrixRowStarts (S); pointrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); vertexrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs); #ifdef HYPRE_NO_GLOBAL_PARTITION { HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg); HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs (comm_pkg); HYPRE_Int *int_buf_data = hypre_CTAlloc (HYPRE_Int,4*num_sends); HYPRE_Int *int_buf_data2 = int_buf_data + 2*num_sends; hypre_MPI_Request *sendrequest,*recvrequest; nlocal = vertexrange[1] - vertexrange[0]; pointrange_start = pointrange[0]; pointrange_end = pointrange[1]; vertexrange_start = vertexrange[0]; vertexrange_end = vertexrange[1]; sendrequest = hypre_CTAlloc (hypre_MPI_Request,2*(num_sends+num_recvs)); recvrequest = sendrequest+2*num_sends; for (i=0;i<num_recvs;i++) { hypre_MPI_Irecv (pointrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_pointrange,comm,&recvrequest[2*i]); hypre_MPI_Irecv (vertexrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_vertexrange,comm,&recvrequest[2*i+1]); } for (i=0;i<num_sends;i++) { int_buf_data[2*i] = pointrange_start; int_buf_data[2*i+1] = pointrange_end; int_buf_data2[2*i] = vertexrange_start; int_buf_data2[2*i+1] = vertexrange_end; hypre_MPI_Isend (int_buf_data+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_pointrange,comm,&sendrequest[2*i]); hypre_MPI_Isend (int_buf_data2+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_vertexrange,comm,&sendrequest[2*i+1]); } hypre_MPI_Waitall (2*(num_sends+num_recvs),sendrequest,hypre_MPI_STATUSES_IGNORE); hypre_TFree (int_buf_data); hypre_TFree (sendrequest); } #else nlocal = vertexrange[mpirank+1] - vertexrange[mpirank]; pointrange_start = pointrange[mpirank]; pointrange_end = pointrange[mpirank+1]; vertexrange_start = vertexrange[mpirank]; vertexrange_end = vertexrange[mpirank+1]; for (i=0;i<num_recvs;i++) { pointrange_nonlocal[2*i] = pointrange[recv_procs[i]]; pointrange_nonlocal[2*i+1] = pointrange[recv_procs[i]+1]; vertexrange_nonlocal[2*i] = 
vertexrange[recv_procs[i]]; vertexrange_nonlocal[2*i+1] = vertexrange[recv_procs[i]+1]; } #endif /* now we have the array recv_procs. However, it may contain too many entries as it is inherited from A. We now have to determine the subset which contains only the strongly connected neighbors */ if (num_cols_offd) { S_offd_j = hypre_CSRMatrixJ(S_offd); recv_procs_strong = hypre_CTAlloc (HYPRE_Int,num_recvs); memset (recv_procs_strong,0,num_recvs*sizeof(HYPRE_Int)); /* don't forget to shorten the pointrange and vertexrange arrays accordingly */ pointrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (pointrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); vertexrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs); memset (vertexrange_strong,0,2*num_recvs*sizeof(HYPRE_Int)); for (i=0;i<num_variables;i++) for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = col_map_offd[S_offd_j[j]]; for (p=0;p<num_recvs;p++) /* S_offd_j is NOT sorted! */ if (jj >= pointrange_nonlocal[2*p] && jj < pointrange_nonlocal[2*p+1]) break; #if 0 hypre_printf ("Processor %d, remote point %d on processor %d\n",mpirank,jj,recv_procs[p]); #endif recv_procs_strong [p]=1; } for (p=0,num_recvs_strong=0;p<num_recvs;p++) { if (recv_procs_strong[p]) { recv_procs_strong[num_recvs_strong]=recv_procs[p]; pointrange_strong[2*num_recvs_strong] = pointrange_nonlocal[2*p]; pointrange_strong[2*num_recvs_strong+1] = pointrange_nonlocal[2*p+1]; vertexrange_strong[2*num_recvs_strong] = vertexrange_nonlocal[2*p]; vertexrange_strong[2*num_recvs_strong+1] = vertexrange_nonlocal[2*p+1]; num_recvs_strong++; } } } else num_recvs_strong=0; hypre_TFree (pointrange_nonlocal); hypre_TFree (vertexrange_nonlocal); rownz_diag = hypre_CTAlloc (HYPRE_Int,2*nlocal); rownz_offd = rownz_diag + nlocal; for (p=0,nz=0;p<num_recvs_strong;p++) { nz += vertexrange_strong[2*p+1]-vertexrange_strong[2*p]; } for (m=0;m<nlocal;m++) { rownz_diag[m]=nlocal-1; rownz_offd[m]=nz; } HYPRE_IJMatrixCreate(comm, vertexrange_start, vertexrange_end-1, 
vertexrange_start, vertexrange_end-1, &ijmatrix); HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR); HYPRE_IJMatrixSetDiagOffdSizes (ijmatrix, rownz_diag, rownz_offd); HYPRE_IJMatrixInitialize(ijmatrix); hypre_TFree (rownz_diag); /* initialize graph */ weight = -1; for (m=vertexrange_start;m<vertexrange_end;m++) { for (p=0;p<num_recvs_strong;p++) { for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while initializing graphs at (%d, %d)\n",mpirank,ierr,m,n); #endif } } } /* weight graph */ for (i=0;i<num_variables;i++) { for (j=S_offd_i[i];j<S_offd_i[i+1];j++) { jj = S_offd_j[j]; /* jj is not a global index!!! */ /* determine processor */ for (p=0;p<num_recvs_strong;p++) if (col_map_offd[jj] >= pointrange_strong[2*p] && col_map_offd[jj] < pointrange_strong[2*p+1]) break; ip=recv_procs_strong[p]; /* loop over all coarse grids constructed on this processor domain */ for (m=vertexrange_start;m<vertexrange_end;m++) { /* loop over all coarse grids constructed on neighbor processor domain */ for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) { /* coarse grid counting inside gridpartition->local/gridpartition->nonlocal starts with one while counting inside range starts with zero */ if (CF_marker[i]-1==m && CF_marker_offd[jj]-1==n) /* C-C-coupling */ weight = -1; else if ( (CF_marker[i]-1==m && (CF_marker_offd[jj]==0 || CF_marker_offd[jj]-1!=n) ) || ( (CF_marker[i]==0 || CF_marker[i]-1!=m) && CF_marker_offd[jj]-1==n ) ) /* C-F-coupling */ weight = 0; else weight = -8; /* F-F-coupling */ ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight); #if 0 if (ierr) hypre_printf ("Processor %d: error %d while adding %lf to entry (%d, %d)\n",mpirank,ierr,weight,m,n); #endif } } } } /* assemble */ HYPRE_IJMatrixAssemble (ijmatrix); /*if (num_recvs_strong) {*/ hypre_TFree (recv_procs_strong); hypre_TFree (pointrange_strong); 
hypre_TFree (vertexrange_strong); /*} */ *ijG = ijmatrix; return (ierr); }
/* prepare the coarse/fine markers for the assembly of the grid graph
 * ================================================================================================
 * S : the strength matrix
 * nlocal : the number of locally created coarse grids
 * CF_marker : the coarse/fine markers; positive entries are shifted in place to global
 *             grid indices
 * CF_marker_offd : output; newly allocated array with one entry per offd column of S,
 *                  filled with the neighbors' shifted markers (left zero on a single
 *                  process). Not freed here.
 * coarsen_type : the coarsening type; an even value means empty grids are allowed
 * vrange : output; newly allocated array holding the ranges of the vertices representing
 *          coarse grids. Not freed here.
 * ================================================================================================
 * With HYPRE_NO_GLOBAL_PARTITION, *vrange has two entries (first vertex of this processor,
 * first vertex of the next one); otherwise it has one entry per processor plus one.
 *
 * If S has no communication package yet, one is created.
 * Always returns 0.
 */
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange)
{
   HYPRE_Int ierr=0;
   HYPRE_Int mpisize,mpirank;
   HYPRE_Int num_sends;
   HYPRE_Int *vertexrange=NULL;
   HYPRE_Int vstart;
   HYPRE_Int marker_shift;
   HYPRE_Int *int_buf_data;
   HYPRE_Int start;
   HYPRE_Int i,ii,j;
   HYPRE_Int num_variables = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S));
   /* even coarsen_type means allow_emptygrids; a single flag keeps the two
      places that depend on it consistent, also for negative values, where
      (coarsen_type % 2 == 1) would be false for odd input */
   HYPRE_Int allow_emptygrids = (coarsen_type % 2 == 0);

   MPI_Comm comm = hypre_ParCSRMatrixComm(S);

   hypre_ParCSRCommPkg    *comm_pkg    = hypre_ParCSRMatrixCommPkg (S);
   hypre_ParCSRCommHandle *comm_handle;

   hypre_MPI_Comm_size (comm,&mpisize);
   hypre_MPI_Comm_rank (comm,&mpirank);

   if (!comm_pkg) {
      hypre_MatvecCommPkgCreate (S);
      comm_pkg = hypre_ParCSRMatrixCommPkg (S);
   }
   num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);

   /* the empty grid counts as one additional local grid */
   if (allow_emptygrids) nlocal++;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   {
      HYPRE_Int scan_recv;

      vertexrange = hypre_CTAlloc(HYPRE_Int,2);
      /* inclusive prefix sum of nlocal over the processors */
      hypre_MPI_Scan(&nlocal, &scan_recv, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
      /* first point in my range */
      vertexrange[0] = scan_recv - nlocal;
      /* first point in next proc's range */
      vertexrange[1] = scan_recv;
      vstart = vertexrange[0];
   }
#else
   /* gather every processor's count into entries 1..mpisize, then turn the
      counts into offsets with a running sum */
   vertexrange = hypre_CTAlloc (HYPRE_Int,mpisize+1);

   hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,vertexrange+1,1,HYPRE_MPI_INT,comm);
   vertexrange[0]=0;
   for (i=2;i<=mpisize;i++) vertexrange[i]+=vertexrange[i-1];
   vstart = vertexrange[mpirank];
#endif

   /* Note: vstart uses 0-based indexing, while CF_marker uses 1-based indexing.
      When empty grids are allowed, add one more because vertexrange[mpirank]+1
      denotes the empty grid. Hence, vertexrange[mpirank]+2 is the first coarse
      grid denoted in global indices, ... */
   marker_shift = allow_emptygrids ? vstart+1 : vstart;
   for (i=0;i<num_variables;i++) {
      if (CF_marker[i]>0) CF_marker[i]+=marker_shift;
   }

   /* exchange data */
   *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,num_cols_offd);
   int_buf_data = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,num_sends));

   /* pack the markers of all points that are sent to some neighbor */
   for (i=0,ii=0;i<num_sends;i++) {
      start = hypre_ParCSRCommPkgSendMapStart (comm_pkg,i);
      for (j=start;j<hypre_ParCSRCommPkgSendMapStart (comm_pkg,i+1);j++)
         int_buf_data [ii++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
   }

   if (mpisize>1) {
      /* create the handle and destroy it right away, as the original does */
      comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,int_buf_data,*CF_marker_offd);
      hypre_ParCSRCommHandleDestroy (comm_handle);
   }
   hypre_TFree (int_buf_data);
   *vrange=vertexrange;
   return (ierr);
}
/************************************************************** * * CGC Coarsening routine * **************************************************************/ HYPRE_Int hypre_BoomerAMGCoarsenCGCb( hypre_ParCSRMatrix *S, hypre_ParCSRMatrix *A, HYPRE_Int measure_type, HYPRE_Int coarsen_type, HYPRE_Int cgc_its, HYPRE_Int debug_flag, HYPRE_Int **CF_marker_ptr) { MPI_Comm comm = hypre_ParCSRMatrixComm(S); hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(S); hypre_ParCSRCommHandle *comm_handle; hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag(S); hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd(S); HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd); HYPRE_Int *S_offd_j; HYPRE_Int num_variables = hypre_CSRMatrixNumRows(S_diag); HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(S_offd); hypre_CSRMatrix *S_ext; HYPRE_Int *S_ext_i; HYPRE_Int *S_ext_j; hypre_CSRMatrix *ST; HYPRE_Int *ST_i; HYPRE_Int *ST_j; HYPRE_Int *CF_marker; HYPRE_Int *CF_marker_offd=NULL; HYPRE_Int ci_tilde = -1; HYPRE_Int ci_tilde_mark = -1; HYPRE_Int *measure_array; HYPRE_Int *measure_array_master; HYPRE_Int *graph_array; HYPRE_Int *int_buf_data=NULL; /*HYPRE_Int *ci_array=NULL;*/ HYPRE_Int i, j, k, l, jS; HYPRE_Int ji, jj, index; HYPRE_Int set_empty = 1; HYPRE_Int C_i_nonempty = 0; HYPRE_Int num_nonzeros; HYPRE_Int num_procs, my_id; HYPRE_Int num_sends = 0; HYPRE_Int first_col, start; HYPRE_Int col_0, col_n; hypre_LinkList LoL_head; hypre_LinkList LoL_tail; HYPRE_Int *lists, *where; HYPRE_Int measure, new_meas; HYPRE_Int num_left; HYPRE_Int nabor, nabor_two; HYPRE_Int ierr = 0; HYPRE_Int use_commpkg_A = 0; HYPRE_Real wall_time; HYPRE_Int measure_max; /* BM Aug 30, 2006: maximal measure, needed for CGC */ if (coarsen_type < 0) coarsen_type = -coarsen_type; /*------------------------------------------------------- * Initialize the C/F marker, LoL_head, LoL_tail arrays 
*-------------------------------------------------------*/ LoL_head = NULL; LoL_tail = NULL; lists = hypre_CTAlloc(HYPRE_Int, num_variables); where = hypre_CTAlloc(HYPRE_Int, num_variables); #if 0 /* debugging */ char filename[256]; FILE *fp; HYPRE_Int iter = 0; #endif /*-------------------------------------------------------------- * Compute a CSR strength matrix, S. * * For now, the "strength" of dependence/influence is defined in * the following way: i depends on j if * aij > hypre_max (k != i) aik, aii < 0 * or * aij < hypre_min (k != i) aik, aii >= 0 * Then S_ij = 1, else S_ij = 0. * * NOTE: the entries are negative initially, corresponding * to "unaccounted-for" dependence. *----------------------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_MPI_Comm_size(comm,&num_procs); hypre_MPI_Comm_rank(comm,&my_id); if (!comm_pkg) { use_commpkg_A = 1; comm_pkg = hypre_ParCSRMatrixCommPkg(A); } if (!comm_pkg) { hypre_MatvecCommPkgCreate(A); comm_pkg = hypre_ParCSRMatrixCommPkg(A); } num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); if (num_cols_offd) S_offd_j = hypre_CSRMatrixJ(S_offd); jS = S_i[num_variables]; ST = hypre_CSRMatrixCreate(num_variables, num_variables, jS); ST_i = hypre_CTAlloc(HYPRE_Int,num_variables+1); ST_j = hypre_CTAlloc(HYPRE_Int,jS); hypre_CSRMatrixI(ST) = ST_i; hypre_CSRMatrixJ(ST) = ST_j; /*---------------------------------------------------------- * generate transpose of S, ST *----------------------------------------------------------*/ for (i=0; i <= num_variables; i++) ST_i[i] = 0; for (i=0; i < jS; i++) { ST_i[S_j[i]+1]++; } for (i=0; i < num_variables; i++) { ST_i[i+1] += ST_i[i]; } for (i=0; i < num_variables; i++) { for (j=S_i[i]; j < S_i[i+1]; j++) { index = S_j[j]; ST_j[ST_i[index]] = i; ST_i[index]++; } } for (i = num_variables; i > 0; i--) { ST_i[i] = ST_i[i-1]; } ST_i[0] = 0; /*---------------------------------------------------------- * Compute the measures * * The 
measures are given by the row sums of ST. * Hence, measure_array[i] is the number of influences * of variable i. * correct actual measures through adding influences from * neighbor processors *----------------------------------------------------------*/ measure_array_master = hypre_CTAlloc(HYPRE_Int, num_variables); measure_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { measure_array_master[i] = ST_i[i+1]-ST_i[i]; } if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) { if (use_commpkg_A) S_ext = hypre_ParCSRMatrixExtractBExt(S,A,0); else S_ext = hypre_ParCSRMatrixExtractBExt(S,S,0); S_ext_i = hypre_CSRMatrixI(S_ext); S_ext_j = hypre_CSRMatrixJ(S_ext); num_nonzeros = S_ext_i[num_cols_offd]; first_col = hypre_ParCSRMatrixFirstColDiag(S); col_0 = first_col-1; col_n = col_0+num_variables; if (measure_type) { for (i=0; i < num_nonzeros; i++) { index = S_ext_j[i] - first_col; if (index > -1 && index < num_variables) measure_array_master[index]++; } } } /*--------------------------------------------------- * Loop until all points are either fine or coarse. 
*---------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); /* first coarsening phase */ /************************************************************* * * Initialize the lists * *************************************************************/ CF_marker = hypre_CTAlloc(HYPRE_Int, num_variables); num_left = 0; for (j = 0; j < num_variables; j++) { if ((S_i[j+1]-S_i[j])== 0 && (S_offd_i[j+1]-S_offd_i[j]) == 0) { CF_marker[j] = SF_PT; measure_array_master[j] = 0; } else { CF_marker[j] = UNDECIDED; /* num_left++; */ /* BM May 19, 2006: see below*/ } } if (coarsen_type==22) { /* BM Sep 8, 2006: allow_emptygrids only if the following holds for all points j: (a) the point has no strong connections at all, OR (b) the point has a strong connection across a boundary */ for (j=0;j<num_variables;j++) if (S_i[j+1]>S_i[j] && S_offd_i[j+1] == S_offd_i[j]) {coarsen_type=21;break;} } for (l = 1; l <= cgc_its; l++) { LoL_head = NULL; LoL_tail = NULL; num_left = 0; /* compute num_left before each RS coarsening loop */ memcpy (measure_array,measure_array_master,num_variables*sizeof(HYPRE_Int)); memset (lists,0,sizeof(HYPRE_Int)*num_variables); memset (where,0,sizeof(HYPRE_Int)*num_variables); for (j = 0; j < num_variables; j++) { measure = measure_array[j]; if (CF_marker[j] != SF_PT) { if (measure > 0) { enter_on_lists(&LoL_head, &LoL_tail, measure, j, lists, where); num_left++; /* compute num_left before each RS coarsening loop */ } else if (CF_marker[j] == 0) /* increase weight of strongly coupled neighbors only if j is not conained in a previously constructed coarse grid. Reason: these neighbors should start with the same initial weight in each CGC iteration. 
BM Aug 30, 2006 */ { if (measure < 0) hypre_printf("negative measure!\n"); /* CF_marker[j] = f_pnt; */ for (k = S_i[j]; k < S_i[j+1]; k++) { nabor = S_j[k]; /* if (CF_marker[nabor] != SF_PT) */ if (CF_marker[nabor] == 0) /* BM Aug 30, 2006: don't alter weights of points contained in other candidate coarse grids */ { if (nabor < j) { new_meas = measure_array[nabor]; if (new_meas > 0) remove_point(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); else num_left++; /* BM Aug 29, 2006 */ new_meas = ++(measure_array[nabor]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor, lists, where); } else { new_meas = ++(measure_array[nabor]); } } } /* --num_left; */ /* BM May 19, 2006 */ } } } /* BM Aug 30, 2006: first iteration: determine maximal weight */ if (num_left && l==1) measure_max = measure_array[LoL_head->head]; /* BM Aug 30, 2006: break CGC iteration if no suitable starting point is available any more */ if (!num_left || measure_array[LoL_head->head]<measure_max) { while (LoL_head) { hypre_LinkList list_ptr = LoL_head; LoL_head = LoL_head->next_elt; dispose_elt (list_ptr); } break; } /**************************************************************** * * Main loop of Ruge-Stueben first coloring pass. 
* * WHILE there are still points to classify DO: * 1) find first point, i, on list with max_measure * make i a C-point, remove it from the lists * 2) For each point, j, in S_i^T, * a) Set j to be an F-point * b) For each point, k, in S_j * move k to the list in LoL with measure one * greater than it occupies (creating new LoL * entry if necessary) * 3) For each point, j, in S_i, * move j to the list in LoL with measure one * smaller than it occupies (creating new LoL * entry if necessary) * ****************************************************************/ while (num_left > 0) { index = LoL_head -> head; /* index = LoL_head -> tail; */ /* CF_marker[index] = C_PT; */ CF_marker[index] = l; /* BM Aug 18, 2006 */ measure = measure_array[index]; measure_array[index] = 0; measure_array_master[index] = 0; /* BM May 19: for CGC */ --num_left; remove_point(&LoL_head, &LoL_tail, measure, index, lists, where); for (j = ST_i[index]; j < ST_i[index+1]; j++) { nabor = ST_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ measure = measure_array[nabor]; measure_array[nabor]=0; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) /* undecided point */ { measure = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, measure, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } for (j = S_i[index]; j < S_i[index+1]; j++) { nabor = S_j[j]; /* if (CF_marker[nabor] == UNDECIDED) */ if (measure_array[nabor]>0) /* undecided point */ { measure = measure_array[nabor]; remove_point(&LoL_head, &LoL_tail, measure, nabor, lists, where); measure_array[nabor] = --measure; if (measure > 0) enter_on_lists(&LoL_head, &LoL_tail, 
measure, nabor, lists, where); else { /* CF_marker[nabor] = F_PT; */ /* BM Aug 18, 2006 */ --num_left; for (k = S_i[nabor]; k < S_i[nabor+1]; k++) { nabor_two = S_j[k]; /* if (CF_marker[nabor_two] == UNDECIDED) */ if (measure_array[nabor_two]>0) { new_meas = measure_array[nabor_two]; remove_point(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); new_meas = ++(measure_array[nabor_two]); enter_on_lists(&LoL_head, &LoL_tail, new_meas, nabor_two, lists, where); } } } } } } if (LoL_head) hypre_printf ("Linked list not empty! head: %d\n",LoL_head->head); } l--; /* BM Aug 15, 2006 */ hypre_TFree(measure_array); hypre_TFree(measure_array_master); hypre_CSRMatrixDestroy(ST); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 1st pass = %f\n", my_id, wall_time); } hypre_TFree(lists); hypre_TFree(where); if (num_procs>1) { if (debug_flag == 3) wall_time = time_getWallclockSeconds(); hypre_BoomerAMGCoarsenCGC (S,l,coarsen_type,CF_marker); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen CGC = %f\n", my_id, wall_time); } } else { /* the first candiate coarse grid is the coarse grid */ for (j=0;j<num_variables;j++) { if (CF_marker[j]==1) CF_marker[j]=C_PT; else CF_marker[j]=F_PT; } } /* BM May 19, 2006: Set all undecided points to be fine grid points. 
*/ for (j=0;j<num_variables;j++) if (!CF_marker[j]) CF_marker[j]=F_PT; /*--------------------------------------------------- * Initialize the graph array *---------------------------------------------------*/ graph_array = hypre_CTAlloc(HYPRE_Int, num_variables); for (i = 0; i < num_variables; i++) { graph_array[i] = -1; } if (debug_flag == 3) wall_time = time_getWallclockSeconds(); for (i=0; i < num_variables; i++) { if (ci_tilde_mark != i) ci_tilde = -1; if (CF_marker[i] == -1) { for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] > 0) graph_array[j] = i; } for (ji = S_i[i]; ji < S_i[i+1]; ji++) { j = S_j[ji]; if (CF_marker[j] == -1) { set_empty = 1; for (jj = S_i[j]; jj < S_i[j+1]; jj++) { index = S_j[jj]; if (graph_array[index] == i) { set_empty = 0; break; } } if (set_empty) { if (C_i_nonempty) { CF_marker[i] = 1; if (ci_tilde > -1) { CF_marker[ci_tilde] = -1; ci_tilde = -1; } C_i_nonempty = 0; break; } else { ci_tilde = j; ci_tilde_mark = i; CF_marker[j] = 1; C_i_nonempty = 1; i--; break; } } } } } } if (debug_flag == 3 && coarsen_type != 2) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d Coarsen 2nd pass = %f\n", my_id, wall_time); } /* third pass, check boundary fine points for coarse neighbors */ /*------------------------------------------------ * Exchange boundary data for CF_marker *------------------------------------------------*/ if (debug_flag == 3) wall_time = time_getWallclockSeconds(); CF_marker_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd); int_buf_data = hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) int_buf_data[index++] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)]; } if (num_procs > 1) { comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, CF_marker_offd); 
hypre_ParCSRCommHandleDestroy(comm_handle); } AmgCGCBoundaryFix (S,CF_marker,CF_marker_offd); if (debug_flag == 3) { wall_time = time_getWallclockSeconds() - wall_time; hypre_printf("Proc = %d CGC boundary fix = %f\n", my_id, wall_time); } /*--------------------------------------------------- * Clean up and return *---------------------------------------------------*/ /*if (coarsen_type != 1) { */ if (CF_marker_offd) hypre_TFree(CF_marker_offd); /* BM Aug 21, 2006 */ if (int_buf_data) hypre_TFree(int_buf_data); /* BM Aug 21, 2006 */ /*if (ci_array) hypre_TFree(ci_array);*/ /* BM Aug 21, 2006 */ /*} */ hypre_TFree(graph_array); if ((measure_type || (coarsen_type != 1 && coarsen_type != 11)) && num_procs > 1) hypre_CSRMatrixDestroy(S_ext); *CF_marker_ptr = CF_marker; return (ierr); }
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixConvertToParCSRMatrix
 *
 * Expands a block ParCSR matrix into a newly created scalar ParCSR matrix.
 *
 * - Row/column partitionings and global sizes are scaled by the block size.
 * - The diag and offd parts are produced by
 *   hypre_CSRBlockMatrixConvertToCSRMatrix and then passed through
 *   hypre_CSRMatrixDeleteZeros with tolerance 1e-14; when that call returns
 *   a matrix it replaces the converted part.
 * - If the offd part was replaced, off-diagonal columns that are no longer
 *   referenced are dropped from the column map and the offd column indices
 *   are renumbered accordingly.
 * - The communication package of the result is set to NULL (not copied).
 *
 * Returns the new matrix.
 *--------------------------------------------------------------------------*/

hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix)
{
   MPI_Comm              comm           = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *blk_diag       = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *blk_offd       = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int             bs             = hypre_ParCSRBlockMatrixBlockSize(matrix);
   HYPRE_Int             blk_glob_rows  = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int             blk_glob_cols  = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int            *blk_row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int            *blk_col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int             blk_cols_offd  = hypre_CSRBlockMatrixNumCols(blk_offd);
   HYPRE_Int             blk_nnz_diag   = hypre_CSRBlockMatrixNumNonzeros(blk_diag);
   HYPRE_Int             blk_nnz_offd   = hypre_CSRBlockMatrixNumNonzeros(blk_offd);

   hypre_ParCSRMatrix   *scalar_mat;
   hypre_CSRMatrix      *stripped;
   HYPRE_Int            *scalar_row_starts;
   HYPRE_Int            *scalar_col_starts;
   HYPRE_Int            *scalar_col_map;
   HYPRE_Int            *blk_col_map;
   HYPRE_Int             num_procs;
   HYPRE_Int             num_starts;
   HYPRE_Int             offd_zeros_removed = 0;
   HYPRE_Int             p, c, b;

   hypre_MPI_Comm_size(comm, &num_procs);

   /* Number of partition entries stored per process depends on the build. */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   num_starts = 2;
#else
   num_starts = num_procs + 1;
#endif

   /* Scalar partitionings are the block partitionings scaled by bs. */
   scalar_row_starts = hypre_CTAlloc(HYPRE_Int, num_starts);
   scalar_col_starts = hypre_CTAlloc(HYPRE_Int, num_starts);
   for (p = 0; p < num_starts; p++)
   {
      scalar_row_starts[p] = blk_row_starts[p] * bs;
      scalar_col_starts[p] = blk_col_starts[p] * bs;
   }

   scalar_mat = hypre_ParCSRMatrixCreate(comm,
                                         blk_glob_rows * bs,
                                         blk_glob_cols * bs,
                                         scalar_row_starts,
                                         scalar_col_starts,
                                         blk_cols_offd * bs,
                                         blk_nnz_diag * bs * bs,
                                         blk_nnz_offd * bs * bs);
   hypre_ParCSRMatrixInitialize(scalar_mat);

   /* Diagonal part: swap in the converted block matrix, then strip zeros. */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(scalar_mat));
   hypre_ParCSRMatrixDiag(scalar_mat) =
      hypre_CSRBlockMatrixConvertToCSRMatrix(blk_diag);

   stripped = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixDiag(scalar_mat), 1e-14);
   if (stripped != NULL)
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(scalar_mat));
      hypre_ParCSRMatrixDiag(scalar_mat) = stripped;
   }

   /* Off-diagonal part: same treatment; zero removal here only touches the
      data and j arrays, so remember it for the column-map compression. */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(scalar_mat));
   hypre_ParCSRMatrixOffd(scalar_mat) =
      hypre_CSRBlockMatrixConvertToCSRMatrix(blk_offd);

   stripped = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixOffd(scalar_mat), 1e-14);
   if (stripped != NULL)
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(scalar_mat));
      hypre_ParCSRMatrixOffd(scalar_mat) = stripped;
      offd_zeros_removed = 1;
   }

   /* Expand the block column map: block column g becomes scalar columns
      g*bs, g*bs+1, ..., g*bs+bs-1. */
   scalar_col_map = hypre_ParCSRMatrixColMapOffd(scalar_mat);
   blk_col_map    = hypre_ParCSRBlockMatrixColMapOffd(matrix);
   for (c = 0; c < blk_cols_offd; c++)
   {
      for (b = 0; b < bs; b++)
      {
         scalar_col_map[c * bs + b] = blk_col_map[c] * bs + b;
      }
   }

   /* If zeros were deleted from the offd part, some offd columns may no
      longer be referenced; compress the column map in that case. */
   if (offd_zeros_removed)
   {
      HYPRE_Int        map_len   = blk_cols_offd * bs;
      HYPRE_Int       *col_used  = hypre_CTAlloc(HYPRE_Int, map_len);
      HYPRE_Int       *renumber  = hypre_CTAlloc(HYPRE_Int, map_len);
      hypre_CSRMatrix *c_offd    = hypre_ParCSRMatrixOffd(scalar_mat);
      HYPRE_Int       *c_offd_j  = hypre_CSRMatrixJ(c_offd);
      HYPRE_Int       *old_map   = hypre_ParCSRMatrixColMapOffd(scalar_mat);
      HYPRE_Int        nnz_offd  = hypre_CSRMatrixNumNonzeros(c_offd);
      HYPRE_Int        num_kept  = 0;
      HYPRE_Int        k;

      /* Flag every offd column that still appears in j. */
      for (k = 0; k < nnz_offd; k++)
      {
         col_used[c_offd_j[k]] = 1;
      }

      /* Assign consecutive new indices to the columns that survive. */
      for (k = 0; k < map_len; k++)
      {
         if (col_used[k])
         {
            renumber[k] = num_kept++;
         }
      }

      /* Only rewrite anything when at least one column disappears. */
      if (num_kept != map_len)
      {
         HYPRE_Int *compact_map;
         HYPRE_Int  next = 0;

         /* Translate the column indices to the new numbering. */
         for (k = 0; k < nnz_offd; k++)
         {
            c_offd_j[k] = renumber[c_offd_j[k]];
         }

         /* Copy the surviving global column ids into a shorter map. */
         compact_map = hypre_CTAlloc(HYPRE_Int, num_kept);
         for (k = 0; k < map_len; k++)
         {
            if (col_used[k])
            {
               compact_map[next++] = old_map[k];
            }
         }

         /* Install the new map and the reduced column count. */
         hypre_TFree(old_map);
         hypre_ParCSRMatrixColMapOffd(scalar_mat) = compact_map;
         hypre_CSRMatrixNumCols(c_offd) = num_kept;
      }

      hypre_TFree(renumber);
      hypre_TFree(col_used);
   }

   hypre_ParCSRMatrixSetNumNonzeros(scalar_mat);
   hypre_ParCSRMatrixSetDNumNonzeros(scalar_mat);

   /* The communication package is deliberately not copied. */
   hypre_ParCSRMatrixCommPkg(scalar_mat) = NULL;

   return scalar_mat;
}
/*--------------------------------------------------------------------------
 * hypre_CSRMatrixMatvec
 *
 * Computes y <- alpha*A*x + beta*y for a CSR matrix A.
 *
 * The update is carried out in three steps: y is scaled by beta/alpha,
 * A*x is accumulated into y, and the result is scaled by alpha.  When
 * alpha == 0.0 only y <- beta*y is performed.
 *
 * x and y may hold several vectors (num_vectors > 1); entries are then
 * addressed through the vectors' index and vector strides.  x and y must
 * hold the same number of vectors (checked with hypre_assert).
 *
 * Return value (informational only, processing is never aborted):
 *   0  sizes are compatible
 *   1  length of x differs from the number of columns of A
 *   2  length of y differs from the number of rows of A
 *   3  both of the above
 *--------------------------------------------------------------------------*/

int
hypre_CSRMatrixMatvec( double           alpha,
                       hypre_CSRMatrix *A,
                       hypre_Vector    *x,
                       double           beta,
                       hypre_Vector    *y     )
{
   double     *A_data      = hypre_CSRMatrixData(A);
   int        *A_i         = hypre_CSRMatrixI(A);
   int        *A_j         = hypre_CSRMatrixJ(A);
   int         num_rows    = hypre_CSRMatrixNumRows(A);
   int         num_cols    = hypre_CSRMatrixNumCols(A);

   int        *A_rownnz    = hypre_CSRMatrixRownnz(A);
   int         num_rownnz  = hypre_CSRMatrixNumRownnz(A);

   double     *x_data      = hypre_VectorData(x);
   double     *y_data      = hypre_VectorData(y);
   int         x_size      = hypre_VectorSize(x);
   int         y_size      = hypre_VectorSize(y);
   int         num_vectors = hypre_VectorNumVectors(x);
   int         idxstride_y = hypre_VectorIndexStride(y);
   int         vecstride_y = hypre_VectorVectorStride(y);
   int         idxstride_x = hypre_VectorIndexStride(x);
   int         vecstride_x = hypre_VectorVectorStride(x);

   double      temp, tempx;

   int         i, j, jj;

   int         m;

   /* The rownnz path is taken when fewer than this fraction of the rows
      are listed in A_rownnz. */
   double      xpar = 0.7;

   int         ierr = 0;

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  Because temporary vectors are often
    *  used in Matvec, none of these conditions terminates processing, and
    *  the ierr flag is informational only.
    *--------------------------------------------------------------------*/

   hypre_assert( num_vectors == hypre_VectorNumVectors(y) );

   if (num_cols != x_size)
   {
      ierr = 1;
   }

   if (num_rows != y_size)
   {
      ierr = 2;
   }

   if (num_cols != x_size && num_rows != y_size)
   {
      ierr = 3;
   }

   /*-----------------------------------------------------------------------
    * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS
    *-----------------------------------------------------------------------*/

   if (alpha == 0.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
      {
         y_data[i] *= beta;
      }

      return ierr;
   }

   /*-----------------------------------------------------------------------
    * y = (beta/alpha)*y
    *-----------------------------------------------------------------------*/

   temp = beta / alpha;

   if (temp != 1.0)
   {
      if (temp == 0.0)
      {
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] = 0.0;
         }
      }
      else
      {
         for (i = 0; i < num_rows*num_vectors; i++)
         {
            y_data[i] *= temp;
         }
      }
   }

   /*-----------------------------------------------------------------
    * y += A*x
    *-----------------------------------------------------------------*/

   if (num_rownnz < xpar*(num_rows))
   {
      /* Visit only the rows listed in A_rownnz. */
      for (i = 0; i < num_rownnz; i++)
      {
         m = A_rownnz[i];

         if ( num_vectors==1 )
         {
            tempx = y_data[m];
            for (jj = A_i[m]; jj < A_i[m+1]; jj++)
            {
               tempx += A_data[jj] * x_data[A_j[jj]];
            }
            y_data[m] = tempx;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               tempx = y_data[ j*vecstride_y + m*idxstride_y ];
               for (jj = A_i[m]; jj < A_i[m+1]; jj++)
               {
                  tempx += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + m*idxstride_y ] = tempx;
            }
         }
      }
   }
   else
   {
      /* j must be private as well: it is the index of the inner
         multi-vector loop, and in C only the index of the loop associated
         with the directive is made private automatically.  Leaving it
         shared makes the threads race on it. */
#pragma omp parallel for private(i,j,jj,temp) schedule(static)
      for (i = 0; i < num_rows; i++)
      {
         if ( num_vectors==1 )
         {
            temp = y_data[i];
            for (jj = A_i[i]; jj < A_i[i+1]; jj++)
            {
               temp += A_data[jj] * x_data[A_j[jj]];
            }
            y_data[i] = temp;
         }
         else
         {
            for ( j=0; j<num_vectors; ++j )
            {
               temp = y_data[ j*vecstride_y + i*idxstride_y ];
               for (jj = A_i[i]; jj < A_i[i+1]; jj++)
               {
                  temp += A_data[jj] * x_data[ j*vecstride_x + A_j[jj]*idxstride_x ];
               }
               y_data[ j*vecstride_y + i*idxstride_y ] = temp;
            }
         }
      }
   }

   /*-----------------------------------------------------------------
    * y = alpha*y
    *-----------------------------------------------------------------*/

   if (alpha != 1.0)
   {
      for (i = 0; i < num_rows*num_vectors; i++)
      {
         y_data[i] *= alpha;
      }
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_SchwarzSetup
 *
 * Prepares a Schwarz solver object for the matrix A.
 *
 * - Creates and initializes a work vector with A's communicator, global
 *   row count and row partitioning (the vector does not own the
 *   partitioning) and stores it in the solver data.
 * - variant > 1 : builds the domain structure with
 *                 hypre_ParAMGCreateDomainDof; variant == 2 generates the
 *                 scaling with hypre_ParGenerateScale, any other value uses
 *                 hypre_ParGenerateHybridScale and additionally records the
 *                 boundary matrix (NULL when A has no off-diagonal columns).
 * - variant <= 1: builds the domain structure from the diagonal part of A
 *                 with hypre_AMGCreateDomainDof; a scaling is generated and
 *                 stored only when variant == 1.
 * - Stores the domain structure and pivots in the solver data.
 *
 * f and u are not referenced.  Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/

HYPRE_Int
hypre_SchwarzSetup(void               *schwarz_vdata,
                   hypre_ParCSRMatrix *A,
                   hypre_ParVector    *f,
                   hypre_ParVector    *u         )
{
   hypre_SchwarzData *data = (hypre_SchwarzData *) schwarz_vdata;

   /* Solver options read from the data object. */
   HYPRE_Int   which_variant = hypre_SchwarzDataVariant(data);
   HYPRE_Int   dom_type      = hypre_SchwarzDataDomainType(data);
   HYPRE_Int   dom_overlap   = hypre_SchwarzDataOverlap(data);
   HYPRE_Int   n_functions   = hypre_SchwarzDataNumFunctions(data);
   double      weight        = hypre_SchwarzDataRelaxWeight(data);
   HYPRE_Int   nonsymm       = hypre_SchwarzDataUseNonSymm(data);
   HYPRE_Int  *func_of_dof   = hypre_SchwarzDataDofFunc(data);

   /* Objects produced by the setup. */
   hypre_CSRMatrix *domains;
   hypre_CSRMatrix *boundary_mat;
   hypre_ParVector *work_vec;
   double          *scaling;
   HYPRE_Int       *piv = NULL;

   /* Work vector laid out like the rows of A. */
   work_vec = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(A),
                                    hypre_ParCSRMatrixGlobalNumRows(A),
                                    hypre_ParCSRMatrixRowStarts(A));
   hypre_ParVectorSetPartitioningOwner(work_vec, 0);
   hypre_ParVectorInitialize(work_vec);
   hypre_SchwarzDataVtemp(data) = work_vec;

   if (which_variant > 1)
   {
      /* Domains built from the full parallel matrix. */
      hypre_ParAMGCreateDomainDof(A,
                                  dom_type, dom_overlap,
                                  n_functions, func_of_dof,
                                  &domains, &piv, nonsymm);

      if (which_variant == 2)
      {
         hypre_ParGenerateScale(A, domains, weight, &scaling);
      }
      else
      {
         hypre_ParGenerateHybridScale(A, domains, &boundary_mat, &scaling);

         /* Keep the boundary matrix only if A has off-diagonal columns. */
         hypre_SchwarzDataABoundary(data) =
            hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(A)) ? boundary_mat : NULL;
      }

      hypre_SchwarzDataScale(data) = scaling;
   }
   else
   {
      /* Domains built from the local (diagonal) part of A only. */
      hypre_AMGCreateDomainDof(hypre_ParCSRMatrixDiag(A),
                               dom_type, dom_overlap,
                               n_functions, func_of_dof,
                               &domains, &piv, nonsymm);

      if (which_variant == 1)
      {
         hypre_GenerateScale(domains,
                             hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A)),
                             weight, &scaling);
         hypre_SchwarzDataScale(data) = scaling;
      }
   }

   hypre_SchwarzDataDomainStructure(data) = domains;
   hypre_SchwarzDataPivots(data) = piv;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_ParMatmul_FC
 *
 * Creates and returns the "Fine"-designated rows of the matrix product A*P.
 *
 * A's size is (nC+nF)*(nC+nF), P's size is (nC+nF)*nC, where nC is the
 * number of coarse rows/columns and nF the number of fine rows/columns.
 * The size of C=A*P is (nC+nF)*nC, even though not all rows of C are
 * actually computed.  A matrix holding only the computed rows would be
 * nF*nC; building it would need global information that depends on which
 * rows are "Fine", hence extra work and communication, so it is not done.
 *
 * Row classification uses only the marker array:
 *   CF_marker[i] <  0 : row i is "Fine"; row i of C is computed as
 *                       (row i of A) * P.
 *   otherwise         : row i is coarse; row i of P is copied into C
 *                       (useful when C is meant to replace P).
 * "Fine" could therefore be a proper subset of the fine points of a
 * multigrid hierarchy.
 *
 * If dof_func is not NULL, an entry A(i1,i2) contributes to a fine row only
 * when i1 and i2 belong to the same function (dof_func for local columns,
 * dof_func_offd for off-processor columns).
 *
 * The returned matrix uses A's row partitioning and P's column partitioning
 * but does not own either of them.  If the dimensions of A and P are
 * incompatible an error message is printed and NULL is returned.
 *--------------------------------------------------------------------------*/

hypre_ParCSRMatrix *
hypre_ParMatmul_FC( hypre_ParCSRMatrix *A,
                    hypre_ParCSRMatrix *P,
                    HYPRE_Int          *CF_marker,
                    HYPRE_Int          *dof_func,
                    HYPRE_Int          *dof_func_offd )
{
   MPI_Comm         comm = hypre_ParCSRMatrixComm(A);

   hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);

   double          *A_diag_data = hypre_CSRMatrixData(A_diag);
   HYPRE_Int       *A_diag_i = hypre_CSRMatrixI(A_diag);
   HYPRE_Int       *A_diag_j = hypre_CSRMatrixJ(A_diag);

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);

   double          *A_offd_data = hypre_CSRMatrixData(A_offd);
   HYPRE_Int       *A_offd_i = hypre_CSRMatrixI(A_offd);
   HYPRE_Int       *A_offd_j = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int       *row_starts_A = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int        num_rows_diag_A = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int        num_cols_diag_A = hypre_CSRMatrixNumCols(A_diag);
   HYPRE_Int        num_cols_offd_A = hypre_CSRMatrixNumCols(A_offd);

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);

   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int       *col_map_offd_P = hypre_ParCSRMatrixColMapOffd(P);

   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);

   HYPRE_Int        first_col_diag_P = hypre_ParCSRMatrixFirstColDiag(P);
   HYPRE_Int        last_col_diag_P;
   HYPRE_Int       *col_starts_P = hypre_ParCSRMatrixColStarts(P);
   HYPRE_Int        num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int        num_cols_diag_P = hypre_CSRMatrixNumCols(P_diag);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);

   hypre_ParCSRMatrix *C;
   HYPRE_Int       *col_map_offd_C = NULL;
   HYPRE_Int       *map_P_to_C = NULL;

   hypre_CSRMatrix *C_diag;

   double          *C_diag_data;
   HYPRE_Int       *C_diag_i;
   HYPRE_Int       *C_diag_j;

   hypre_CSRMatrix *C_offd;

   double          *C_offd_data = NULL;
   HYPRE_Int       *C_offd_i = NULL;
   HYPRE_Int       *C_offd_j = NULL;

   HYPRE_Int        C_diag_size;
   HYPRE_Int        C_offd_size;
   HYPRE_Int        num_cols_offd_C = 0;

   /* The conditionally assigned pointers below start out as NULL so that
      no indeterminate value is ever read or handed to another routine. */
   hypre_CSRMatrix *Ps_ext = NULL;

   double          *Ps_ext_data = NULL;
   HYPRE_Int       *Ps_ext_i = NULL;
   HYPRE_Int       *Ps_ext_j = NULL;

   double          *P_ext_diag_data = NULL;
   HYPRE_Int       *P_ext_diag_i;
   HYPRE_Int       *P_ext_diag_j = NULL;
   HYPRE_Int        P_ext_diag_size;

   double          *P_ext_offd_data = NULL;
   HYPRE_Int       *P_ext_offd_i;
   HYPRE_Int       *P_ext_offd_j = NULL;
   HYPRE_Int        P_ext_offd_size;

   HYPRE_Int       *P_marker;
   HYPRE_Int       *temp = NULL;

   HYPRE_Int        i, j;
   HYPRE_Int        i1, i2, i3;
   HYPRE_Int        jj2, jj3;

   HYPRE_Int        jj_count_diag, jj_count_offd;
   HYPRE_Int        jj_row_begin_diag, jj_row_begin_offd;
   HYPRE_Int        start_indexing = 0; /* start indexing for C_data at 0 */
   HYPRE_Int        n_rows_A_global, n_cols_A_global;
   HYPRE_Int        n_rows_P_global, n_cols_P_global;
   HYPRE_Int        allsquare = 0;
   HYPRE_Int        cnt, cnt_offd, cnt_diag;
   HYPRE_Int        num_procs;
   HYPRE_Int        value;

   double           a_entry;
   double           a_b_product;

   n_rows_A_global = hypre_ParCSRMatrixGlobalNumRows(A);
   n_cols_A_global = hypre_ParCSRMatrixGlobalNumCols(A);
   n_rows_P_global = hypre_ParCSRMatrixGlobalNumRows(P);
   n_cols_P_global = hypre_ParCSRMatrixGlobalNumCols(P);

   if (n_cols_A_global != n_rows_P_global || num_cols_diag_A != num_rows_diag_P)
   {
      hypre_printf(" Error! Incompatible matrix dimensions!\n");
      return NULL;
   }
   /* if (num_rows_A==num_cols_P) allsquare = 1; */

   /*-----------------------------------------------------------------------
    *  Extract P_ext, i.e. portion of P that is stored on neighbor procs
    *  and needed locally for matrix matrix product
    *-----------------------------------------------------------------------*/

   hypre_MPI_Comm_size(comm, &num_procs);

   if (num_procs > 1)
   {
      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings within
       * hypre_ParCSRMatrixExtractBExt
       *--------------------------------------------------------------------*/
      Ps_ext = hypre_ParCSRMatrixExtractBExt(P,A,1);
      Ps_ext_data = hypre_CSRMatrixData(Ps_ext);
      Ps_ext_i    = hypre_CSRMatrixI(Ps_ext);
      Ps_ext_j    = hypre_CSRMatrixJ(Ps_ext);
   }

   /* Split the external rows of P into a part whose columns are owned
      locally ("diag") and a part whose columns are off-processor ("offd").
      First count the entries of each part per external row. */
   P_ext_diag_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_offd_i = hypre_CTAlloc(HYPRE_Int, num_cols_offd_A+1);
   P_ext_diag_size = 0;
   P_ext_offd_size = 0;
   last_col_diag_P = first_col_diag_P + num_cols_diag_P - 1;

   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_size++;
         }
         else
         {
            P_ext_diag_size++;
         }
      }
      P_ext_diag_i[i+1] = P_ext_diag_size;
      P_ext_offd_i[i+1] = P_ext_offd_size;
   }

   if (P_ext_diag_size)
   {
      P_ext_diag_j = hypre_CTAlloc(HYPRE_Int, P_ext_diag_size);
      P_ext_diag_data = hypre_CTAlloc(double, P_ext_diag_size);
   }
   if (P_ext_offd_size)
   {
      P_ext_offd_j = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size);
      P_ext_offd_data = hypre_CTAlloc(double, P_ext_offd_size);
   }

   /* Now fill both parts.  Diag columns are stored as local indices, offd
      columns keep their global index for the moment. */
   cnt_offd = 0;
   cnt_diag = 0;
   for (i = 0; i < num_cols_offd_A; i++)
   {
      for (j = Ps_ext_i[i]; j < Ps_ext_i[i+1]; j++)
      {
         if (Ps_ext_j[j] < first_col_diag_P || Ps_ext_j[j] > last_col_diag_P)
         {
            P_ext_offd_j[cnt_offd] = Ps_ext_j[j];
            P_ext_offd_data[cnt_offd++] = Ps_ext_data[j];
         }
         else
         {
            P_ext_diag_j[cnt_diag] = Ps_ext_j[j] - first_col_diag_P;
            P_ext_diag_data[cnt_diag++] = Ps_ext_data[j];
         }
      }
   }

   if (num_procs > 1)
   {
      hypre_CSRMatrixDestroy(Ps_ext);
      Ps_ext = NULL;
   }

   /* Build col_map_offd_C as the sorted union of the global offd columns
      of P_ext and of P itself. */
   cnt = 0;
   if (P_ext_offd_size || num_cols_offd_P)
   {
      temp = hypre_CTAlloc(HYPRE_Int, P_ext_offd_size+num_cols_offd_P);
      for (i = 0; i < P_ext_offd_size; i++)
      {
         temp[i] = P_ext_offd_j[i];
      }
      cnt = P_ext_offd_size;
      for (i = 0; i < num_cols_offd_P; i++)
      {
         temp[cnt++] = col_map_offd_P[i];
      }
   }
   if (cnt)
   {
      qsort0(temp, 0, cnt-1);

      /* Remove duplicates in place. */
      num_cols_offd_C = 1;
      value = temp[0];
      for (i = 1; i < cnt; i++)
      {
         if (temp[i] > value)
         {
            value = temp[i];
            temp[num_cols_offd_C++] = value;
         }
      }
   }

   if (num_cols_offd_C)
   {
      col_map_offd_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_C);
   }

   for (i = 0; i < num_cols_offd_C; i++)
   {
      col_map_offd_C[i] = temp[i];
   }

   if (P_ext_offd_size || num_cols_offd_P)
   {
      hypre_TFree(temp);
   }

   /* Replace the global offd columns of P_ext by positions in
      col_map_offd_C. */
   for (i = 0; i < P_ext_offd_size; i++)
   {
      P_ext_offd_j[i] = hypre_BinarySearch(col_map_offd_C,
                                           P_ext_offd_j[i],
                                           num_cols_offd_C);
   }

   /* map_P_to_C[k] is the position in col_map_offd_C of P's k-th offd
      column. */
   if (num_cols_offd_P)
   {
      map_P_to_C = hypre_CTAlloc(HYPRE_Int,num_cols_offd_P);

      cnt = 0;
      for (i = 0; i < num_cols_offd_C; i++)
      {
         if (col_map_offd_C[i] == col_map_offd_P[cnt])
         {
            map_P_to_C[cnt++] = i;
            if (cnt == num_cols_offd_P)
            {
               break;
            }
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Allocate marker array and mark every column as "not yet seen".
    *-----------------------------------------------------------------------*/

   P_marker = hypre_CTAlloc(HYPRE_Int, num_cols_diag_P+num_cols_offd_C);

   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /* no changes for the marked version above this point */
   /* This function call is the first pass: */
   hypre_ParMatmul_RowSizes_Marked(
      &C_diag_i, &C_offd_i, &P_marker,
      A_diag_i, A_diag_j, A_offd_i, A_offd_j,
      P_diag_i, P_diag_j, P_offd_i, P_offd_j,
      P_ext_diag_i, P_ext_diag_j, P_ext_offd_i, P_ext_offd_j,
      map_P_to_C,
      &C_diag_size, &C_offd_size,
      num_rows_diag_A, num_cols_offd_A, allsquare,
      num_cols_diag_P, num_cols_offd_P,
      num_cols_offd_C, CF_marker, dof_func, dof_func_offd
      );
   /* The above call of hypre_ParMatmul_RowSizes_Marked computed
      two scalars: C_diag_size, C_offd_size,
      and two arrays: C_diag_i, C_offd_i
      ( P_marker is also computed, but only used internally )
   */

   /*-----------------------------------------------------------------------
    *  Allocate C_diag_data and C_diag_j arrays.
    *  Allocate C_offd_data and C_offd_j arrays.
    *-----------------------------------------------------------------------*/

   C_diag_data = hypre_CTAlloc(double, C_diag_size);
   C_diag_j    = hypre_CTAlloc(HYPRE_Int, C_diag_size);
   if (C_offd_size)
   {
      C_offd_data = hypre_CTAlloc(double, C_offd_size);
      C_offd_j    = hypre_CTAlloc(HYPRE_Int, C_offd_size);
   }

   /*-----------------------------------------------------------------------
    *  Second Pass: Fill in C_diag_data and C_diag_j.
    *  Second Pass: Fill in C_offd_data and C_offd_j.
    *-----------------------------------------------------------------------*/

   jj_count_diag = start_indexing;
   jj_count_offd = start_indexing;
   for (i1 = 0; i1 < num_cols_diag_P+num_cols_offd_C; i1++)
   {
      P_marker[i1] = -1;
   }

   /*-----------------------------------------------------------------------
    *  Loop over the local rows.
    *-----------------------------------------------------------------------*/

   for (i1 = 0; i1 < num_rows_diag_A; i1++)
   {
      if ( CF_marker[i1] < 0 )  /* i1 is a fine row */
      {
         /* This and the coarse row code are the only parts between first
            pass and near the end where hypre_ParMatmul_FC is different
            from the regular hypre_ParMatmul */

         /* Entries of row i1 start here; a P_marker value below these
            offsets means "column not yet present in this row". */
         jj_row_begin_diag = jj_count_diag;
         jj_row_begin_offd = jj_count_offd;

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_offd.
          *-----------------------------------------------------------------*/

         if (num_cols_offd_A)
         {
            for (jj2 = A_offd_i[i1]; jj2 < A_offd_i[i1+1]; jj2++)
            {
               i2 = A_offd_j[jj2];
               if ( dof_func==NULL || dof_func[i1] == dof_func_offd[i2] )
               {
                  /* interpolate only like "functions" */
                  a_entry = A_offd_data[jj2];

                  /*-----------------------------------------------------------
                   *  Loop over entries in row i2 of P_ext.
                   *-----------------------------------------------------------*/

                  for (jj3 = P_ext_offd_i[i2]; jj3 < P_ext_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+P_ext_offd_j[jj3];
                     a_b_product = a_entry * P_ext_offd_data[jj3];

                     /*--------------------------------------------------------
                      *  Check P_marker to see that C_{i1,i3} has not already
                      *  been accounted for. If it has not, create a new entry.
                      *  If it has, add new contribution.
                      *--------------------------------------------------------*/
                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }

                  for (jj3 = P_ext_diag_i[i2]; jj3 < P_ext_diag_i[i2+1]; jj3++)
                  {
                     i3 = P_ext_diag_j[jj3];
                     a_b_product = a_entry * P_ext_diag_data[jj3];

                     if (P_marker[i3] < jj_row_begin_diag)
                     {
                        P_marker[i3] = jj_count_diag;
                        C_diag_data[jj_count_diag] = a_b_product;
                        C_diag_j[jj_count_diag] = i3;
                        jj_count_diag++;
                     }
                     else
                     {
                        C_diag_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
               /* else: Interpolation mat should be 0 where i1 and i2
                  correspond to different "functions".  As we haven't
                  created an entry for C(i1,i2), nothing needs to be done. */
            }
         }

         /*-----------------------------------------------------------------
          *  Loop over entries in row i1 of A_diag.
          *-----------------------------------------------------------------*/

         for (jj2 = A_diag_i[i1]; jj2 < A_diag_i[i1+1]; jj2++)
         {
            i2 = A_diag_j[jj2];
            if ( dof_func==NULL || dof_func[i1] == dof_func[i2] )
            {
               /* interpolate only like "functions" */
               a_entry = A_diag_data[jj2];

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_diag.
                *-----------------------------------------------------------*/

               for (jj3 = P_diag_i[i2]; jj3 < P_diag_i[i2+1]; jj3++)
               {
                  i3 = P_diag_j[jj3];
                  a_b_product = a_entry * P_diag_data[jj3];

                  /*--------------------------------------------------------
                   *  Check P_marker to see that C_{i1,i3} has not already
                   *  been accounted for. If it has not, create a new entry.
                   *  If it has, add new contribution.
                   *--------------------------------------------------------*/
                  if (P_marker[i3] < jj_row_begin_diag)
                  {
                     P_marker[i3] = jj_count_diag;
                     C_diag_data[jj_count_diag] = a_b_product;
                     C_diag_j[jj_count_diag] = i3;
                     jj_count_diag++;
                  }
                  else
                  {
                     C_diag_data[P_marker[i3]] += a_b_product;
                  }
               }

               /*-----------------------------------------------------------
                *  Loop over entries in row i2 of P_offd.
                *-----------------------------------------------------------*/

               if (num_cols_offd_P)
               {
                  for (jj3 = P_offd_i[i2]; jj3 < P_offd_i[i2+1]; jj3++)
                  {
                     i3 = num_cols_diag_P+map_P_to_C[P_offd_j[jj3]];
                     a_b_product = a_entry * P_offd_data[jj3];

                     if (P_marker[i3] < jj_row_begin_offd)
                     {
                        P_marker[i3] = jj_count_offd;
                        C_offd_data[jj_count_offd] = a_b_product;
                        C_offd_j[jj_count_offd] = i3-num_cols_diag_P;
                        jj_count_offd++;
                     }
                     else
                     {
                        C_offd_data[P_marker[i3]] += a_b_product;
                     }
                  }
               }
            }
            /* else: Interpolation mat should be 0 where i1 and i2
               correspond to different "functions".  As we haven't created
               an entry for C(i1,i2), nothing needs to be done. */
         }
      }
      else  /* i1 is a coarse row. */
      {
         /* Copy P coarse-row values to C.  This is useful if C is meant to
            become a replacement for P. */
         if (num_cols_offd_P)
         {
            for (jj2 = P_offd_i[i1]; jj2 < P_offd_i[i1+1]; jj2++)
            {
               /* Read P with the row cursor jj2 (not with C's running
                  counter), and translate the column from P's offd
                  numbering to C's, exactly as the fine-row code does. */
               C_offd_j[jj_count_offd]    = map_P_to_C[P_offd_j[jj2]];
               C_offd_data[jj_count_offd] = P_offd_data[jj2];
               ++jj_count_offd;
            }
         }
         for (jj2 = P_diag_i[i1]; jj2 < P_diag_i[i1+1]; jj2++)
         {
            C_diag_j[jj_count_diag]    = P_diag_j[jj2];
            C_diag_data[jj_count_diag] = P_diag_data[jj2];
            ++jj_count_diag;
         }
      }
   }

   /*-----------------------------------------------------------------------
    *  Assemble the result matrix from the arrays built above.
    *-----------------------------------------------------------------------*/

   C = hypre_ParCSRMatrixCreate(comm, n_rows_A_global, n_cols_P_global,
                                row_starts_A, col_starts_P, num_cols_offd_C,
                                C_diag_size, C_offd_size);

   /* Note that C does not own the partitionings */
   hypre_ParCSRMatrixSetRowStartsOwner(C,0);
   hypre_ParCSRMatrixSetColStartsOwner(C,0);

   C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrixData(C_diag) = C_diag_data;
   hypre_CSRMatrixI(C_diag) = C_diag_i;
   hypre_CSRMatrixJ(C_diag) = C_diag_j;

   C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrixI(C_offd) = C_offd_i;

   if (num_cols_offd_C)
   {
      hypre_CSRMatrixData(C_offd) = C_offd_data;
      hypre_CSRMatrixJ(C_offd) = C_offd_j;
      hypre_ParCSRMatrixColMapOffd(C) = col_map_offd_C;
   }

   /*-----------------------------------------------------------------------
    *  Free various arrays
    *-----------------------------------------------------------------------*/

   hypre_TFree(P_marker);
   hypre_TFree(P_ext_diag_i);
   if (P_ext_diag_size)
   {
      hypre_TFree(P_ext_diag_j);
      hypre_TFree(P_ext_diag_data);
   }
   hypre_TFree(P_ext_offd_i);
   if (P_ext_offd_size)
   {
      hypre_TFree(P_ext_offd_j);
      hypre_TFree(P_ext_offd_data);
   }
   if (num_cols_offd_P)
   {
      hypre_TFree(map_P_to_C);
   }

   return C;
}
HYPRE_Int hypre_AMESetup(void *esolver) { HYPRE_Int ne, *edge_bc; hypre_AMEData *ame_data = esolver; hypre_AMSData *ams_data = ame_data -> precond; if (ams_data -> beta_is_zero) { ame_data -> t1 = hypre_ParVectorInDomainOf(ams_data -> G); ame_data -> t2 = hypre_ParVectorInDomainOf(ams_data -> G); } else { ame_data -> t1 = ams_data -> r1; ame_data -> t2 = ams_data -> g1; } ame_data -> t3 = ams_data -> r0; /* Eliminate boundary conditions in G = [Gii, Gib; 0, Gbb], i.e., compute [Gii, 0; 0, 0] */ { HYPRE_Int i, j, k, nv; HYPRE_Int *offd_edge_bc; hypre_ParCSRMatrix *Gt; nv = hypre_ParCSRMatrixNumCols(ams_data -> G); ne = hypre_ParCSRMatrixNumRows(ams_data -> G); edge_bc = hypre_TAlloc(HYPRE_Int, ne); for (i = 0; i < ne; i++) edge_bc[i] = 0; /* Find boundary (eliminated) edges */ { hypre_CSRMatrix *Ad = hypre_ParCSRMatrixDiag(ams_data -> A); HYPRE_Int *AdI = hypre_CSRMatrixI(Ad); HYPRE_Int *AdJ = hypre_CSRMatrixJ(Ad); HYPRE_Real *AdA = hypre_CSRMatrixData(Ad); hypre_CSRMatrix *Ao = hypre_ParCSRMatrixOffd(ams_data -> A); HYPRE_Int *AoI = hypre_CSRMatrixI(Ao); HYPRE_Real *AoA = hypre_CSRMatrixData(Ao); HYPRE_Real l1_norm; /* A row (edge) is boundary if its off-diag l1 norm is less than eps */ HYPRE_Real eps = DBL_EPSILON * 1e+4; for (i = 0; i < ne; i++) { l1_norm = 0.0; for (j = AdI[i]; j < AdI[i+1]; j++) if (AdJ[j] != i) l1_norm += fabs(AdA[j]); if (AoI) for (j = AoI[i]; j < AoI[i+1]; j++) l1_norm += fabs(AoA[j]); if (l1_norm < eps) edge_bc[i] = 1; } } hypre_ParCSRMatrixTranspose(ams_data -> G, &Gt, 1); /* Use a Matvec communication to find which of the edges connected to local vertices are on the boundary */ { hypre_ParCSRCommHandle *comm_handle; hypre_ParCSRCommPkg *comm_pkg; HYPRE_Int num_sends, *int_buf_data; HYPRE_Int index, start; offd_edge_bc = hypre_CTAlloc(HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(Gt))); hypre_MatvecCommPkgCreate(Gt); comm_pkg = hypre_ParCSRMatrixCommPkg(Gt); num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); int_buf_data = 
hypre_CTAlloc(HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); index = 0; for (i = 0; i < num_sends; i++) { start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++) { k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j); int_buf_data[index++] = edge_bc[k]; } } comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_edge_bc); hypre_ParCSRCommHandleDestroy(comm_handle); hypre_TFree(int_buf_data); } /* Eliminate boundary vertex entries in G^t */ { hypre_CSRMatrix *Gtd = hypre_ParCSRMatrixDiag(Gt); HYPRE_Int *GtdI = hypre_CSRMatrixI(Gtd); HYPRE_Int *GtdJ = hypre_CSRMatrixJ(Gtd); HYPRE_Real *GtdA = hypre_CSRMatrixData(Gtd); hypre_CSRMatrix *Gto = hypre_ParCSRMatrixOffd(Gt); HYPRE_Int *GtoI = hypre_CSRMatrixI(Gto); HYPRE_Int *GtoJ = hypre_CSRMatrixJ(Gto); HYPRE_Real *GtoA = hypre_CSRMatrixData(Gto); HYPRE_Int bdr; for (i = 0; i < nv; i++) { bdr = 0; /* A vertex is boundary if it belongs to a boundary edge */ for (j = GtdI[i]; j < GtdI[i+1]; j++) if (edge_bc[GtdJ[j]]) { bdr = 1; break; } if (!bdr && GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) if (offd_edge_bc[GtoJ[j]]) { bdr = 1; break; } if (bdr) { for (j = GtdI[i]; j < GtdI[i+1]; j++) /* if (!edge_bc[GtdJ[j]]) */ GtdA[j] = 0.0; if (GtoI) for (j = GtoI[i]; j < GtoI[i+1]; j++) /* if (!offd_edge_bc[GtoJ[j]]) */ GtoA[j] = 0.0; } } } hypre_ParCSRMatrixTranspose(Gt, &ame_data -> G, 1); hypre_ParCSRMatrixDestroy(Gt); hypre_TFree(offd_edge_bc); } /* Compute G^t M G */ { if (!hypre_ParCSRMatrixCommPkg(ame_data -> G)) hypre_MatvecCommPkgCreate(ame_data -> G); if (!hypre_ParCSRMatrixCommPkg(ame_data -> M)) hypre_MatvecCommPkgCreate(ame_data -> M); hypre_BoomerAMGBuildCoarseOperator(ame_data -> G, ame_data -> M, ame_data -> G, &ame_data -> A_G); hypre_ParCSRMatrixFixZeroRows(ame_data -> A_G); } /* Create AMG preconditioner and PCG-AMG solver for G^tMG */ { HYPRE_BoomerAMGCreate(&ame_data -> B1_G); 
HYPRE_BoomerAMGSetCoarsenType(ame_data -> B1_G, ams_data -> B_G_coarsen_type); HYPRE_BoomerAMGSetAggNumLevels(ame_data -> B1_G, ams_data -> B_G_agg_levels); HYPRE_BoomerAMGSetRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type); HYPRE_BoomerAMGSetNumSweeps(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetMaxLevels(ame_data -> B1_G, 25); HYPRE_BoomerAMGSetTol(ame_data -> B1_G, 0.0); HYPRE_BoomerAMGSetMaxIter(ame_data -> B1_G, 1); HYPRE_BoomerAMGSetStrongThreshold(ame_data -> B1_G, ams_data -> B_G_theta); /* don't use exact solve on the coarsest level (matrix may be singular) */ HYPRE_BoomerAMGSetCycleRelaxType(ame_data -> B1_G, ams_data -> B_G_relax_type, 3); HYPRE_ParCSRPCGCreate(hypre_ParCSRMatrixComm(ame_data->A_G), &ame_data -> B2_G); HYPRE_PCGSetPrintLevel(ame_data -> B2_G, 0); HYPRE_PCGSetTol(ame_data -> B2_G, 1e-12); HYPRE_PCGSetMaxIter(ame_data -> B2_G, 20); HYPRE_PCGSetPrecond(ame_data -> B2_G, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSolve, (HYPRE_PtrToSolverFcn) HYPRE_BoomerAMGSetup, ame_data -> B1_G); HYPRE_ParCSRPCGSetup(ame_data -> B2_G, (HYPRE_ParCSRMatrix)ame_data->A_G, (HYPRE_ParVector)ame_data->t1, (HYPRE_ParVector)ame_data->t2); } /* Setup LOBPCG */ { HYPRE_Int seed = 75; mv_InterfaceInterpreter* interpreter; mv_MultiVectorPtr eigenvectors; ame_data -> interpreter = hypre_CTAlloc(mv_InterfaceInterpreter,1); interpreter = (mv_InterfaceInterpreter*) ame_data -> interpreter; HYPRE_ParCSRSetupInterpreter(interpreter); ame_data -> eigenvalues = hypre_CTAlloc(HYPRE_Real, ame_data -> block_size); ame_data -> eigenvectors = mv_MultiVectorCreateFromSampleVector(interpreter, ame_data -> block_size, ame_data -> t3); eigenvectors = (mv_MultiVectorPtr) ame_data -> eigenvectors; mv_MultiVectorSetRandom (eigenvectors, seed); /* Make the initial vectors discretely divergence free */ { HYPRE_Int i, j; HYPRE_Real *data; mv_TempMultiVector* tmp = mv_MultiVectorGetData(eigenvectors); HYPRE_ParVector *v = (HYPRE_ParVector*)(tmp -> vector); hypre_ParVector *vi; for (i = 0; i < 
ame_data -> block_size; i++) { vi = (hypre_ParVector*) v[i]; data = hypre_VectorData(hypre_ParVectorLocalVector(vi)); for (j = 0; j < ne; j++) if (edge_bc[j]) data[j] = 0.0; hypre_AMEDiscrDivFreeComponent(esolver, vi); } } } hypre_TFree(edge_bc); return hypre_error_flag; }
/*--------------------------------------------------------------------------
 * hypre_ParMatMinus_F
 *
 * Returns a new matrix Pnew whose Fine rows (CF_marker[i] < 0) hold
 * P(i,k) - C(i,k), and whose other rows are copied from C by
 * hypre_ParCSRMatrixCopy_C.  The computation is purely local.
 *
 * Pnew is created with hypre_ParCSRMatrixUnion(C, P); the code below relies
 * on every column present in a row of C or P also being present in the same
 * row of Pnew (checked with hypre_assert).
 *
 * This is _not_ a general-purpose matrix subtraction function.  It is
 * written for an interpolation problem where it is known that C(i,k)
 * exists whenever P(i,k) does (because C=A*P where A has nonzero diagonal
 * elements).
 *
 * Parameters:
 *   P          first operand; its Fine rows are added
 *   C          second operand; its Fine rows are subtracted
 *   CF_marker  per-row marker; rows with a negative value are Fine rows
 *
 * Fine rows that are empty in the diag block of Pnew are skipped, so no
 * column index is read from outside such a row.
 *--------------------------------------------------------------------------*/
hypre_ParCSRMatrix * hypre_ParMatMinus_F(
   hypre_ParCSRMatrix * P, hypre_ParCSRMatrix * C, HYPRE_Int * CF_marker )
{
   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;

   double          *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int       *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int       *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double          *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int       *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int       *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int       *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double          *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int       *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int       *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double          *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int       *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int       *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int       *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int       *Pnew_diag_i;
   HYPRE_Int       *Pnew_diag_j;
   double          *Pnew_diag_data;
   HYPRE_Int       *Pnew_offd_i;
   HYPRE_Int       *Pnew_offd_j;
   double          *Pnew_offd_data;
   HYPRE_Int       *Pnew_j2m;
   HYPRE_Int       *Pnew_col_map_offd;

   HYPRE_Int        num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int        num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int        num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int        num_cols_offd_Pnew, num_rows_offd_Pnew;
   HYPRE_Int        diag_has_entries, offd_has_entries;

   HYPRE_Int        i1, jmin, jmax, jrange;
   HYPRE_Int        j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double           dc, dp;

   Pnew = hypre_ParCSRMatrixUnion( C, P );

   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );    /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker ); /* coarse rows of Pnew copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */

   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);

   /* These tests do not depend on the row, so evaluate them once. */
   diag_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_diag) > 0 );
   offd_has_entries = ( hypre_CSRMatrixNumNonzeros(Pnew_offd) > 0 );

   /*-----------------------------------------------------------------------
    *  Find the j-range, needed to allocate a "reverse lookup" array.
    *  This is the largest (max j - min j + 1) over the Fine rows, taken
    *  over the diag blocks of P and Pnew.  Columns of a row are not
    *  assumed to be sorted, so every entry of the row is scanned.
    *
    *  For scalability reasons (jrange can get big) this won't work for the
    *  offd block.  Also, indexing is more complicated in the offd block
    *  (c.f. col_map_offd).
    *-----------------------------------------------------------------------*/
   jrange = 0;
   if ( diag_has_entries )
   {
      for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* only Fine rows matter */
         }
         if ( Pnew_diag_i[i1] == Pnew_diag_i[i1+1] )
         {
            continue;   /* empty row: it has no first entry to start from */
         }

         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = jmin;
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrange = hypre_max( jrange, jmax-jmin+1 );
      }
   }

   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j-jmin] is the array index for j,
    *  i.e. Pnew_diag_j[ Pnew_j2m[j-jmin] ] = j
    *-----------------------------------------------------------------------*/
   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   if ( diag_has_entries )
   {
      for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* Fine data only */
         }
         if ( Pnew_diag_i[i1] == Pnew_diag_i[i1+1] )
         {
            /* Pnew holds the union of the C and P patterns, so an empty
               Pnew row is expected to come with empty C and P rows. */
            hypre_assert( C_diag_i[i1] == C_diag_i[i1+1] );
            hypre_assert( P_diag_i[i1] == P_diag_i[i1+1] );
            continue;
         }

         /* reset the lookup so that missing columns are detectable below */
         for ( j=0; j<jrange; ++j )
         {
            Pnew_j2m[j] = -1;
         }

         /* smallest column index of this row over Pnew and P */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }

         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*-----------------------------------------------------------------
          *  Loop over C_diag data for the current row.
          *  Subtract each C data entry from the corresponding Pnew entry.
          *-----------------------------------------------------------------*/
         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            hypre_assert( jc-jmin>=0 );
            hypre_assert( jc-jmin<jrange );
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*-----------------------------------------------------------------
          *  Loop over P_diag data for the current row.
          *  Add each P data entry to the corresponding Pnew entry.
          *-----------------------------------------------------------------*/
         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            hypre_assert( jp-jmin>=0 );
            hypre_assert( jp-jmin<jrange );
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }
      }
   }

   /*-----------------------------------------------------------------------
    * Repeat for the offd block.  Columns are matched through the
    * col_map_offd arrays of the three matrices.
    * This is a simple quadratic algorithm.
    *-----------------------------------------------------------------------*/
   if ( offd_has_entries && num_cols_offd_Pnew )
   {
      for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
      {
         if ( CF_marker[i1] >= 0 )
         {
            continue;   /* Fine data only */
         }

         for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
         {
            j = Pnew_offd_j[m];
            jg = Pnew_col_map_offd[j];
            Pnew_offd_data[m] = 0;

            if ( num_cols_offd_C )
            {
               for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
               {
                  jC = C_offd_j[mc];
                  jCg = C_col_map_offd[jC];
                  if ( jCg==jg )
                  {
                     Pnew_offd_data[m] -= C_offd_data[mc];
                  }
               }
            }

            if ( num_cols_offd_P )
            {
               for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
               {
                  jP = P_offd_j[mp];
                  jPg = P_col_map_offd[jP];
                  if ( jPg==jg )
                  {
                     Pnew_offd_data[m] += P_offd_data[mp];
                  }
               }
            }
         }
      }
   }

   hypre_TFree(Pnew_j2m);

   return Pnew;
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGSetupStats
 *
 * Prints a report about a BoomerAMG setup.  Every process computes local
 * statistics and takes part in the MPI_Reduce / MPI_Gather calls; only
 * rank 0 of the communicator of A prints.  The report contains:
 *
 *   - the number of MPI processes and threads,
 *   - the setup parameters (levels, thresholds, coarsening and
 *     interpolation type),
 *   - per level, for the operators in the A array: global size, nonzeros,
 *     sparsity, min/max entries per row, average entries, min/max row sum,
 *   - per level, for the interpolation operators in the P array: sizes,
 *     min/max entries per row, min/max weight, min/max row sum,
 *   - grid and operator complexity,
 *   - the solver parameters (cycles, tolerance, relaxation settings).
 *
 * Parameters:
 *   amg_vdata  the AMG data object (cast to hypre_ParAMGData *)
 *   A          matrix whose communicator is used
 *
 * Returns 0.
 *
 * The grid complexity is reported as 0 when there are no levels or when
 * the finest level has no variables.
 *--------------------------------------------------------------------------*/
int hypre_BoomerAMGSetupStats( void               *amg_vdata,
                               hypre_ParCSRMatrix *A )
{
   MPI_Comm          comm = hypre_ParCSRMatrixComm(A);

   hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata;

   /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/

   /* Data Structure variables */

   hypre_ParCSRMatrix **A_array;
   hypre_ParCSRMatrix **P_array;

   hypre_CSRMatrix *A_diag;
   double          *A_diag_data;
   int             *A_diag_i;

   hypre_CSRMatrix *A_offd;
   double          *A_offd_data;
   int             *A_offd_i;

   hypre_CSRMatrix *P_diag;
   double          *P_diag_data;
   int             *P_diag_i;

   hypre_CSRMatrix *P_offd;
   double          *P_offd_data;
   int             *P_offd_i;

   int              numrows;

   HYPRE_BigInt    *row_starts;

   int      num_levels;
   int      coarsen_type;
   int      interp_type;
   int      measure_type;
   double   global_nonzeros;

   double  *send_buff;
   double  *gather_buff;

   /* Local variables */

   int          level;
   int          j;
   HYPRE_BigInt fine_size;

   int      min_entries;
   int      max_entries;

   int      num_procs, my_id, num_threads;

   double   min_rowsum;
   double   max_rowsum;
   double   sparse;

   int          i;

   HYPRE_BigInt coarse_size;
   int          entries;

   double   avg_entries;
   double   rowsum;

   double   min_weight;
   double   max_weight;

   int      global_min_e;
   int      global_max_e;
   double   global_min_rsum;
   double   global_max_rsum;
   double   global_min_wt;
   double   global_max_wt;

   double  *num_coeffs;
   double  *num_variables;
   double   total_variables;
   double   operat_cmplxty;
   double   grid_cmplxty = 0;   /* stays 0 if it cannot be computed */

   /* amg solve params */
   int      max_iter;
   int      cycle_type;
   int     *num_grid_sweeps;
   int     *grid_relax_type;
   int      relax_order;
   int    **grid_relax_points;
   double  *relax_weight;
   double  *omega;
   double   tol;

   int      one = 1;
   int      minus_one = -1;
   int      zero = 0;
   int      smooth_type;
   int      smooth_num_levels;
   int      agg_num_levels;
   /*int seq_cg = 0;*/

   /*if (seq_data)
      seq_cg = 1;*/

   MPI_Comm_size(comm, &num_procs);
   MPI_Comm_rank(comm,&my_id);
   num_threads = hypre_NumThreads();

   if (my_id == 0)
      printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads);

   A_array = hypre_ParAMGDataAArray(amg_data);
   P_array = hypre_ParAMGDataPArray(amg_data);
   num_levels = hypre_ParAMGDataNumLevels(amg_data);
   coarsen_type = hypre_ParAMGDataCoarsenType(amg_data);
   interp_type = hypre_ParAMGDataInterpType(amg_data);
   measure_type = hypre_ParAMGDataMeasureType(amg_data);
   smooth_type = hypre_ParAMGDataSmoothType(amg_data);
   smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data);
   agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data);

   /*----------------------------------------------------------
    * Get the amg_data data
    *----------------------------------------------------------*/

   max_iter   = hypre_ParAMGDataMaxIter(amg_data);
   cycle_type = hypre_ParAMGDataCycleType(amg_data);
   num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data);
   grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data);
   grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data);
   relax_weight = hypre_ParAMGDataRelaxWeight(amg_data);
   relax_order = hypre_ParAMGDataRelaxOrder(amg_data);
   omega = hypre_ParAMGDataOmega(amg_data);
   tol = hypre_ParAMGDataTol(amg_data);

   /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/

   /* send_buff holds this process' statistics; gather_buff receives either
      the reduced values or one set of values per process */
   send_buff = hypre_CTAlloc(double, 6);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   gather_buff = hypre_CTAlloc(double,6);
#else
   gather_buff = hypre_CTAlloc(double,6*num_procs);
#endif

   if (my_id==0)
   {
      printf("\nBoomerAMG SETUP PARAMETERS:\n\n");
      printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data));
      printf(" Num levels = %d\n\n",num_levels);
      printf(" Strength Threshold = %f\n",
             hypre_ParAMGDataStrongThreshold(amg_data));
      printf(" Interpolation Truncation Factor = %f\n",
             hypre_ParAMGDataTruncFactor(amg_data));
      printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n",
             hypre_ParAMGDataMaxRowSum(amg_data));

      /* the name depends only on the magnitude of coarsen_type */
      switch (abs(coarsen_type))
      {
         case 0:
            printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n");
            break;
         case 1:
            printf(" Coarsening Type = Ruge\n");
            break;
         case 2:
            printf(" Coarsening Type = Ruge2B\n");
            break;
         case 3:
            printf(" Coarsening Type = Ruge3\n");
            break;
         case 4:
            printf(" Coarsening Type = Ruge 3c \n");
            break;
         case 5:
            printf(" Coarsening Type = Ruge relax special points \n");
            break;
         case 6:
            printf(" Coarsening Type = Falgout-CLJP \n");
            break;
         case 7:
            printf(" Coarsening Type = CLJP, fixed random \n");
            break;
         case 8:
            printf(" Coarsening Type = PMIS \n");
            break;
         case 9:
            printf(" Coarsening Type = PMIS fixed random \n");
            break;
         case 10:
            printf(" Coarsening Type = HMIS \n");
            break;
         case 11:
            printf(" Coarsening Type = Ruge 1st pass only \n");
            break;
         default:
            /* other values print no coarsening line */
            break;
      }
      if (coarsen_type > 0)
      {
         printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n");
      }

      if (coarsen_type)
         printf(" measures are determined %s\n\n",
                (measure_type ? "globally" : "locally"));

      if (agg_num_levels)
         printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels);

#ifdef HYPRE_NO_GLOBAL_PARTITION
      printf( "\n No global partition option chosen.\n\n");
#endif

      switch (interp_type)
      {
         case 0:
            printf(" Interpolation = modified classical interpolation\n");
            break;
         case 1:
            printf(" Interpolation = LS interpolation \n");
            break;
         case 2:
            printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n");
            break;
         case 3:
            printf(" Interpolation = direct interpolation with separation of weights\n");
            break;
         case 4:
            printf(" Interpolation = multipass interpolation\n");
            break;
         case 5:
            printf(" Interpolation = multipass interpolation with separation of weights\n");
            break;
         case 6:
            printf(" Interpolation = extended+i interpolation\n");
            break;
         case 7:
            printf(" Interpolation = extended+i interpolation (only when needed)\n");
            break;
         case 8:
            printf(" Interpolation = standard interpolation\n");
            break;
         case 9:
            printf(" Interpolation = standard interpolation with separation of weights\n");
            break;
         case 12:
            printf(" FF interpolation \n");
            break;
         case 13:
            printf(" FF1 interpolation \n");
            break;
         default:
            /* other values print no interpolation line */
            break;
      }

      {
         printf( "\nOperator Matrix Information:\n\n");
      }
      /* selected with #ifdef, like the data rows printed below, so that the
         header and the rows always come from the same branch */
#ifdef HYPRE_LONG_LONG
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("==================================\n");
#else
      printf(" nonzero entries p");
      printf("er row row sums\n");
      printf("lev rows entries sparse min max ");
      printf("avg min max\n");
      printf("=======================================");
      printf("============================\n");
#endif
   }

   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_coeffs = hypre_CTAlloc(double,num_levels);

   num_variables = hypre_CTAlloc(double,num_levels);

   for (level = 0; level < num_levels; level++)
   {
      {
         A_diag = hypre_ParCSRMatrixDiag(A_array[level]);
         A_diag_data = hypre_CSRMatrixData(A_diag);
         A_diag_i = hypre_CSRMatrixI(A_diag);

         A_offd = hypre_ParCSRMatrixOffd(A_array[level]);
         A_offd_data = hypre_CSRMatrixData(A_offd);
         A_offd_i = hypre_CSRMatrixI(A_offd);

         row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]);

         fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]);
         global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]);
         num_coeffs[level] = global_nonzeros;
         num_variables[level] = (double) fine_size;

         sparse = global_nonzeros /((double) fine_size * (double) fine_size);

         min_entries = 0;
         max_entries = 0;
         min_rowsum = 0.0;
         max_rowsum = 0.0;

         if (hypre_CSRMatrixNumRows(A_diag))
         {
            /* seed the minima and maxima with the values of local row 0 */
            min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]);
            for (j = A_diag_i[0]; j < A_diag_i[1]; j++)
               min_rowsum += A_diag_data[j];
            for (j = A_offd_i[0]; j < A_offd_i[1]; j++)
               min_rowsum += A_offd_data[j];

            max_rowsum = min_rowsum;

            for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++)
            {
               entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);

               rowsum = 0.0;
               for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++)
                  rowsum += A_diag_data[i];

               for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++)
                  rowsum += A_offd_data[i];

               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = global_nonzeros / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum = 1.0e7;
      }

      /* minima are negated so that a single MPI_MAX reduction yields both
         the global minima and the global maxima */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id ==0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = - gather_buff[2];
         global_max_rsum = gather_buff[3];
#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;

      MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         for (j = 0; j < num_procs; j++)
         {
            /* processes without rows do not contribute to the minima */
            numrows = row_starts[j+1]-row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#else
         printf( "%2d %7d %8.0f %0.3f %4d %4d",
                 level, fine_size, global_nonzeros, sparse, global_min_e,
                 global_max_e);
#endif
         printf(" %4.1f %10.3e %10.3e\n", avg_entries,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   if (my_id == 0)
   {
      {
         printf( "\n\nInterpolation Matrix Information:\n\n");
      }
#ifdef HYPRE_LONG_LONG
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows x cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("======================================\n");
#else
      printf(" entries/row min max");
      printf(" row sums\n");
      printf("lev rows cols min max ");
      printf(" weight weight min max \n");
      printf("=======================================");
      printf("==========================\n");
#endif
   }

   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   for (level = 0; level < num_levels-1; level++)
   {
      {
         P_diag = hypre_ParCSRMatrixDiag(P_array[level]);
         P_diag_data = hypre_CSRMatrixData(P_diag);
         P_diag_i = hypre_CSRMatrixI(P_diag);

         P_offd = hypre_ParCSRMatrixOffd(P_array[level]);
         P_offd_data = hypre_CSRMatrixData(P_offd);
         P_offd_i = hypre_CSRMatrixI(P_offd);

         row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]);

         fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]);
         coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]);
         global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]);

         min_weight = 1.0;
         max_weight = 0.0;
         max_rowsum = 0.0;
         min_rowsum = 0.0;
         min_entries = 0;
         max_entries = 0;

         if (hypre_CSRMatrixNumRows(P_diag))
         {
            if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0];

            /* seed the statistics with local row 0; weights equal to 1.0
               are not counted toward the maximum weight */
            for (j = P_diag_i[0]; j < P_diag_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_diag_data[j]);
               if (P_diag_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_diag_data[j]);
               min_rowsum += P_diag_data[j];
            }
            for (j = P_offd_i[0]; j < P_offd_i[1]; j++)
            {
               min_weight = hypre_min(min_weight, P_offd_data[j]);
               if (P_offd_data[j] != 1.0)
                  max_weight = hypre_max(max_weight, P_offd_data[j]);
               min_rowsum += P_offd_data[j];
            }

            max_rowsum = min_rowsum;

            min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]);
            max_entries = 0;

            for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++)
            {
               entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]);
               min_entries = hypre_min(entries, min_entries);
               max_entries = hypre_max(entries, max_entries);

               rowsum = 0.0;
               for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_diag_data[i]);
                  if (P_diag_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_diag_data[i]);
                  rowsum += P_diag_data[i];
               }

               for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++)
               {
                  min_weight = hypre_min(min_weight, P_offd_data[i]);
                  if (P_offd_data[i] != 1.0)
                     max_weight = hypre_max(max_weight, P_offd_data[i]);
                  rowsum += P_offd_data[i];
               }

               min_rowsum = hypre_min(rowsum, min_rowsum);
               max_rowsum = hypre_max(rowsum, max_rowsum);
            }
         }
         avg_entries = ((double) global_nonzeros) / ((double) fine_size);
      }

#ifdef HYPRE_NO_GLOBAL_PARTITION

      numrows = (int)(row_starts[1]-row_starts[0]);
      if (!numrows) /* if we don't have any rows, then don't have this count toward
                       min row sum or min num entries */
      {
         min_entries = 1000000;
         min_rowsum = 1.0e7;
         min_weight = 1.0e7;
      }

      /* minima are negated so that a single MPI_MAX reduction yields both
         the global minima and the global maxima */
      send_buff[0] = - (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = - min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = - min_weight;
      send_buff[5] = max_weight;

      MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm);

      if (my_id == 0)
      {
         global_min_e = - gather_buff[0];
         global_max_e = gather_buff[1];
         global_min_rsum = -gather_buff[2];
         global_max_rsum = gather_buff[3];
         global_min_wt = -gather_buff[4];
         global_max_wt = gather_buff[5];

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#else

      send_buff[0] = (double) min_entries;
      send_buff[1] = (double) max_entries;
      send_buff[2] = min_rowsum;
      send_buff[3] = max_rowsum;
      send_buff[4] = min_weight;
      send_buff[5] = max_weight;

      MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm);

      if (my_id == 0)
      {
         global_min_e = 1000000;
         global_max_e = 0;
         global_min_rsum = 1.0e7;
         global_max_rsum = 0.0;
         global_min_wt = 1.0e7;
         global_max_wt = 0.0;

         for (j = 0; j < num_procs; j++)
         {
            /* processes without rows do not contribute to the minima */
            numrows = row_starts[j+1] - row_starts[j];
            if (numrows)
            {
               global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]);
               global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]);
               global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]);
            }
            global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]);
            global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]);
            global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]);
         }

#ifdef HYPRE_LONG_LONG
         printf( "%2d %12lld x %-12lld %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#else
         printf( "%2d %5d x %-5d %3d %3d",
                 level, fine_size, coarse_size, global_min_e, global_max_e);
#endif
         printf(" %10.3e %9.3e %9.3e %9.3e\n",
                global_min_wt, global_max_wt,
                global_min_rsum, global_max_rsum);
      }

#endif
   }

   total_variables = 0;
   operat_cmplxty = 0;
   for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++)
   {
      operat_cmplxty += num_coeffs[j] / num_coeffs[0];
      total_variables += num_variables[j];
   }
   /* num_variables has num_levels entries, so entry 0 is only read when a
      level exists; otherwise grid_cmplxty keeps its initial value of 0 */
   if (num_levels > 0 && num_variables[0] != 0)
      grid_cmplxty = total_variables / num_variables[0];

   if (my_id == 0 )
   {
      printf("\n\n Complexity: grid = %f\n",grid_cmplxty);
      printf(" operator = %f\n",operat_cmplxty);
   }

   if (my_id == 0) printf("\n\n");

   if (my_id == 0)
   {
      printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n");
      printf( " Maximum number of cycles: %d \n",max_iter);
      printf( " Stopping Tolerance: %e \n",tol);
      printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type);
      printf( " Relaxation Parameters:\n");
      printf( " Visiting Grid: down up coarse\n");
      printf( " Number of partial sweeps: %4d %2d %4d \n",
              num_grid_sweeps[1],
              num_grid_sweeps[2],num_grid_sweeps[3]);
      printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n",
              grid_relax_type[1],
              grid_relax_type[2],grid_relax_type[3]);
#if 1 /* TO DO: may not want this to print if CG in the coarse grid */
      printf( " Point types, partial sweeps (1=C, -1=F):\n");
      if (grid_relax_points)
      {
         /* explicit point types were supplied: print them as stored */
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", grid_relax_points[1][j]);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", grid_relax_points[2][j]);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", grid_relax_points[3][j]);
         printf( "\n\n");
      }
      else if (relax_order == 1)
      {
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d %2d", one, minus_one);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d %2d", minus_one, one);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }
      else
      {
         printf( " Pre-CG relaxation (down):");
         for (j = 0; j < num_grid_sweeps[1]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Post-CG relaxation (up):");
         for (j = 0; j < num_grid_sweeps[2]; j++)
            printf(" %2d", zero);
         printf( "\n");
         printf( " Coarsest grid:");
         for (j = 0; j < num_grid_sweeps[3]; j++)
            printf(" %2d", zero);
         printf( "\n\n");
      }
#endif
      if (smooth_type == 6)
         for (j=0; j < smooth_num_levels; j++)
            printf( " Schwarz Relaxation Weight %f level %d\n",
                    hypre_ParAMGDataSchwarzRlxWeight(amg_data),j);

      /* only weights different from 1 are reported */
      for (j=0; j < num_levels; j++)
         if (relax_weight[j] != 1)
            printf( " Relaxation Weight %f level %d\n",relax_weight[j],j);
      for (j=0; j < num_levels; j++)
         if (omega[j] != 1)
            printf( " Outer relaxation weight %f level %d\n",omega[j],j);
   }

   /*if (seq_cg)
   {
      hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0],
                             operat_cmplxty, grid_cmplxty );
   }*/

   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);
   hypre_TFree(send_buff);
   hypre_TFree(gather_buff);

   return(0);
}
/*--------------------------------------------------------------------------
 * hypre_NodalCollapseBlocks  (private helper of hypre_BoomerAMGCreateNodalA)
 *
 * Collapses the CSR part (A_i, A_j, A_data) of a system matrix into the
 * column indices AN_j and values AN_data of the corresponding nodal part.
 * Rows are grouped num_functions at a time into one nodal row; columns are
 * mapped to nodal columns through col_to_node.  AN_j/AN_data must already
 * have room for AN_num_nonzeros entries (the count produced by the caller's
 * counting pass).
 *
 * mode selects how all scalar entries that fall into one nodal entry are
 * combined:
 *    1  square root of the sum of squares (Frobenius norm)
 *    2  sum of absolute values divided by num_functions^2
 *    3  entry of largest magnitude (keeps its sign)
 *    4  maximum over the block rows of the row sum of absolute values
 *    6  plain sum of the entries
 * Any other mode leaves AN_j and AN_data untouched.
 *
 * counter is scratch space with at least num_node_cols entries; it is
 * reset to -1 here.  While a nodal row is processed, counter[c] holds the
 * position in AN_j of nodal column c if that column was already seen in
 * the current nodal row (i.e. counter[c] >= start_index).
 *--------------------------------------------------------------------------*/
static void
hypre_NodalCollapseBlocks(HYPRE_Int  mode,
                          HYPRE_Int  num_nodes,
                          HYPRE_Int  num_functions,
                          HYPRE_Int  num_node_cols,
                          HYPRE_Int *A_i,
                          HYPRE_Int *A_j,
                          double    *A_data,
                          HYPRE_Int *col_to_node,
                          HYPRE_Int *counter,
                          HYPRE_Int *AN_j,
                          double    *AN_data,
                          HYPRE_Int  AN_num_nonzeros)
{
   HYPRE_Int  num_fun2    = num_functions*num_functions;
   HYPRE_Int  index       = 0;   /* next free position in AN_j / AN_data   */
   HYPRE_Int  start_index = 0;   /* first position of the current nodal row */
   HYPRE_Int  row         = 0;   /* current scalar row of A                 */
   HYPRE_Int  i, j, k, k_map, slot, is_new;
   double     value;
   double    *data = NULL;       /* mode 4 only: per-block-row abs. sums    */

   for (i = 0; i < num_node_cols; i++)
      counter[i] = -1;

   if (mode != 1 && mode != 2 && mode != 3 && mode != 4 && mode != 6)
      return;

   if (mode == 4)
      data = hypre_CTAlloc(double, AN_num_nonzeros*num_functions);

   for (i = 0; i < num_nodes; i++)
   {
      for (j = 0; j < num_functions; j++)
      {
         for (k = A_i[row]; k < A_i[row+1]; k++)
         {
            k_map = col_to_node[A_j[k]];
            value = A_data[k];

            /* first time this nodal column shows up in nodal row i? */
            is_new = (counter[k_map] < start_index);
            if (is_new)
            {
               counter[k_map] = index;
               AN_j[index] = k_map;
               index++;
            }
            slot = counter[k_map];

            switch (mode)
            {
               case 1: /* frobenius norm (sqrt applied below) */
                  if (is_new) AN_data[slot]  = value*value;
                  else        AN_data[slot] += value*value;
                  break;

               case 2: /* sum of abs. values (scaled below) */
                  if (is_new) AN_data[slot]  = fabs(value);
                  else        AN_data[slot] += fabs(value);
                  break;

               case 3: /* largest element (true value, not abs. value) */
                  if (is_new || fabs(value) > fabs(AN_data[slot]))
                     AN_data[slot] = value;
                  break;

               case 4: /* inf. norm: abs. row sum kept per block row j */
                  if (is_new) data[slot*num_functions + j]  = fabs(value);
                  else        data[slot*num_functions + j] += fabs(value);
                  break;

               default: /* mode 6: sum of all elements */
                  if (is_new) AN_data[slot]  = value;
                  else        AN_data[slot] += value;
                  break;
            }
         }
         row++;
      }
      start_index = index;
   }

   if (mode == 1)
   {
      for (i = 0; i < AN_num_nonzeros; i++)
         AN_data[i] = sqrt(AN_data[i]);
   }
   else if (mode == 2)
   {
      for (i = 0; i < AN_num_nonzeros; i++)
         AN_data[i] /= num_fun2;
   }
   else if (mode == 4)
   {
      /* reduce the per-block-row sums to their maximum */
      for (i = 0; i < AN_num_nonzeros; i++)
      {
         AN_data[i] = data[i*num_functions];
         for (j = 1; j < num_functions; j++)
         {
            AN_data[i] = hypre_max(AN_data[i], data[i*num_functions+j]);
         }
      }
      hypre_TFree(data);
   }
}

/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCreateNodalA
 *
 * Builds the nodal matrix AN of the system matrix A: every group of
 * num_functions consecutive rows/columns of A becomes a single row/column
 * of AN, and the scalar entries falling into one nodal entry are combined
 * according to |option| (see hypre_NodalCollapseBlocks: 1, 2, 3, 4 or 6).
 *
 * diag_option == 1 : the first entry of each row of the diag part of AN is
 *                    replaced by the negative sum of all other entries of
 *                    that nodal row (diag and offd parts).
 * diag_option == 2 : the first entry of each row of the diag part of AN is
 *                    negated.
 * Both rely on the diagonal being stored first in each row of A_diag.
 *
 * Besides creating AN (returned through AN_ptr, together with a
 * communication package derived from that of A), the function may modify A
 * in place: if A's off-processor column map or send map do not contain all
 * num_functions unknowns of every referenced node, col_map_offd, the offd
 * column indices, recv_vec_starts, send_map_starts and send_map_elmts of A
 * are expanded so that they do.
 *
 * dof_func is not used.
 *
 * Returns 0 on success, or 87 if a row partition boundary of A is not a
 * multiple of num_functions (in which case *AN_ptr is not set).
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCreateNodalA(hypre_ParCSRMatrix    *A,
                            HYPRE_Int              num_functions,
                            HYPRE_Int             *dof_func,
                            HYPRE_Int              option,
                            HYPRE_Int              diag_option,
                            hypre_ParCSRMatrix   **AN_ptr)
{
   MPI_Comm            comm         = hypre_ParCSRMatrixComm(A);
   hypre_CSRMatrix    *A_diag       = hypre_ParCSRMatrixDiag(A);
   HYPRE_Int          *A_diag_i     = hypre_CSRMatrixI(A_diag);
   double             *A_diag_data  = hypre_CSRMatrixData(A_diag);

   hypre_CSRMatrix    *A_offd       = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int          *A_offd_i     = hypre_CSRMatrixI(A_offd);
   double             *A_offd_data  = hypre_CSRMatrixData(A_offd);
   HYPRE_Int          *A_diag_j     = hypre_CSRMatrixJ(A_diag);
   HYPRE_Int          *A_offd_j     = hypre_CSRMatrixJ(A_offd);

   HYPRE_Int          *row_starts   = hypre_ParCSRMatrixRowStarts(A);
   HYPRE_Int          *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
   HYPRE_Int           num_variables = hypre_CSRMatrixNumRows(A_diag);
   HYPRE_Int           num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd = 0;

   hypre_ParCSRMatrix *AN;
   hypre_CSRMatrix    *AN_diag;
   HYPRE_Int          *AN_diag_i;
   HYPRE_Int          *AN_diag_j;
   double             *AN_diag_data;
   hypre_CSRMatrix    *AN_offd;
   HYPRE_Int          *AN_offd_i;
   HYPRE_Int          *AN_offd_j = NULL;
   double             *AN_offd_data = NULL;
   HYPRE_Int          *col_map_offd_AN;
   HYPRE_Int          *new_col_map_offd;
   HYPRE_Int          *row_starts_AN;
   HYPRE_Int           AN_num_nonzeros_diag = 0;
   HYPRE_Int           AN_num_nonzeros_offd = 0;
   HYPRE_Int           num_cols_offd_AN;
   HYPRE_Int           new_num_cols_offd;

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   HYPRE_Int           num_sends;
   HYPRE_Int           num_recvs;
   HYPRE_Int          *send_procs;
   HYPRE_Int          *send_map_starts;
   HYPRE_Int          *send_map_elmts;
   HYPRE_Int          *new_send_map_elmts;
   HYPRE_Int          *recv_procs;
   HYPRE_Int          *recv_vec_starts;

   hypre_ParCSRCommPkg *comm_pkg_AN;
   HYPRE_Int          *send_procs_AN;
   HYPRE_Int          *send_map_starts_AN;
   HYPRE_Int          *send_map_elmts_AN;
   HYPRE_Int          *recv_procs_AN;
   HYPRE_Int          *recv_vec_starts_AN;

   HYPRE_Int           i, j, k, k_map;

   HYPRE_Int           ierr = 0;

   HYPRE_Int           index, row;
   HYPRE_Int           num_procs;
   HYPRE_Int           node, cnt;
   HYPRE_Int           mode;
   HYPRE_Int           new_send_elmts_size;

   HYPRE_Int           global_num_nodes;
   HYPRE_Int           num_nodes;
   HYPRE_Int          *map_to_node;
   HYPRE_Int          *map_to_map = NULL;
   HYPRE_Int          *counter;

   double              sum;

   hypre_MPI_Comm_size(comm,&num_procs);

   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   /* only the magnitude of option selects the collapsing rule here */
   mode = (HYPRE_Int) fabs((double) option);

   comm_pkg_AN = NULL;
   col_map_offd_AN = NULL;

   /* nodal row partition: every boundary must be a multiple of
      num_functions, otherwise a node would be split between processors */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, 2);

   for (i=0; i < 2; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         /* nothing else has been allocated yet; do not leak the partition */
         hypre_TFree(row_starts_AN);
         return (87);
      }
   }

   global_num_nodes = hypre_ParCSRMatrixGlobalNumRows(A)/num_functions;
#else
   row_starts_AN = hypre_CTAlloc(HYPRE_Int, num_procs+1);

   for (i=0; i < num_procs+1; i++)
   {
      row_starts_AN[i] = row_starts[i]/num_functions;
      if (row_starts_AN[i]*num_functions < row_starts[i])
      {
         hypre_printf("nodes not properly aligned or incomplete info!\n");
         /* nothing else has been allocated yet; do not leak the partition */
         hypre_TFree(row_starts_AN);
         return (87);
      }
   }

   global_num_nodes = row_starts_AN[num_procs];
#endif

   num_nodes = num_variables/num_functions;

   map_to_node = hypre_CTAlloc(HYPRE_Int, num_variables);
   AN_diag_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);
   counter = hypre_CTAlloc(HYPRE_Int, num_nodes);

   for (i=0; i < num_variables; i++)
      map_to_node[i] = i/num_functions;
   for (i=0; i < num_nodes; i++)
      counter[i] = -1;

   /*-----------------------------------------------------------------
    * diag part, pass 1: count distinct nodal columns per nodal row
    *-----------------------------------------------------------------*/
   AN_num_nonzeros_diag = 0;
   row = 0;
   for (i=0; i < num_nodes; i++)
   {
      AN_diag_i[i] = AN_num_nonzeros_diag;
      for (j=0; j < num_functions; j++)
      {
         for (k=A_diag_i[row]; k < A_diag_i[row+1]; k++)
         {
            k_map = map_to_node[A_diag_j[k]];
            if (counter[k_map] < i)
            {
               counter[k_map] = i;
               AN_num_nonzeros_diag++;
            }
         }
         row++;
      }
   }
   AN_diag_i[num_nodes] = AN_num_nonzeros_diag;

   AN_diag_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_diag);
   AN_diag_data = hypre_CTAlloc(double, AN_num_nonzeros_diag);

   AN_diag = hypre_CSRMatrixCreate(num_nodes,num_nodes,AN_num_nonzeros_diag);
   hypre_CSRMatrixI(AN_diag) = AN_diag_i;
   hypre_CSRMatrixJ(AN_diag) = AN_diag_j;
   hypre_CSRMatrixData(AN_diag) = AN_diag_data;

   /*-----------------------------------------------------------------
    * diag part, pass 2: fill column indices and collapsed values
    *-----------------------------------------------------------------*/
   hypre_NodalCollapseBlocks(mode, num_nodes, num_functions, num_nodes,
                             A_diag_i, A_diag_j, A_diag_data,
                             map_to_node, counter,
                             AN_diag_j, AN_diag_data, AN_num_nonzeros_diag);

   if (diag_option == 1)
   {
      /* make the diag entry the negative of the sum of off-diag entries
         (the offd contribution is added further below);
         the diagonal is the first element listed in each row */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         sum = 0.0;
         for (k = AN_diag_i[i]+1; k < AN_diag_i[i+1]; k++)
         {
            sum += AN_diag_data[k];
         }
         AN_diag_data[index] = -sum;
      }
   }
   else if (diag_option == 2)
   {
      /* make all diagonal entries negative;
         the diagonal is the first element listed in each row */
      for (i=0; i < num_nodes; i++)
      {
         index = AN_diag_i[i];
         AN_diag_data[index] = - AN_diag_data[index];
      }
   }

   num_nonzeros_offd = A_offd_i[num_variables];
   AN_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes+1);

   num_cols_offd_AN = 0;

   /*-----------------------------------------------------------------
    * nodal communication package: same neighbours as A, send map
    * compressed from unknowns to nodes
    *-----------------------------------------------------------------*/
   if (comm_pkg)
   {
      comm_pkg_AN = hypre_CTAlloc(hypre_ParCSRCommPkg,1);
      hypre_ParCSRCommPkgComm(comm_pkg_AN) = comm;
      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      hypre_ParCSRCommPkgNumSends(comm_pkg_AN) = num_sends;
      num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg);
      hypre_ParCSRCommPkgNumRecvs(comm_pkg_AN) = num_recvs;
      send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg);
      send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg);
      send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg);
      recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg);
      recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg);
      send_procs_AN = NULL;
      send_map_elmts_AN = NULL;
      if (num_sends)
      {
         send_procs_AN = hypre_CTAlloc(HYPRE_Int,num_sends);
         send_map_elmts_AN = hypre_CTAlloc(HYPRE_Int,send_map_starts[num_sends]);
      }
      send_map_starts_AN = hypre_CTAlloc(HYPRE_Int,num_sends+1);
      recv_vec_starts_AN = hypre_CTAlloc(HYPRE_Int,num_recvs+1);
      recv_procs_AN = NULL;
      if (num_recvs) recv_procs_AN = hypre_CTAlloc(HYPRE_Int,num_recvs);
      for (i=0; i < num_sends; i++)
         send_procs_AN[i] = send_procs[i];
      for (i=0; i < num_recvs; i++)
         recv_procs_AN[i] = recv_procs[i];

      send_map_starts_AN[0] = 0;
      cnt = 0;
      for (i=0; i < num_sends; i++)
      {
         k_map = send_map_starts[i];
         if (send_map_starts[i+1]-k_map)
            send_map_elmts_AN[cnt++] = send_map_elmts[k_map]/num_functions;
         /* keep a node only when it is larger than the last one stored,
            i.e. drop repeats of the same node within this send list */
         for (j=send_map_starts[i]+1; j < send_map_starts[i+1]; j++)
         {
            node = send_map_elmts[j]/num_functions;
            if (node > send_map_elmts_AN[cnt-1])
               send_map_elmts_AN[cnt++] = node;
         }
         send_map_starts_AN[i+1] = cnt;
      }
      hypre_ParCSRCommPkgSendProcs(comm_pkg_AN) = send_procs_AN;
      hypre_ParCSRCommPkgSendMapStarts(comm_pkg_AN) = send_map_starts_AN;
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg_AN) = send_map_elmts_AN;
      hypre_ParCSRCommPkgRecvProcs(comm_pkg_AN) = recv_procs_AN;
      hypre_ParCSRCommPkgRecvVecStarts(comm_pkg_AN) = recv_vec_starts_AN;
   }

   /*-----------------------------------------------------------------
    * offd part, pass 1: nodal column map and nonzero count
    *-----------------------------------------------------------------*/
   num_cols_offd = hypre_CSRMatrixNumCols(A_offd);
   if (num_cols_offd)
   {
      /* map_to_node is reused for the offd columns; grow it if needed */
      if (num_cols_offd > num_variables)
      {
         hypre_TFree(map_to_node);
         map_to_node = hypre_CTAlloc(HYPRE_Int,num_cols_offd);
      }

      num_cols_offd_AN = 1;
      map_to_node[0] = col_map_offd[0]/num_functions;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/num_functions;
         if (map_to_node[i] > map_to_node[i-1]) num_cols_offd_AN++;
      }

      if (num_cols_offd_AN > num_nodes)
      {
         hypre_TFree(counter);
         counter = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      }

      /* map_to_map: offd column of A -> offd column of AN */
      map_to_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      col_map_offd_AN = hypre_CTAlloc(HYPRE_Int,num_cols_offd_AN);
      col_map_offd_AN[0] = map_to_node[0];
      recv_vec_starts_AN[0] = 0;
      cnt = 1;
      for (i=0; i < num_recvs; i++)
      {
         for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++)
         {
            node = map_to_node[j];
            if (node > col_map_offd_AN[cnt-1])
            {
               col_map_offd_AN[cnt++] = node;
            }
            map_to_map[j] = cnt-1;
         }
         recv_vec_starts_AN[i+1] = cnt;
      }

      for (i=0; i < num_cols_offd_AN; i++)
         counter[i] = -1;

      AN_num_nonzeros_offd = 0;
      row = 0;
      for (i=0; i < num_nodes; i++)
      {
         AN_offd_i[i] = AN_num_nonzeros_offd;
         for (j=0; j < num_functions; j++)
         {
            for (k=A_offd_i[row]; k < A_offd_i[row+1]; k++)
            {
               k_map = map_to_map[A_offd_j[k]];
               if (counter[k_map] < i)
               {
                  counter[k_map] = i;
                  AN_num_nonzeros_offd++;
               }
            }
            row++;
         }
      }
      AN_offd_i[num_nodes] = AN_num_nonzeros_offd;
   }

   AN_offd = hypre_CSRMatrixCreate(num_nodes,num_cols_offd_AN,
                                   AN_num_nonzeros_offd);
   hypre_CSRMatrixI(AN_offd) = AN_offd_i;

   /*-----------------------------------------------------------------
    * offd part, pass 2: fill column indices and collapsed values
    *-----------------------------------------------------------------*/
   if (AN_num_nonzeros_offd)
   {
      AN_offd_j = hypre_CTAlloc(HYPRE_Int, AN_num_nonzeros_offd);
      AN_offd_data = hypre_CTAlloc(double, AN_num_nonzeros_offd);
      hypre_CSRMatrixJ(AN_offd) = AN_offd_j;
      hypre_CSRMatrixData(AN_offd) = AN_offd_data;

      AN_offd_i[0] = 0;

      hypre_NodalCollapseBlocks(mode, num_nodes, num_functions,
                                num_cols_offd_AN,
                                A_offd_i, A_offd_j, A_offd_data,
                                map_to_map, counter,
                                AN_offd_j, AN_offd_data,
                                AN_num_nonzeros_offd);
   }

   /* released unconditionally: it is allocated whenever A has offd
      columns, even if the nodal offd part ends up without nonzeros */
   hypre_TFree(map_to_map);

   if (diag_option == 1)
   {
      /* make the diag entry the negative of the sum of off-diag entries
         (here we are adding the offd contribution);
         the diagonal is the first element listed in each row of
         AN_diag_data */
      for (i=0; i < num_nodes; i++)
      {
         sum = 0.0;
         for (k = AN_offd_i[i]; k < AN_offd_i[i+1]; k++)
         {
            sum += AN_offd_data[k];
         }
         index = AN_diag_i[i]; /* location of diag entry in data */
         AN_diag_data[index] -= sum; /* subtract from current value */
      }
   }

   AN = hypre_ParCSRMatrixCreate(comm, global_num_nodes, global_num_nodes,
                                 row_starts_AN, row_starts_AN,
                                 num_cols_offd_AN,
                                 AN_num_nonzeros_diag, AN_num_nonzeros_offd);

   /* we already created the diag and offd matrices - so we don't need the
      ones created above */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(AN));
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(AN));
   hypre_ParCSRMatrixDiag(AN) = AN_diag;
   hypre_ParCSRMatrixOffd(AN) = AN_offd;

   hypre_ParCSRMatrixColMapOffd(AN) = col_map_offd_AN;
   hypre_ParCSRMatrixCommPkg(AN) = comm_pkg_AN;

   /*-----------------------------------------------------------------
    * If A does not reference all unknowns of every off-processor node,
    * widen its offd column map (and renumber its offd column indices
    * and receive offsets) so that it does.
    *-----------------------------------------------------------------*/
   new_num_cols_offd = num_functions*num_cols_offd_AN;

   if (new_num_cols_offd > num_cols_offd)
   {
      new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols_offd);
      cnt = 0;
      for (i=0; i < num_cols_offd_AN; i++)
      {
         for (j=0; j < num_functions; j++)
         {
            new_col_map_offd[cnt++] = num_functions*col_map_offd_AN[i]+j;
         }
      }
      /* col_map_offd is overwritten with old -> new local column index */
      cnt = 0;
      for (i=0; i < num_cols_offd; i++)
      {
         while (col_map_offd[i] > new_col_map_offd[cnt])
            cnt++;
         col_map_offd[i] = cnt++;
      }
      for (i=0; i < num_recvs+1; i++)
      {
         recv_vec_starts[i] = num_functions*recv_vec_starts_AN[i];
      }

      for (i=0; i < num_nonzeros_offd; i++)
      {
         j = A_offd_j[i];
         A_offd_j[i] = col_map_offd[j];
      }
      hypre_ParCSRMatrixColMapOffd(A) = new_col_map_offd;
      hypre_CSRMatrixNumCols(A_offd) = new_num_cols_offd;
      hypre_TFree(col_map_offd);
   }

   hypre_TFree(map_to_node);

   /* likewise widen A's send map to all unknowns of every sent node */
   new_send_elmts_size = send_map_starts_AN[num_sends]*num_functions;

   if (new_send_elmts_size > send_map_starts[num_sends])
   {
      new_send_map_elmts = hypre_CTAlloc(HYPRE_Int,new_send_elmts_size);
      cnt = 0;
      send_map_starts[0] = 0;
      for (i=0; i < num_sends; i++)
      {
         send_map_starts[i+1] = send_map_starts_AN[i+1]*num_functions;
         for (j=send_map_starts_AN[i]; j < send_map_starts_AN[i+1]; j++)
         {
            for (k=0; k < num_functions; k++)
               new_send_map_elmts[cnt++] = send_map_elmts_AN[j]*num_functions+k;
         }
      }
      hypre_TFree(send_map_elmts);
      hypre_ParCSRCommPkgSendMapElmts(comm_pkg) = new_send_map_elmts;
   }

   *AN_ptr = AN;

   hypre_TFree(counter);

   return (ierr);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvecT
 *
 * Performs y <- alpha * A^T * x + beta * y.
 *
 * The offd part of A is applied first into a temporary vector whose
 * entries are then communicated (job 2 of the comm package) while the diag
 * part is applied locally; the received contributions are finally added
 * into y at the positions given by the send map.
 *
 * Returns an informational size-check flag, never an abort condition:
 *    1  global size of x differs from the global number of rows of A
 *    2  global size of y differs from the global number of columns of A
 *    3  both of the above
 *    0  otherwise
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRMatrixMatvecT( HYPRE_Complex       alpha,
                           hypre_ParCSRMatrix *A,
                           hypre_ParVector    *x,
                           HYPRE_Complex       beta,
                           hypre_ParVector    *y )
{
   hypre_ParCSRCommPkg     *pkg      = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix         *A_diag   = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *A_offd   = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_loc    = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_loc    = hypre_ParVectorLocalVector(y);

   HYPRE_Int  vec_stride = hypre_VectorVectorStride(y_loc);
   HYPRE_Int  idx_stride = hypre_VectorIndexStride(y_loc);
   HYPRE_Int  n_vecs     = hypre_VectorNumVectors(y_loc);
   HYPRE_Int  n_offd     = hypre_CSRMatrixNumCols(A_offd);

   /* size compatibility: bit 0 flags x vs. rows, bit 1 flags y vs. cols */
   HYPRE_Int  rows_differ =
      (hypre_ParCSRMatrixGlobalNumRows(A) != hypre_ParVectorGlobalSize(x));
   HYPRE_Int  cols_differ =
      (hypre_ParCSRMatrixGlobalNumCols(A) != hypre_ParVectorGlobalSize(y));
   HYPRE_Int  status = rows_differ + 2*cols_differ;

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *y_offd;      /* A_offd^T * x, one column per vector */
   HYPRE_Complex           *y_offd_data;
   HYPRE_Complex           *y_data;
   HYPRE_Complex          **bufs;        /* incoming contributions, per vector  */
   HYPRE_Complex           *src;

   HYPRE_Int  n_sends, buf_len;
   HYPRE_Int  v, s, e, stop, pos;
   HYPRE_Int  base, scale;

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*, n_vecs);

   y_offd = (n_vecs == 1) ? hypre_SeqVectorCreate(n_offd)
                          : hypre_SeqMultiVectorCreate(n_offd, n_vecs);
   hypre_SeqVectorInitialize(y_offd);

   /* build the communication package on demand */
   if (!pkg)
   {
      hypre_MatvecCommPkgCreate(A);
      pkg = hypre_ParCSRMatrixCommPkg(A);
   }

   n_sends = hypre_ParCSRCommPkgNumSends(pkg);
   buf_len = hypre_ParCSRCommPkgSendMapStart(pkg, n_sends);

   bufs = hypre_CTAlloc(HYPRE_Complex*, n_vecs);
   for (v = 0; v < n_vecs; ++v)
   {
      bufs[v] = hypre_CTAlloc(HYPRE_Complex, buf_len);
   }

   y_offd_data = hypre_VectorData(y_offd);
   y_data      = hypre_VectorData(y_loc);

   hypre_assert( idxstride==1 );
   /* only 'column' storage of multivectors implemented so far */

   if (n_offd)
   {
      hypre_CSRMatrixMatvecT(alpha, A_offd, x_local, 0.0, y_tmp);
   }

   /* start the exchange; multivectors are assumed to be stored by column */
   for (v = 0; v < n_vecs; ++v)
   {
      handles[v] = hypre_ParCSRCommHandleCreate(2, pkg,
                                                &(y_offd_data[v*n_offd]),
                                                bufs[v]);
   }

   /* local work overlaps with the communication started above */
   hypre_CSRMatrixMatvecT(alpha, A_diag, x_loc, beta, y_loc);

   for (v = 0; v < n_vecs; ++v)
   {
      hypre_ParCSRCommHandleDestroy(handles[v]);
      handles[v] = NULL;
   }
   hypre_TFree(handles);

   /* add the received contributions into y */
   for (v = 0; v < n_vecs; ++v)
   {
      /* a single vector is indexed directly; multivectors use the strides */
      if (n_vecs == 1)
      {
         base  = 0;
         scale = 1;
      }
      else
      {
         base  = v*vec_stride;
         scale = idx_stride;
      }

      src = bufs[v];
      pos = 0;
      for (s = 0; s < n_sends; s++)
      {
         stop = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
         for (e = hypre_ParCSRCommPkgSendMapStart(pkg, s); e < stop; e++)
         {
            y_data[base + scale*hypre_ParCSRCommPkgSendMapElmt(pkg, e)]
               += src[pos++];
         }
      }
   }

   hypre_SeqVectorDestroy(y_offd);
   y_offd = NULL;

   for (v = 0; v < n_vecs; ++v)
   {
      hypre_TFree(bufs[v]);
   }
   hypre_TFree(bufs);

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_AMGStatsScanRows  (private helper of hypre_AMGSetupStats)
 *
 * Scans the first num_rows rows of a CSR matrix given by its row pointer
 * and value arrays and reports the minimum / maximum / total number of
 * stored entries per row and the minimum / maximum row sum.  The minima
 * and the row-sum maximum start from row 0; the entry-count maximum starts
 * from 0.
 *--------------------------------------------------------------------------*/
static void
hypre_AMGStatsScanRows( HYPRE_Int *row_ptr,
                        double    *values,
                        HYPRE_Int  num_rows,
                        HYPRE_Int *min_entries_ptr,
                        HYPRE_Int *max_entries_ptr,
                        HYPRE_Int *total_entries_ptr,
                        double    *min_rowsum_ptr,
                        double    *max_rowsum_ptr )
{
   HYPRE_Int  r, p, width;
   HYPRE_Int  fewest = row_ptr[1] - row_ptr[0];
   HYPRE_Int  most   = 0;
   HYPRE_Int  total  = 0;
   double     acc;
   double     sum_lo = 0.0;
   double     sum_hi;

   /* seed both row-sum extremes with the sum of row 0 */
   for (p = row_ptr[0]; p < row_ptr[1]; p++)
      sum_lo += values[p];
   sum_hi = sum_lo;

   for (r = 0; r < num_rows; r++)
   {
      width  = row_ptr[r+1] - row_ptr[r];
      fewest = hypre_min(width, fewest);
      most   = hypre_max(width, most);
      total += width;

      acc = 0.0;
      for (p = row_ptr[r]; p < row_ptr[r+1]; p++)
         acc += values[p];

      sum_lo = hypre_min(acc, sum_lo);
      sum_hi = hypre_max(acc, sum_hi);
   }

   *min_entries_ptr   = fewest;
   *max_entries_ptr   = most;
   *total_entries_ptr = total;
   *min_rowsum_ptr    = sum_lo;
   *max_rowsum_ptr    = sum_hi;
}

/*--------------------------------------------------------------------------
 * hypre_AMGSetupStats
 *
 * Prints a summary of an AMG hierarchy: the setup parameters, one line of
 * statistics per level for the operator matrices, one line per level for
 * the interpolation matrices, and the operator and grid complexities
 * (sums over the levels of nonzeros resp. rows relative to level 0).
 * Finishes by calling hypre_WriteSolverParams.  Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_AMGSetupStats( void *amg_vdata )
{
   hypre_AMGData    *amg_data = amg_vdata;

   hypre_CSRMatrix **A_array    = hypre_AMGDataAArray(amg_data);
   hypre_CSRMatrix **P_array    = hypre_AMGDataPArray(amg_data);
   HYPRE_Int         num_levels = hypre_AMGDataNumLevels(amg_data);

   hypre_CSRMatrix  *mat;
   double           *vals;
   HYPRE_Int         lev, p;
   HYPRE_Int         n_rows, n_cols, nnz;
   HYPRE_Int         fewest, most, total;
   double            mean_entries;
   double            sum_lo, sum_hi;
   double            density;
   double            w_lo, w_hi;
   double            op_complxty   = 0;
   double            grid_complxty = 0;
   double            nnz_fine;
   double            rows_fine;

   hypre_printf("\n AMG SETUP PARAMETERS:\n\n");
   hypre_printf(" Strength threshold = %f\n",hypre_AMGDataStrongThreshold(amg_data));
   hypre_printf(" Max levels = %d\n",hypre_AMGDataMaxLevels(amg_data));
   hypre_printf(" Num levels = %d\n\n",num_levels);

   hypre_printf( "\nOperator Matrix Information:\n\n");

   hypre_printf(" nonzero entries p");
   hypre_printf("er row row sums\n");
   hypre_printf("lev rows entries sparse min max ");
   hypre_printf("avg min max\n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    * Operator statistics, one line per level
    *-----------------------------------------------------*/

   rows_fine = (double) hypre_CSRMatrixNumRows(A_array[0]);
   nnz_fine  = (double) hypre_CSRMatrixNumNonzeros(A_array[0]);

   for (lev = 0; lev < num_levels; lev++)
   {
      mat    = A_array[lev];
      n_rows = hypre_CSRMatrixNumRows(mat);
      nnz    = hypre_CSRMatrixNumNonzeros(mat);

      density = nnz /((double) n_rows * (double) n_rows);
      op_complxty   += ((double)nnz/nnz_fine);
      grid_complxty += ((double)n_rows/rows_fine);

      hypre_AMGStatsScanRows(hypre_CSRMatrixI(mat), hypre_CSRMatrixData(mat),
                             n_rows, &fewest, &most, &total,
                             &sum_lo, &sum_hi);

      mean_entries = ((double) total) / ((double) n_rows);

      hypre_printf( "%2d %5d %7d %0.3f %3d %3d",
                    lev, n_rows, nnz, density, fewest, most);
      hypre_printf(" %4.1f %10.3e %10.3e\n", mean_entries, sum_lo, sum_hi);
   }

   hypre_printf( "\n\nInterpolation Matrix Information:\n\n");

   hypre_printf(" entries/row min max");
   hypre_printf(" row sums\n");
   hypre_printf("lev rows cols min max ");
   hypre_printf(" weight weight min max \n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    * Interpolation statistics, one line per level
    *-----------------------------------------------------*/

   for (lev = 0; lev < num_levels-1; lev++)
   {
      mat    = P_array[lev];
      vals   = hypre_CSRMatrixData(mat);
      n_rows = hypre_CSRMatrixNumRows(mat);
      n_cols = hypre_CSRMatrixNumCols(mat);
      nnz    = hypre_CSRMatrixNumNonzeros(mat);

      /* weight range: entries equal to 1.0 are left out of the search,
         but the minimum starts from the first stored value */
      w_lo = vals[0];
      w_hi = 0.0;
      for (p = 0; p < nnz; p++)
      {
         if (vals[p] != 1.0)
         {
            w_lo = hypre_min(w_lo,vals[p]);
            w_hi = hypre_max(w_hi,vals[p]);
         }
      }

      /* total is computed by the scan but not reported for P */
      hypre_AMGStatsScanRows(hypre_CSRMatrixI(mat), vals,
                             n_rows, &fewest, &most, &total,
                             &sum_lo, &sum_hi);

      hypre_printf( "%2d %5d x %-5d %3d %3d",
                    lev, n_rows, n_cols, fewest, most);
      hypre_printf(" %5.3e %5.3e %5.3e %5.3e\n",
                   w_lo, w_hi, sum_lo, sum_hi);
   }

   hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty);
   hypre_printf(" Grid Complexity: %8.3f\n", grid_complxty);

   hypre_WriteSolverParams(amg_data);

   return(0);
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixConvertFromParCSRMatrix
 *
 * Creates a new ParCSR block matrix with square blocks of size
 * matrix_C_block_size from the scalar ParCSR matrix 'matrix'.
 *
 *  - The row/column partitions and global sizes of the result are those of
 *    the input divided by the block size (integer division).
 *  - The diag part is converted by hypre_CSRBlockMatrixConvertFromCSRMatrix.
 *  - The offd part is converted here, because its column indices are local
 *    and have to be translated through col_map_offd.
 *  - No communication package is built for the result (it is set to NULL).
 *
 * The input matrix is not modified.  The returned matrix is newly created;
 * NOTE(review): presumably the caller is responsible for destroying it.
 *--------------------------------------------------------------------------*/
hypre_ParCSRBlockMatrix *
hypre_ParCSRBlockMatrixConvertFromParCSRMatrix(hypre_ParCSRMatrix *matrix,
                                               HYPRE_Int matrix_C_block_size )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(matrix);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(matrix);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols(offd);
   /* 'matrix' is a scalar ParCSR matrix, so use the ParCSR accessor */
   HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix);
   HYPRE_Int *map_to_node=NULL, *counter=NULL, *col_in_j_map=NULL;
   HYPRE_Int *matrix_C_col_map_offd = NULL;

   HYPRE_Int matrix_C_num_cols_offd;
   HYPRE_Int matrix_C_num_nonzeros_offd;
   HYPRE_Int num_rows, num_nodes;

   HYPRE_Int *offd_i = hypre_CSRMatrixI(offd);
   HYPRE_Int *offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Complex *offd_data = hypre_CSRMatrixData(offd);

   hypre_ParCSRBlockMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;
   hypre_CSRBlockMatrix *matrix_C_diag;
   hypre_CSRBlockMatrix *matrix_C_offd;

   HYPRE_Int *matrix_C_offd_i=NULL, *matrix_C_offd_j = NULL;
   HYPRE_Complex *matrix_C_offd_data = NULL;

   HYPRE_Int num_procs, i, j, k, k_map, count, index, start_index, pos, row;

   hypre_MPI_Comm_size(comm,&num_procs);

   /* block partitions: scalar partitions divided by the block size */
#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for(i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for(i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]/matrix_C_block_size;
      matrix_C_col_starts[i] = col_starts[i]/matrix_C_block_size;
   }
#endif

   /************* create the diagonal part ************/
   matrix_C_diag = hypre_CSRBlockMatrixConvertFromCSRMatrix(diag,
                                                            matrix_C_block_size);

   /******* the offd part *******************/

   /* can't use the same function for the offd part - because this isn't
      square and the offd j entries aren't global numbering (have to
      consider the offd map) - need to look at col_map_offd first */

   /* figure out the new number of offd columns (num rows is same as diag) */
   num_rows = hypre_CSRMatrixNumRows(diag);
   num_nodes = num_rows/matrix_C_block_size;

   matrix_C_offd_i = hypre_CTAlloc(HYPRE_Int, num_nodes + 1);

   matrix_C_num_cols_offd = 0;
   matrix_C_offd_i[0] = 0;
   matrix_C_num_nonzeros_offd = 0;

   if (num_cols_offd)
   {
      /* global scalar column -> global block (nodal) column */
      map_to_node = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
      matrix_C_num_cols_offd = 1;
      map_to_node[0] = col_map_offd[0]/matrix_C_block_size;
      for (i=1; i < num_cols_offd; i++)
      {
         map_to_node[i] = col_map_offd[i]/matrix_C_block_size;
         if (map_to_node[i] > map_to_node[i-1]) matrix_C_num_cols_offd++;
      }

      matrix_C_col_map_offd = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      col_in_j_map = hypre_CTAlloc(HYPRE_Int, num_cols_offd);

      /* fill in the block col_map_offd - these are global numbers.
         col_in_j_map translates a local scalar offd column into the local
         block offd column it belongs to */
      matrix_C_col_map_offd[0] = map_to_node[0];
      col_in_j_map[0] = 0;
      count = 1;
      for (i=1; i < num_cols_offd; i++)
      {
         if (map_to_node[i] > map_to_node[i-1])
         {
            matrix_C_col_map_offd[count++] = map_to_node[i];
         }
         col_in_j_map[i] = count - 1;
      }

      /* now figure the nonzeros: counter[c] is the last block row in which
         block column c was counted */
      matrix_C_num_nonzeros_offd = 0;
      counter = hypre_CTAlloc(HYPRE_Int, matrix_C_num_cols_offd);
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         matrix_C_offd_i[i] = matrix_C_num_nonzeros_offd;
         for (j=0; j < matrix_C_block_size; j++)
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* single row */
            {
               k_map = col_in_j_map[offd_j[k]];
               if (counter[k_map] < i) /* not yet counted for this block row */
               {
                  counter[k_map] = i;
                  matrix_C_num_nonzeros_offd++;
               }
            }
         }
      }
      /* fill in final i entry */
      matrix_C_offd_i[num_nodes] = matrix_C_num_nonzeros_offd;
   }

   /* create offd matrix */
   matrix_C_offd = hypre_CSRBlockMatrixCreate(matrix_C_block_size, num_nodes,
                                              matrix_C_num_cols_offd,
                                              matrix_C_num_nonzeros_offd);

   /* assign i */
   hypre_CSRBlockMatrixI(matrix_C_offd) = matrix_C_offd_i;

   /* create (and allocate j and data) */
   if (matrix_C_num_nonzeros_offd)
   {
      matrix_C_offd_j = hypre_CTAlloc(HYPRE_Int, matrix_C_num_nonzeros_offd);
      matrix_C_offd_data =
         hypre_CTAlloc(HYPRE_Complex,
                       matrix_C_num_nonzeros_offd*matrix_C_block_size*
                       matrix_C_block_size);
      hypre_CSRBlockMatrixJ(matrix_C_offd) = matrix_C_offd_j;
      /* NOTE(review): the scalar CSR data accessor is applied to a block
         matrix here (kept from the original); presumably both structs name
         the field identically - confirm and switch to the block accessor */
      hypre_CSRMatrixData(matrix_C_offd) = matrix_C_offd_data;

      /* from here on counter[c] is the position in matrix_C_offd_j of block
         column c within the current block row (valid if >= start_index) */
      for (i=0; i < matrix_C_num_cols_offd; i++)
         counter[i] = -1;

      index = 0;        /* keep track of entry in matrix_C_offd_j */
      start_index = 0;  /* first index for current block row */
      for (i=0; i < num_nodes; i++) /* for each block row */
      {
         for (j=0; j < matrix_C_block_size; j++) /* for each row in block */
         {
            row = i*matrix_C_block_size+j;
            for (k=offd_i[row]; k < offd_i[row+1]; k++) /* row's cols */
            {
               k_map = col_in_j_map[offd_j[k]]; /* block col for offd */
               if (counter[k_map] < start_index)
               {
                  /* first entry of this block in the current block row */
                  counter[k_map] = index;
                  matrix_C_offd_j[index] = k_map;
                  index++;
               }
               /* copy the data: which block (position in the j array)
                  + which row in the block + which column in the block */
               pos = (counter[k_map] * matrix_C_block_size *
                      matrix_C_block_size) + (j * matrix_C_block_size) +
                     col_map_offd[offd_j[k]]%matrix_C_block_size;
               matrix_C_offd_data[pos] = offd_data[k];
            }
         }
         start_index = index; /* first index for the next block row */
      }
   }

   /* *********create the new matrix *************/
   matrix_C = hypre_ParCSRBlockMatrixCreate(comm, matrix_C_block_size,
                                            global_num_rows/matrix_C_block_size,
                                            global_num_cols/matrix_C_block_size,
                                            matrix_C_row_starts,
                                            matrix_C_col_starts,
                                            matrix_C_num_cols_offd,
                                            hypre_CSRBlockMatrixNumNonzeros(matrix_C_diag),
                                            matrix_C_num_nonzeros_offd);

   /* use the diag and off diag matrices we have already created */
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRBlockMatrixDiag(matrix_C));
   hypre_ParCSRBlockMatrixDiag(matrix_C) = matrix_C_diag;
   hypre_CSRBlockMatrixDestroy(hypre_ParCSRBlockMatrixOffd(matrix_C));
   hypre_ParCSRBlockMatrixOffd(matrix_C) = matrix_C_offd;

   hypre_ParCSRBlockMatrixColMapOffd(matrix_C) = matrix_C_col_map_offd;

   /* *********don't bother to copy the comm_pkg *************/
   hypre_ParCSRBlockMatrixCommPkg(matrix_C) = NULL;

   /* clean up the scratch arrays */
   hypre_TFree(map_to_node);
   hypre_TFree(col_in_j_map);
   hypre_TFree(counter);

   return matrix_C;
}