/* Function: hypre_CSRMatrixElimCreate
   Prepare the Ae matrix: count nnz, initialize I, allocate J and data. */
void hypre_CSRMatrixElimCreate(hypre_CSRMatrix *A,
                               hypre_CSRMatrix *Ae,
                               HYPRE_Int nrows, HYPRE_Int *rows,
                               HYPRE_Int ncols, HYPRE_Int *cols,
                               HYPRE_Int *col_mark)
{
   HYPRE_Int i, j, col;
   HYPRE_Int A_beg, A_end;

   HYPRE_Int *A_i    = hypre_CSRMatrixI(A);
   HYPRE_Int *A_j    = hypre_CSRMatrixJ(A);
   HYPRE_Int  A_rows = hypre_CSRMatrixNumRows(A);

   hypre_CSRMatrixI(Ae) = hypre_TAlloc(HYPRE_Int, A_rows+1);

   HYPRE_Int *Ae_i = hypre_CSRMatrixI(Ae);
   HYPRE_Int  nnz = 0;

   for (i = 0; i < A_rows; i++)
   {
      Ae_i[i] = nnz;

      A_beg = A_i[i];
      A_end = A_i[i+1];

      if (hypre_BinarySearch(rows, i, nrows) >= 0)
      {
         /* full row */
         nnz += A_end - A_beg;

         if (col_mark)
         {
            for (j = A_beg; j < A_end; j++)
            {
               col_mark[A_j[j]] = 1;
            }
         }
      }
      else
      {
         /* count columns */
         for (j = A_beg; j < A_end; j++)
         {
            col = A_j[j];
            if (hypre_BinarySearch(cols, col, ncols) >= 0)
            {
               nnz++;
               if (col_mark)
               {
                  col_mark[col] = 1;
               }
            }
         }
      }
   }
   Ae_i[A_rows] = nnz;

   hypre_CSRMatrixJ(Ae) = hypre_TAlloc(HYPRE_Int, nnz);
   hypre_CSRMatrixData(Ae) = hypre_TAlloc(HYPRE_Real, nnz);
   hypre_CSRMatrixNumNonzeros(Ae) = nnz;
}
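/* Usage sketch (illustrative, not part of the library): size the elimination
   matrix for one eliminated row/column.  It assumes `rows` and `cols` are
   sorted ascending, since the routine above locates them with
   hypre_BinarySearch.  A second pass (not shown here) would copy the
   eliminated entries into Ae. */
static void example_elim_create(hypre_CSRMatrix *A)
{
   HYPRE_Int rows[] = {1};   /* eliminate row 1 */
   HYPRE_Int cols[] = {1};   /* eliminate column 1 */
   hypre_CSRMatrix *Ae = hypre_CSRMatrixCreate(hypre_CSRMatrixNumRows(A),
                                               hypre_CSRMatrixNumCols(A), 0);

   /* first pass: counts nnz, fills Ae's I array, allocates J and data */
   hypre_CSRMatrixElimCreate(A, Ae, 1, rows, 1, cols, NULL);
}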
/******************************************************************************
 *
 * hypre_IJMatrixSetOffDiagRowSizesPETSc
 *    sets offd_i inside the offd part of the ParCSRMatrix;
 *    requires exact sizes for offd, in cumulative (row-start) form:
 *    sizes[] must have local_num_rows+1 entries, and sizes[local_num_rows]
 *    is taken as the offd nonzero count
 *
 *****************************************************************************/

HYPRE_Int
hypre_IJMatrixSetOffDiagRowSizesPETSc(hypre_IJMatrix *matrix,
                                      HYPRE_Int      *sizes)
{
   HYPRE_Int local_num_rows;
   HYPRE_Int i;
   hypre_ParCSRMatrix *par_matrix;
   hypre_CSRMatrix *offd;
   HYPRE_Int *offd_i;

   par_matrix = hypre_IJMatrixLocalStorage(matrix);
   if (!par_matrix) return -1;

   offd = hypre_ParCSRMatrixOffd(par_matrix);
   offd_i = hypre_CSRMatrixI(offd);
   local_num_rows = hypre_CSRMatrixNumRows(offd);

   if (!offd_i) offd_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   for (i = 0; i < local_num_rows+1; i++)
   {
      offd_i[i] = sizes[i];
   }
   hypre_CSRMatrixI(offd) = offd_i;
   hypre_CSRMatrixNumNonzeros(offd) = offd_i[local_num_rows];
   return 0;
}
/******************************************************************************
 *
 * hypre_IJMatrixSetDiagRowSizesPETSc
 *    sets diag_i inside the diag part of the ParCSRMatrix;
 *    requires exact sizes for diag, in the same cumulative (row-start)
 *    form as the offd variant above
 *
 *****************************************************************************/

HYPRE_Int
hypre_IJMatrixSetDiagRowSizesPETSc(hypre_IJMatrix *matrix,
                                   HYPRE_Int      *sizes)
{
   HYPRE_Int local_num_rows;
   HYPRE_Int i;
   hypre_ParCSRMatrix *par_matrix;
   hypre_CSRMatrix *diag;
   HYPRE_Int *diag_i;

   par_matrix = hypre_IJMatrixLocalStorage(matrix);
   if (!par_matrix) return -1;

   diag = hypre_ParCSRMatrixDiag(par_matrix);
   diag_i = hypre_CSRMatrixI(diag);
   local_num_rows = hypre_CSRMatrixNumRows(diag);

   if (!diag_i) diag_i = hypre_CTAlloc(HYPRE_Int, local_num_rows+1);
   for (i = 0; i < local_num_rows+1; i++)
   {
      diag_i[i] = sizes[i];
   }
   hypre_CSRMatrixI(diag) = diag_i;
   hypre_CSRMatrixNumNonzeros(diag) = diag_i[local_num_rows];
   return 0;
}
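/* Illustration (hypothetical helper, not in hypre): both PETSc-style setters
   above read sizes[0..local_num_rows] and take sizes[local_num_rows] as the
   nonzero count, so they expect cumulative row starts rather than per-row
   counts.  Per-row counts must be prefix-summed first; n is assumed to equal
   the local number of rows of the diag block. */
static void example_set_diag_sizes(hypre_IJMatrix *ij, HYPRE_Int n,
                                   HYPRE_Int *row_counts)
{
   HYPRE_Int i;
   HYPRE_Int *starts = hypre_CTAlloc(HYPRE_Int, n+1);

   starts[0] = 0;
   for (i = 0; i < n; i++)
   {
      starts[i+1] = starts[i] + row_counts[i];   /* prefix sum */
   }
   hypre_IJMatrixSetDiagRowSizesPETSc(ij, starts); /* copies element-wise, */
   hypre_TFree(starts);                            /* so safe to free     */
}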
HYPRE_Int hypre_AMGSetupStats( void *amg_vdata )
{
   hypre_AMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   hypre_CSRMatrix **A_array;
   hypre_CSRMatrix **P_array;

   HYPRE_Int num_levels;
   HYPRE_Int num_nonzeros;
   /* HYPRE_Int amg_ioutdat;
      char *log_file_name; */

   /* Local variables */
   HYPRE_Int *A_i;
   double    *A_data;
   HYPRE_Int *P_i;
   double    *P_data;

   HYPRE_Int level;
   HYPRE_Int i, j;
   HYPRE_Int fine_size;
   HYPRE_Int coarse_size;
   HYPRE_Int entries;
   HYPRE_Int total_entries;
   HYPRE_Int min_entries;
   HYPRE_Int max_entries;
   double    avg_entries;
   double    rowsum;
   double    min_rowsum;
   double    max_rowsum;
   double    sparse;
   double    min_weight;
   double    max_weight;
   double    op_complxty = 0;
   double    grid_complxty = 0;
   double    num_nz0;
   double    num_var0;

   A_array = hypre_AMGDataAArray(amg_data);
   P_array = hypre_AMGDataPArray(amg_data);
   num_levels = hypre_AMGDataNumLevels(amg_data);
   /* amg_ioutdat = hypre_AMGDataIOutDat(amg_data);
      log_file_name = hypre_AMGDataLogFileName(amg_data); */

   hypre_printf("\n AMG SETUP PARAMETERS:\n\n");
   hypre_printf(" Strength threshold = %f\n",
                hypre_AMGDataStrongThreshold(amg_data));
   hypre_printf(" Max levels = %d\n", hypre_AMGDataMaxLevels(amg_data));
   hypre_printf(" Num levels = %d\n\n", num_levels);

   hypre_printf( "\nOperator Matrix Information:\n\n");

   hypre_printf("         nonzero            entries p");
   hypre_printf("er row        row sums\n");
   hypre_printf("lev rows  entries  sparse  min  max   ");
   hypre_printf("avg       min         max\n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   num_var0 = (double) hypre_CSRMatrixNumRows(A_array[0]);
   num_nz0  = (double) hypre_CSRMatrixNumNonzeros(A_array[0]);

   for (level = 0; level < num_levels; level++)
   {
      A_i = hypre_CSRMatrixI(A_array[level]);
      A_data = hypre_CSRMatrixData(A_array[level]);

      fine_size = hypre_CSRMatrixNumRows(A_array[level]);
      num_nonzeros = hypre_CSRMatrixNumNonzeros(A_array[level]);
      sparse = num_nonzeros / ((double) fine_size * (double) fine_size);
      op_complxty += ((double)num_nonzeros / num_nz0);
      grid_complxty += ((double)fine_size / num_var0);

      min_entries = A_i[1] - A_i[0];
      max_entries = 0;
      total_entries = 0;
      min_rowsum = 0.0;
      max_rowsum = 0.0;

      for (j = A_i[0]; j < A_i[1]; j++)
         min_rowsum += A_data[j];

      max_rowsum = min_rowsum;

      for (j = 0; j < fine_size; j++)
      {
         entries = A_i[j+1] - A_i[j];
         min_entries = hypre_min(entries, min_entries);
         max_entries = hypre_max(entries, max_entries);
         total_entries += entries;

         rowsum = 0.0;
         for (i = A_i[j]; i < A_i[j+1]; i++)
            rowsum += A_data[i];

         min_rowsum = hypre_min(rowsum, min_rowsum);
         max_rowsum = hypre_max(rowsum, max_rowsum);
      }

      avg_entries = ((double) total_entries) / ((double) fine_size);

      hypre_printf( "%2d %5d %7d  %0.3f  %3d %3d",
                    level, fine_size, num_nonzeros, sparse,
                    min_entries, max_entries);
      hypre_printf("  %4.1f  %10.3e  %10.3e\n",
                    avg_entries, min_rowsum, max_rowsum);
   }

   hypre_printf( "\n\nInterpolation Matrix Information:\n\n");

   hypre_printf("               entries/row    min     max");
   hypre_printf("         row sums\n");
   hypre_printf("lev rows  cols  min  max  ");
   hypre_printf("  weight   weight     min       max \n");
   hypre_printf("=======================================");
   hypre_printf("==========================\n");

   /*-----------------------------------------------------
    *  Enter Statistics Loop
    *-----------------------------------------------------*/

   for (level = 0; level < num_levels-1; level++)
   {
      P_i = hypre_CSRMatrixI(P_array[level]);
      P_data = hypre_CSRMatrixData(P_array[level]);

      fine_size = hypre_CSRMatrixNumRows(P_array[level]);
      coarse_size = hypre_CSRMatrixNumCols(P_array[level]);
      num_nonzeros = hypre_CSRMatrixNumNonzeros(P_array[level]);

      min_entries = P_i[1] - P_i[0];
      max_entries = 0;
      total_entries = 0;
      min_rowsum = 0.0;
      max_rowsum = 0.0;
      min_weight = P_data[0];
      max_weight = 0.0;

      for (j = P_i[0]; j < P_i[1]; j++)
         min_rowsum += P_data[j];

      max_rowsum = min_rowsum;

      for (j = 0; j < num_nonzeros; j++)
      {
         if (P_data[j] != 1.0)
         {
            min_weight = hypre_min(min_weight, P_data[j]);
            max_weight = hypre_max(max_weight, P_data[j]);
         }
      }

      for (j = 0; j < fine_size; j++)
      {
         entries = P_i[j+1] - P_i[j];
         min_entries = hypre_min(entries, min_entries);
         max_entries = hypre_max(entries, max_entries);
         total_entries += entries;

         rowsum = 0.0;
         for (i = P_i[j]; i < P_i[j+1]; i++)
            rowsum += P_data[i];

         min_rowsum = hypre_min(rowsum, min_rowsum);
         max_rowsum = hypre_max(rowsum, max_rowsum);
      }

      hypre_printf( "%2d %5d x %-5d %3d %3d",
                    level, fine_size, coarse_size, min_entries, max_entries);
      hypre_printf("  %5.3e  %5.3e %5.3e  %5.3e\n",
                    min_weight, max_weight, min_rowsum, max_rowsum);
   }

   hypre_printf("\n Operator Complexity: %8.3f\n", op_complxty);
   hypre_printf(" Grid Complexity:     %8.3f\n", grid_complxty);

   hypre_WriteSolverParams(amg_data);

   return(0);
}
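/* Worked example (illustrative numbers) for the two complexity measures
   printed above: with per-level nonzero counts {1000, 300, 90}, the operator
   complexity is (1000+300+90)/1000 = 1.39; grid complexity is the analogous
   ratio of row counts.  Values close to 1 indicate a cheap hierarchy. */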
HYPRE_Int main( HYPRE_Int argc, char *argv[] )
{
   hypre_ParCSRMatrix      *par_matrix, *g_matrix, **submatrices;
   hypre_CSRMatrix         *A_diag, *A_offd;
   hypre_CSRBlockMatrix    *diag;
   hypre_CSRBlockMatrix    *offd;
   hypre_ParCSRBlockMatrix *par_blk_matrix, *par_blk_matrixT, *rap_matrix;
   hypre_Vector        *x_local;
   hypre_Vector        *y_local;
   hypre_ParVector     *x;
   hypre_ParVector     *y;
   HYPRE_Solver        gmres_solver, precon;
   HYPRE_Int           *diag_i, *diag_j, *offd_i, *offd_j;
   HYPRE_Int           *diag_i2, *diag_j2, *offd_i2, *offd_j2;
   double              *diag_d, *diag_d2, *offd_d, *offd_d2;
   HYPRE_Int           mypid, local_size, nprocs;
   HYPRE_Int           global_num_rows, global_num_cols, num_cols_offd;
   HYPRE_Int           num_nonzeros_diag, num_nonzeros_offd, *colMap;
   HYPRE_Int           ii, jj, kk, row, col, nnz, *indices, *colMap2;
   double              *data, ddata, *y_data;
   HYPRE_Int           *row_starts, *col_starts, *rstarts, *cstarts;
   HYPRE_Int           *row_starts2, *col_starts2;
   HYPRE_Int           block_size=2, bnnz=4, *index_set;
   FILE                *fp;

   /* --------------------------------------------- */
   /* Initialize MPI                                */
   /* --------------------------------------------- */

   hypre_MPI_Init(&argc, &argv);
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &mypid);
   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &nprocs);

   /* build and fetch matrix */
   MyBuildParLaplacian9pt((HYPRE_ParCSRMatrix *) &par_matrix);
   global_num_rows = hypre_ParCSRMatrixGlobalNumRows(par_matrix);
   global_num_cols = hypre_ParCSRMatrixGlobalNumCols(par_matrix);
   row_starts = hypre_ParCSRMatrixRowStarts(par_matrix);
   col_starts = hypre_ParCSRMatrixColStarts(par_matrix);
   A_diag = hypre_ParCSRMatrixDiag(par_matrix);
   A_offd = hypre_ParCSRMatrixOffd(par_matrix);
   num_cols_offd     = hypre_CSRMatrixNumCols(A_offd);
   num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(A_diag);
   num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(A_offd);

   /* --------------------------------------------- */
   /* build vector and apply matvec                 */
   /* --------------------------------------------- */

   x = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_cols, col_starts);
   hypre_ParVectorSetPartitioningOwner(x, 0);
   hypre_ParVectorInitialize(x);
   x_local = hypre_ParVectorLocalVector(x);
   data = hypre_VectorData(x_local);
   local_size = col_starts[mypid+1] - col_starts[mypid];
   for (ii = 0; ii < local_size; ii++) data[ii] = 1.0;
   y = hypre_ParVectorCreate(hypre_MPI_COMM_WORLD, global_num_rows, row_starts);
   hypre_ParVectorSetPartitioningOwner(y, 0);
   hypre_ParVectorInitialize(y);

   hypre_ParCSRMatrixMatvec(1.0, par_matrix, x, 0.0, y);
   ddata = hypre_ParVectorInnerProd(y, y);
   if (mypid == 0) hypre_printf("y inner product = %e\n", ddata);
   hypre_ParVectorDestroy(x);
   hypre_ParVectorDestroy(y);

   /* --------------------------------------------- */
   /* build block matrix                            */
   /* --------------------------------------------- */

   rstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) rstarts[ii] = row_starts[ii];
   cstarts = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   for (ii = 0; ii <= nprocs; ii++) cstarts[ii] = col_starts[ii];

   par_blk_matrix = hypre_ParCSRBlockMatrixCreate(hypre_MPI_COMM_WORLD,
                          block_size, global_num_rows, global_num_cols,
                          rstarts, cstarts, num_cols_offd,
                          num_nonzeros_diag, num_nonzeros_offd);
   colMap = hypre_ParCSRMatrixColMapOffd(par_matrix);
   if (num_cols_offd > 0) colMap2 = hypre_CTAlloc(HYPRE_Int, num_cols_offd);
   else                   colMap2 = NULL;
   for (ii = 0; ii < num_cols_offd; ii++) colMap2[ii] = colMap[ii];
   hypre_ParCSRBlockMatrixColMapOffd(par_blk_matrix) = colMap2;

   diag_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(par_matrix));
   diag_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixDiag(par_matrix));
   diag_d = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(par_matrix));
   diag = hypre_ParCSRBlockMatrixDiag(par_blk_matrix);
   diag_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   diag_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_diag);
   diag_d2 = hypre_CTAlloc(double, num_nonzeros_diag*bnnz);
   for (ii = 0; ii <= local_size; ii++) diag_i2[ii] = diag_i[ii];
   for (ii = 0; ii < num_nonzeros_diag; ii++) diag_j2[ii] = diag_j[ii];
   hypre_CSRBlockMatrixI(diag) = diag_i2;
   hypre_CSRBlockMatrixJ(diag) = diag_j2;

   /* expand each scalar entry into an upper-triangular block */
   for (ii = 0; ii < num_nonzeros_diag; ii++)
   {
      for (jj = 0; jj < block_size; jj++)
         for (kk = 0; kk < block_size; kk++)
         {
            if (jj <= kk) diag_d2[ii*bnnz+jj*block_size+kk] = diag_d[ii];
            else          diag_d2[ii*bnnz+jj*block_size+kk] = 0.0;
         }
   }
   hypre_CSRBlockMatrixData(diag) = diag_d2;

   offd_i = hypre_CSRMatrixI(hypre_ParCSRMatrixOffd(par_matrix));
   offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(par_matrix));
   offd_d = hypre_CSRMatrixData(hypre_ParCSRMatrixOffd(par_matrix));
   offd = hypre_ParCSRBlockMatrixOffd(par_blk_matrix);
   offd_i2 = hypre_CTAlloc(HYPRE_Int, local_size+1);
   for (ii = 0; ii <= local_size; ii++) offd_i2[ii] = offd_i[ii];
   hypre_CSRBlockMatrixI(offd) = offd_i2;
   if (num_cols_offd)
   {
      offd_j2 = hypre_CTAlloc(HYPRE_Int, num_nonzeros_offd);
      for (ii = 0; ii < num_nonzeros_offd; ii++) offd_j2[ii] = offd_j[ii];
      hypre_CSRBlockMatrixJ(offd) = offd_j2;
      offd_d2 = hypre_CTAlloc(double, num_nonzeros_offd*bnnz);
      for (ii = 0; ii < num_nonzeros_offd; ii++)
      {
         for (jj = 0; jj < block_size; jj++)
            for (kk = 0; kk < block_size; kk++)
            {
               if (jj <= kk) offd_d2[ii*bnnz+jj*block_size+kk] = offd_d[ii];
               else          offd_d2[ii*bnnz+jj*block_size+kk] = 0.0;
            }
      }
      hypre_CSRBlockMatrixData(offd) = offd_d2;
   }
   else
   {
hypre_ParCSRMatrix * hypre_ParMatMinus_F( hypre_ParCSRMatrix * P,
                                          hypre_ParCSRMatrix * C,
                                          HYPRE_Int * CF_marker )
/* hypre_ParMatMinus_F subtracts selected rows of its second argument
   from selected rows of its first argument.  The marker array determines
   which rows are affected - those for which CF_marker<0.
   The result is returned as a new matrix. */
{
   /* If P=(Pik), C=(Cik), i in Fine+Coarse, k in Coarse, we want
      new Pik = Pik - Cik, for Fine i only, all k.
      This computation is purely local. */

   /* This is _not_ a general-purpose matrix subtraction function.
      This is written for an interpolation problem where it is known that
      C(i,k) exists whenever P(i,k) does (because C=A*P where A has nonzero
      diagonal elements). */

   hypre_ParCSRMatrix *Pnew;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;
   double    *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double    *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int *P_col_map_offd = hypre_ParCSRMatrixColMapOffd( P );
   double    *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double    *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int *C_offd_j = hypre_CSRMatrixJ(C_offd);
   HYPRE_Int *C_col_map_offd = hypre_ParCSRMatrixColMapOffd( C );
   HYPRE_Int *Pnew_diag_i;
   HYPRE_Int *Pnew_diag_j;
   double    *Pnew_diag_data;
   HYPRE_Int *Pnew_offd_i;
   HYPRE_Int *Pnew_offd_j;
   double    *Pnew_offd_data;
   HYPRE_Int *Pnew_j2m;
   HYPRE_Int *Pnew_col_map_offd;

   HYPRE_Int num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   /* HYPRE_Int num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd); */
   HYPRE_Int num_cols_offd_C = hypre_CSRMatrixNumCols(C_offd);
   HYPRE_Int num_cols_offd_P = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int num_cols_offd_Pnew, num_rows_offd_Pnew;

   HYPRE_Int i1, jmin, jmax, jrange, jrangem1;
   HYPRE_Int j, m, mc, mp, jc, jp, jP, jC, jg, jCg, jPg;
   double dc, dp;

   /* Pnew = hypre_ParCSRMatrixCompleteClone( C ); */
   Pnew = hypre_ParCSRMatrixUnion( C, P );
   hypre_ParCSRMatrixZero_F( Pnew, CF_marker );  /* fine rows of Pnew set to 0 */
   hypre_ParCSRMatrixCopy_C( Pnew, C, CF_marker );  /* coarse rows of Pnew
                                                       copied from C (or P) */
   /* ...Zero_F may not be needed depending on how Pnew is made */

   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_diag_i = hypre_CSRMatrixI(Pnew_diag);
   Pnew_diag_j = hypre_CSRMatrixJ(Pnew_diag);
   Pnew_offd_i = hypre_CSRMatrixI(Pnew_offd);
   Pnew_offd_j = hypre_CSRMatrixJ(Pnew_offd);
   Pnew_diag_data = hypre_CSRMatrixData(Pnew_diag);
   Pnew_offd_data = hypre_CSRMatrixData(Pnew_offd);
   Pnew_col_map_offd = hypre_ParCSRMatrixColMapOffd( Pnew );
   num_rows_offd_Pnew = hypre_CSRMatrixNumRows(Pnew_offd);
   num_cols_offd_Pnew = hypre_CSRMatrixNumCols(Pnew_offd);

   /* Find the j-ranges, needed to allocate a "reverse lookup" array.
      This is the max j - min j over P and Pnew (which here is a copy of C).
      Only the diag block is considered. */
   /* For scalability reasons (jrange can get big) this won't work for the
      offd block.  Also, indexing is more complicated in the offd block
      (c.f. col_map_offd).
      It's not clear, though, whether the "quadratic" algorithm I'm using for
      the offd block is really any slower than the more complicated "linear"
      algorithm here. */
   jrange = 0;
   jrangem1 = -1;
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      /* only Fine rows matter */
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )
      {
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1+1]-1 ];
         jrangem1 = jmax - jmin;
         jrange = hypre_max(jrange, jrangem1+1);
         /* If columns (of a given row) were in increasing order, the above
            would be sufficient.  If not, the following is necessary (and
            sufficient). */
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         jmax = Pnew_diag_j[ Pnew_diag_i[i1] ];
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
            jmax = hypre_max( jmax, j );
         }
         jrangem1 = jmax - jmin;
         jrange = hypre_max(jrange, jrangem1+1);
      }
   }

   /*-----------------------------------------------------------------------
    *  Loop over Pnew_diag rows.  Construct a temporary reverse array:
    *  If j is a column number, Pnew_j2m[j] is the array index for j, i.e.
    *  Pnew_diag_j[ Pnew_j2m[j] ] = j
    *-----------------------------------------------------------------------*/

   Pnew_j2m = hypre_CTAlloc( HYPRE_Int, jrange );

   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      /* Fine data only */
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_diag)>0 )
      {
         /* just needed for an assertion below... */
         for ( j=0; j<jrange; ++j )
            Pnew_j2m[j] = -1;
         jmin = Pnew_diag_j[ Pnew_diag_i[i1] ];
         /* If columns (of a given row) were in increasing order, the above
            line would be sufficient.  If not, the following loop has to be
            added (or the jmin computed above stored). */
         for ( m=Pnew_diag_i[i1]+1; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
         {
            j = P_diag_j[m];
            jmin = hypre_min( jmin, j );
         }
         for ( m = Pnew_diag_i[i1]; m<Pnew_diag_i[i1+1]; ++m )
         {
            j = Pnew_diag_j[m];
            hypre_assert( j-jmin>=0 );
            hypre_assert( j-jmin<jrange );
            Pnew_j2m[ j-jmin ] = m;
         }

         /*------------------------------------------------------------------
          * Loop over C_diag data for the current row.
          * Subtract each C data entry from the corresponding Pnew entry.
          *------------------------------------------------------------------*/

         for ( mc=C_diag_i[i1]; mc<C_diag_i[i1+1]; ++mc )
         {
            jc = C_diag_j[mc];
            dc = C_diag_data[mc];
            m = Pnew_j2m[jc-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] -= dc;
         }

         /*------------------------------------------------------------------
          * Loop over P_diag data for the current row.
          * Add each P data entry to the corresponding Pnew entry.
          *------------------------------------------------------------------*/

         for ( mp=P_diag_i[i1]; mp<P_diag_i[i1+1]; ++mp )
         {
            jp = P_diag_j[mp];
            dp = P_diag_data[mp];
            m = Pnew_j2m[jp-jmin];
            hypre_assert( m>=0 );
            Pnew_diag_data[m] += dp;
         }
      }
   }

   /*-----------------------------------------------------------------------
    * Repeat for the offd block.
    *-----------------------------------------------------------------------*/

   for ( i1 = 0; i1 < num_rows_offd_Pnew; i1++ )
   {
      /* Fine data only */
      if ( CF_marker[i1]<0 && hypre_CSRMatrixNumNonzeros(Pnew_offd)>0 )
      {
         if ( num_cols_offd_Pnew )
         {
            /* This is a simple quadratic algorithm.  If necessary I may try
               to implement the ideas used on the diag block later. */
            for ( m = Pnew_offd_i[i1]; m<Pnew_offd_i[i1+1]; ++m )
            {
               j = Pnew_offd_j[m];
               jg = Pnew_col_map_offd[j];
               Pnew_offd_data[m] = 0;
               if ( num_cols_offd_C )
                  for ( mc=C_offd_i[i1]; mc<C_offd_i[i1+1]; ++mc )
                  {
                     jC = C_offd_j[mc];
                     jCg = C_col_map_offd[jC];
                     if ( jCg==jg ) Pnew_offd_data[m] -= C_offd_data[mc];
                  }
               if ( num_cols_offd_P )
                  for ( mp=P_offd_i[i1]; mp<P_offd_i[i1+1]; ++mp )
                  {
                     jP = P_offd_j[mp];
                     jPg = P_col_map_offd[jP];
                     if ( jPg==jg ) Pnew_offd_data[m] += P_offd_data[mp];
                  }
            }
         }
      }
   }

   hypre_TFree(Pnew_j2m);

   return Pnew;
}
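/* Sketch of the j2m reverse lookup used in the diag block above, on a toy
   row (illustrative): if row i1 has columns {4, 7, 5}, then jmin = 4 and
   Pnew_j2m[0], Pnew_j2m[3], Pnew_j2m[1] hold the positions of columns 4, 7,
   and 5 in Pnew_diag_j, so an incoming column jc is matched in O(1) as
   m = Pnew_j2m[jc-jmin] instead of re-scanning the row, which is what the
   quadratic offd code does. */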
/* Delete any matrix entry C(i,j) for which the corresponding entry P(i,j)
   doesn't exist - but only for "fine" rows, CF_marker(i)<0.
   This is done as a purely local computation - C and P must have the same
   data distribution (among processors). */
void hypre_ParCSRMatrixDropEntries( hypre_ParCSRMatrix * C,
                                    hypre_ParCSRMatrix * P,
                                    HYPRE_Int * CF_marker )
{
   hypre_CSRMatrix *C_diag = hypre_ParCSRMatrixDiag(C);
   hypre_CSRMatrix *C_offd = hypre_ParCSRMatrixOffd(C);
   double    *C_diag_data = hypre_CSRMatrixData(C_diag);
   HYPRE_Int *C_diag_i = hypre_CSRMatrixI(C_diag);
   HYPRE_Int *C_diag_j = hypre_CSRMatrixJ(C_diag);
   double    *C_offd_data = hypre_CSRMatrixData(C_offd);
   HYPRE_Int *C_offd_i = hypre_CSRMatrixI(C_offd);
   HYPRE_Int *C_offd_j = hypre_CSRMatrixJ(C_offd);
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int *new_C_diag_i;
   HYPRE_Int *new_C_offd_i;
   HYPRE_Int num_rows_diag_C = hypre_CSRMatrixNumRows(C_diag);
   HYPRE_Int num_rows_offd_C = hypre_CSRMatrixNumRows(C_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(C_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(C_offd);
   double v, old_sum, new_sum, scale;
   HYPRE_Int i1, m, m1d, m1o, jC, mP, keep;

   /* Repack the i, j, and data arrays of C so as to discard those elements
      for which there is no corresponding element in P.
      Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused
      space at the ends of the arrays. */
   new_C_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_C+1 );
   new_C_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_C+1 );
   m1d = C_diag_i[0];
   m1o = C_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_C; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=C_diag_i[i1]; m<C_diag_i[i1+1]; ++m )
      {
         v = C_diag_data[m];
         jC = C_diag_j[m];
         old_sum += v;
         /* Do we know anything about the order of P_diag_j?  It would be
            better not to search through it all here.  If we know nothing,
            some ordering or index scheme will be needed for efficiency
            (worth doing iff this function gets called at all)
            (may2006: this function is no longer called) */
         keep = 0;
         for ( mP=P_diag_i[i1]; mP<P_diag_i[i1+1]; ++mP )
         {
            if ( jC==P_diag_j[mP] )  /* compare against the mP-th entry of P's row */
            {
               keep = 1;
               break;
            }
         }
         if ( CF_marker[i1]>=0 || keep==1 )
         {  /* keep v in C */
            new_sum += v;
            C_diag_j[m1d] = C_diag_j[m];
            C_diag_data[m1d] = C_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=C_offd_i[i1]; m<C_offd_i[i1+1]; ++m )
      {
         v = C_offd_data[m];
         jC = C_offd_j[m];  /* the offd block's own column array */
         old_sum += v;
         keep = 0;
         for ( mP=P_offd_i[i1]; mP<P_offd_i[i1+1]; ++mP )
         {
            if ( jC==P_offd_j[mP] )
            {
               keep = 1;
               break;
            }
         }
         if ( CF_marker[i1]>=0 || keep==1 )
         {  /* keep v in C */
            new_sum += v;
            C_offd_j[m1o] = C_offd_j[m];
            C_offd_data[m1o] = C_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }
      new_C_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_C ) new_C_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_C_diag_i[i1]; m<new_C_diag_i[i1+1]; ++m )
         C_diag_data[m] *= scale;
      if ( i1<num_rows_offd_C )  /* this test fails when there is no offd block */
         for ( m=new_C_offd_i[i1]; m<new_C_offd_i[i1+1]; ++m )
            C_offd_data[m] *= scale;
   }

   for ( i1 = 1; i1 <= num_rows_diag_C; i1++ )
   {
      C_diag_i[i1] = new_C_diag_i[i1];
      if ( i1<num_rows_offd_C ) C_offd_i[i1] = new_C_offd_i[i1];
   }
   hypre_TFree( new_C_diag_i );
   if ( num_rows_offd_C>0 ) hypre_TFree( new_C_offd_i );

   hypre_CSRMatrixNumNonzeros(C_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(C_offd) = num_nonzeros_offd;
   /* SetNumNonzeros, SetDNumNonzeros are global, need hypre_MPI_Allreduce.
      I suspect, but don't know, that other parts of hypre do not assume
      that the correct values have been set.
      hypre_ParCSRMatrixSetNumNonzeros( C );
      hypre_ParCSRMatrixSetDNumNonzeros( C ); */
   hypre_ParCSRMatrixNumNonzeros( C ) = 0;
   hypre_ParCSRMatrixDNumNonzeros( C ) = 0.0;
}
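/* Illustration of the in-place repack used above (toy row): if a row holds
   entries {a, b, c} at positions 5..7 and b is dropped, a and c are shifted
   down to positions 5..6, new_C_diag_i[i1+1] records the new row end, and
   the freed slot becomes unused slack at the tail of the arrays. */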
void hypre_BoomerAMGJacobiInterp_1( hypre_ParCSRMatrix * A,
                                    hypre_ParCSRMatrix ** P,
                                    hypre_ParCSRMatrix * S,
                                    HYPRE_Int * CF_marker,
                                    HYPRE_Int level,
                                    HYPRE_Real truncation_threshold,
                                    HYPRE_Real truncation_threshold_minus,
                                    HYPRE_Int * dof_func,
                                    HYPRE_Int * dof_func_offd,
                                    HYPRE_Real weight_AF)
/* One step of Jacobi interpolation:
   A is the linear system.
   P is an interpolation matrix, input and output.
   CF_marker identifies coarse and fine points.
   If we imagine P and A as split into coarse and fine submatrices,

       [ AFF  AFC ]   [ AF ]          [ IFC ]
   A = [          ] = [    ] ,    P = [     ]
       [ ACF  ACC ]   [ AC ]          [ ICC ]

   (note that ICC is an identity matrix, applied to coarse points only),
   then this function computes

   IFCnew = IFCold - DFF(-1) * ( AFF*IFCold + AFC )
          = IFCold - DFF(-1) * AF * Pold

   where DFF is the diagonal of AFF, (-1) represents the inverse, and
   where "old" denotes a value on entry to this function, "new" a returned
   value. */
{
   hypre_ParCSRMatrix * Pnew;
   hypre_ParCSRMatrix * C;
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P);
   HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int  *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int  *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int  *P_offd_i = hypre_CSRMatrixI(P_offd);
   hypre_CSRMatrix *C_diag;
   hypre_CSRMatrix *C_offd;
   hypre_CSRMatrix *Pnew_diag;
   hypre_CSRMatrix *Pnew_offd;
   HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int i;
   HYPRE_Int Jnochanges=0, Jchanges, Pnew_num_nonzeros;
   HYPRE_Int CF_coarse=0;
   HYPRE_Int * J_marker = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P );
   HYPRE_Int nc, ncmax, ncmin, nc1;
   HYPRE_Int num_procs, my_id;
   MPI_Comm comm = hypre_ParCSRMatrixComm( A );
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   HYPRE_Int m, nmav, npav;
   HYPRE_Real PIi, PIimax, PIimin, PIimav, PIipav, randthresh;
   HYPRE_Real eps = 1.0e-17;
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   char filename[80];
   HYPRE_Int i_dummy, j_dummy;
   HYPRE_Int *base_i_ptr = &i_dummy;
   HYPRE_Int *base_j_ptr = &j_dummy;
#endif
#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   HYPRE_Int sample_rows[50], n_sample_rows=0, isamp;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

   for ( i=0; i<num_rows_diag_P; ++i )
   {
      J_marker[i] = CF_marker[i];
      if (CF_marker[i]>=0) ++CF_coarse;
   }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1, P has %i+%i=%i nonzeros, local sum %e\n",
                my_id, level, hypre_CSRMatrixNumNonzeros(P_diag),
                hypre_CSRMatrixNumNonzeros(P_offd),
                hypre_CSRMatrixNumNonzeros(P_diag)+hypre_CSRMatrixNumNonzeros(P_offd),
                hypre_ParCSRMatrixLocalSumElts(*P) );
#endif

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P in max,min row sums %e %e\n",
                my_id, level, PIimax, PIimin );
#endif

   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1)
         {
            ++nc1;
         }
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#if 0
   /* a very aggressive reduction in how much the Jacobi step does: */
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc>ncmin+1)
         /* if ( nc > ncmin + 0.5*(ncmax-ncmin) ) */
         {
            J_marker[i] = 1;
            ++Jnochanges;
         }
      }
#endif

   Jchanges = num_rows_diag_P - Jnochanges - CF_coarse;

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some rows to be changed: ");
   randthresh = 15/(HYPRE_Real)Jchanges;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      if ( J_marker[i]<0 )
      {
         if ( ((HYPRE_Real)rand())/RAND_MAX < randthresh )
         {
            hypre_printf( "%i: ", i );
            for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
               hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
            hypre_printf(";  ");
            sample_rows[n_sample_rows] = i;
            ++n_sample_rows;
         }
      }
   }
   hypre_printf("\n");
#endif
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i don't change-good, %i coarse\n",
                my_id, level, num_rows_diag_P, Jchanges, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n",
                my_id, level, ncmin, ncmax, nc1 );
#endif
#ifdef HYPRE_JACINT_PRINT_MATRICES
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
   {
      hypre_sprintf( filename, "Ain%i", level );
      hypre_ParCSRMatrixPrintIJ( A,0,0,filename);
      hypre_sprintf( filename, "Sin%i", level );
      hypre_ParCSRMatrixPrintIJ( S,0,0,filename);
      hypre_sprintf( filename, "Pin%i", level );
      hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
   }
#endif

   C = hypre_ParMatmul_FC( A, *P, J_marker, dof_func, dof_func_offd );
   /* hypre_ParMatmul_FC creates and returns C, a variation of the matrix
      product A*P in which only the "Fine"-designated rows have been computed
      (all columns are Coarse because all columns of P are).
      "Fine" is defined solely by the marker array, and for example could be
      a proper subset of the fine points of a multigrid hierarchy.
      As a matrix, C is the size of A*P.  But only the marked rows have been
      computed. */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "C%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
      hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif

   C_diag = hypre_ParCSRMatrixDiag(C);
   C_offd = hypre_ParCSRMatrixOffd(C);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after matmul, C has %i+%i=%i nonzeros, local sum %e\n",
                my_id, level, hypre_CSRMatrixNumNonzeros(C_diag),
                hypre_CSRMatrixNumNonzeros(C_offd),
                hypre_CSRMatrixNumNonzeros(C_diag)+hypre_CSRMatrixNumNonzeros(C_offd),
                hypre_ParCSRMatrixLocalSumElts(C) );
#endif

   hypre_ParMatScaleDiagInv_F( C, A, weight_AF, J_marker );
   /* hypre_ParMatScaleDiagInv_F scales its first argument by premultiplying
      with a submatrix of the inverse of the diagonal of its second argument.
      The marker array determines which diagonal elements are used.
      The marker array should select exactly the right number of diagonal
      elements (the number of rows of AP_FC). */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Cout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
      hypre_ParCSRMatrixPrintIJ( C,0,0,filename);
#endif

   Pnew = hypre_ParMatMinus_F( *P, C, J_marker );
   /* hypre_ParMatMinus_F subtracts rows of its second argument from selected
      rows of its first argument.  The marker array determines which rows of
      the first argument are affected, and they should exactly correspond to
      all the rows of the second argument. */
   Pnew_diag = hypre_ParCSRMatrixDiag(Pnew);
   Pnew_offd = hypre_ParCSRMatrixOffd(Pnew);
   Pnew_num_nonzeros = hypre_CSRMatrixNumNonzeros(Pnew_diag)
                     + hypre_CSRMatrixNumNonzeros(Pnew_offd);
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i Jacobi_Interp_1 after MatMinus, Pnew has %i+%i=%i nonzeros, local sum %e\n",
                my_id, level, hypre_CSRMatrixNumNonzeros(Pnew_diag),
                hypre_CSRMatrixNumNonzeros(Pnew_offd), Pnew_num_nonzeros,
                hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Transfer ownership of col_starts from P to Pnew... */
   if ( hypre_ParCSRMatrixColStarts(*P) &&
        hypre_ParCSRMatrixColStarts(*P)==hypre_ParCSRMatrixColStarts(Pnew) )
   {
      if ( hypre_ParCSRMatrixOwnsColStarts(*P) &&
           !hypre_ParCSRMatrixOwnsColStarts(Pnew) )
      {
         hypre_ParCSRMatrixSetColStartsOwner(*P,0);
         hypre_ParCSRMatrixSetColStartsOwner(Pnew,1);
      }
   }

   hypre_ParCSRMatrixDestroy( C );
   hypre_ParCSRMatrixDestroy( *P );

   /* Note that I'm truncating all the fine rows, not just the J-marked ones. */
#if 0
   if ( Pnew_num_nonzeros < 10000 )  /* a fixed number like this makes it
                                        dependent on the number of processors */
   {  /* ad-hoc attempt to reduce zero-matrix problems seen in testing... */
      truncation_threshold = 1.0e-6 * truncation_threshold;
      truncation_threshold_minus = 1.0e-6 * truncation_threshold_minus;
   }
#endif
   hypre_BoomerAMGTruncateInterp( Pnew, truncation_threshold,
                                  truncation_threshold_minus, CF_marker );

   hypre_MatvecCommPkgCreate( Pnew );

   *P = Pnew;

   P_diag = hypre_ParCSRMatrixDiag(*P);
   P_offd = hypre_ParCSRMatrixOffd(*P);
   P_diag_data = hypre_CSRMatrixData(P_diag);
   P_diag_i = hypre_CSRMatrixI(P_diag);
   P_diag_j = hypre_CSRMatrixJ(P_diag);
   P_offd_data = hypre_CSRMatrixData(P_offd);
   P_offd_i = hypre_CSRMatrixI(P_offd);

   /* row sum computations, for output */
#ifdef HYPRE_JACINT_PRINT_ROW_SUMS
   PIimax=-1.0e12, PIimin=1.0e12, PIimav=0, PIipav=0;
   nmav=0, npav=0;
   for ( i=0; i<num_rows_diag_P; ++i )
   {
      PIi = 0;  /* i-th value of P*1, i.e. sum of row i of P */
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         PIi += P_diag_data[m];
      for ( m=P_offd_i[i]; m<P_offd_i[i+1]; ++m )
         PIi += P_offd_data[m];
      if (CF_marker[i]<0)
      {
         PIimax = hypre_max( PIimax, PIi );
         PIimin = hypre_min( PIimin, PIi );
         if (PIi<=1-eps) { PIimav+=PIi; ++nmav; };
         if (PIi>=1+eps) { PIipav+=PIi; ++npav; };
      }
   }
   if ( nmav>0 ) PIimav = PIimav/nmav;
   if ( npav>0 ) PIipav = PIipav/npav;
   hypre_printf("%i %i P out max,min row sums %e %e\n",
                my_id, level, PIimax, PIimin );
#endif

#ifdef HYPRE_JACINT_PRINT_SOME_ROWS
   hypre_printf("some changed rows: ");
   for ( isamp=0; isamp<n_sample_rows; ++isamp )
   {
      i = sample_rows[isamp];
      hypre_printf( "%i: ", i );
      for ( m=P_diag_i[i]; m<P_diag_i[i+1]; ++m )
         hypre_printf( " %i %f, ", P_diag_j[m], P_diag_data[m] );
      hypre_printf(";  ");
   }
   hypre_printf("\n");
#endif

   ncmax=0; ncmin=num_rows_diag_P; nc1=0;
   for ( i=0; i<num_rows_diag_P; ++i )
      if (CF_marker[i]<0)
      {
         nc = P_diag_i[i+1] - P_diag_i[i];
         if (nc<=1) ++nc1;
         ncmax = hypre_max( nc, ncmax );
         ncmin = hypre_min( nc, ncmin );
      }
#ifdef HYPRE_JACINT_PRINT_DIAGNOSTICS
   hypre_printf("%i %i P has %i rows, %i changeable, %i too good, %i coarse\n",
                my_id, level, num_rows_diag_P,
                num_rows_diag_P-Jnochanges-CF_coarse, Jnochanges, CF_coarse );
   hypre_printf("%i %i min,max diag cols per row: %i, %i; no.rows w.<=1 col: %i\n",
                my_id, level, ncmin, ncmax, nc1 );
   hypre_printf("%i %i Jacobi_Interp_1 after truncation (%e), Pnew has %i+%i=%i nonzeros, local sum %e\n",
                my_id, level, truncation_threshold,
                hypre_CSRMatrixNumNonzeros(Pnew_diag),
                hypre_CSRMatrixNumNonzeros(Pnew_offd),
                hypre_CSRMatrixNumNonzeros(Pnew_diag)+hypre_CSRMatrixNumNonzeros(Pnew_offd),
                hypre_ParCSRMatrixLocalSumElts(Pnew) );
#endif

   /* Programming Notes:
      1. Judging by around line 299 of par_interp.c, the typical use of
         CF_marker is that CF_marker>=0 means Coarse, CF_marker<0 means Fine. */
#ifdef HYPRE_JACINT_PRINT_MATRICES
   hypre_sprintf( filename, "Pout%i", level );
   if ( num_rows_diag_P <= HYPRE_MAX_PRINTABLE_MATRIX )
      hypre_ParCSRMatrixPrintIJ( *P,0,0,filename);
#endif

   hypre_TFree( J_marker );
}
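/* Illustration of the CF_marker convention noted above (>=0 coarse, <0 fine):
   on a 5-point line with alternating coarsening, CF_marker = {1,-1,1,-1,1}
   marks points 0, 2, and 4 as coarse, so the Jacobi step above rewrites only
   the interpolation rows of points 1 and 3. */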
void hypre_BoomerAMGTruncateInterp( hypre_ParCSRMatrix *P,
                                    HYPRE_Real eps, HYPRE_Real dlt,
                                    HYPRE_Int * CF_marker )
/* Truncate the interpolation matrix P, but only in rows for which the
   marker is <0.  Truncation means that an element P(i,j) is set to 0 if

      P(i,j)>0 and P(i,j)<eps*max( P(i,j) )   or
      P(i,j)>0 and P(i,j)<dlt*max( -P(i,j) )  or
      P(i,j)<0 and P(i,j)>dlt*min( -P(i,j) )  or
      P(i,j)<0 and P(i,j)>eps*min( P(i,j) )

   (0 < eps,dlt < 1; typically dlt=0.1 < eps=0.2).
   The min and max are only computed locally, as I'm guessing that there
   isn't usually much to be gained (in the way of improved performance)
   by getting them perfectly right. */
/* The function hypre_BoomerAMGInterpTruncation in par_interp.c is very
   similar.  It looks at fabs(value) rather than separately dealing with
   value<0 and value>0 as recommended by Klaus Stuben, which is what this
   function does.  In this function, only "marked" rows are affected.
   Lastly, in hypre_BoomerAMGInterpTruncation, if any element gets discarded,
   it reallocates arrays to the new size. */
{
   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(P);
   HYPRE_Real *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int  *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int  *P_diag_j = hypre_CSRMatrixJ(P_diag);
   HYPRE_Real *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int  *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int  *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int  *new_P_diag_i;
   HYPRE_Int  *new_P_offd_i;
   HYPRE_Int num_rows_diag_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int num_rows_offd_P = hypre_CSRMatrixNumRows(P_offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRMatrixNumNonzeros(P_diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRMatrixNumNonzeros(P_offd);
#if 0
   MPI_Comm comm = hypre_ParCSRMatrixComm( P );
   HYPRE_Real vmax1, vmin1;
#endif
   HYPRE_Real vmax = 0.0;
   HYPRE_Real vmin = 0.0;
   HYPRE_Real v, old_sum, new_sum, scale, wmax, wmin;
   HYPRE_Int i1, m, m1d, m1o;

   /* compute vmax = max(P(i,j)) and vmin = min(P(i,j)); both are scaled
      by eps further below */
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         vmax = hypre_max( v, vmax );
         vmin = hypre_min( v, vmin );
      }
   }
#if 0
   /* This can make max,min global so results don't depend on the number of
      processors.  We don't want this except for testing, or maybe this
      could be put someplace better.  I don't like adding communication
      here, for a minor reason. */
   vmax1 = vmax; vmin1 = vmin;
   hypre_MPI_Allreduce( &vmax1, &vmax, 1, HYPRE_MPI_REAL, hypre_MPI_MAX, comm );
   hypre_MPI_Allreduce( &vmin1, &vmin, 1, HYPRE_MPI_REAL, hypre_MPI_MIN, comm );
#endif
   if ( vmax <= 0.0 ) vmax =  1.0;  /* make sure no v is v>vmax if no v is v>0 */
   if ( vmin >= 0.0 ) vmin = -1.0;  /* make sure no v is v<vmin if no v is v<0 */
   wmax = - dlt * vmin;
   wmin = - dlt * vmax;
   vmax *= eps;
   vmin *= eps;

   /* Repack the i, j, and data arrays so as to discard the small elements
      of P.  Elements of Coarse rows (CF_marker>=0) are always kept.
      The arrays are not re-allocated, so there will generally be unused
      space at the ends of the arrays. */
   new_P_diag_i = hypre_CTAlloc( HYPRE_Int, num_rows_diag_P+1 );
   new_P_offd_i = hypre_CTAlloc( HYPRE_Int, num_rows_offd_P+1 );
   m1d = P_diag_i[0];
   m1o = P_offd_i[0];
   for ( i1 = 0; i1 < num_rows_diag_P; i1++ )
   {
      old_sum = 0;
      new_sum = 0;
      for ( m=P_diag_i[i1]; m<P_diag_i[i1+1]; ++m )
      {
         v = P_diag_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax )
                                || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_diag_j[m1d] = P_diag_j[m];
            P_diag_data[m1d] = P_diag_data[m];
            ++m1d;
         }
         else
         {  /* discard v */
            --num_nonzeros_diag;
         }
      }
      for ( m=P_offd_i[i1]; m<P_offd_i[i1+1]; ++m )
      {
         v = P_offd_data[m];
         old_sum += v;
         if ( CF_marker[i1]>=0 || ( v>=vmax && v>=wmax )
                                || ( v<=vmin && v<=wmin ) )
         {  /* keep v */
            new_sum += v;
            P_offd_j[m1o] = P_offd_j[m];
            P_offd_data[m1o] = P_offd_data[m];
            ++m1o;
         }
         else
         {  /* discard v */
            --num_nonzeros_offd;
         }
      }
      new_P_diag_i[i1+1] = m1d;
      if ( i1<num_rows_offd_P ) new_P_offd_i[i1+1] = m1o;

      /* rescale to keep row sum the same */
      if (new_sum!=0) scale = old_sum/new_sum; else scale = 1.0;
      for ( m=new_P_diag_i[i1]; m<new_P_diag_i[i1+1]; ++m )
         P_diag_data[m] *= scale;
      if ( i1<num_rows_offd_P )  /* this test fails when there is no offd block */
         for ( m=new_P_offd_i[i1]; m<new_P_offd_i[i1+1]; ++m )
            P_offd_data[m] *= scale;
   }

   for ( i1 = 1; i1 <= num_rows_diag_P; i1++ )
   {
      P_diag_i[i1] = new_P_diag_i[i1];
      if ( i1<=num_rows_offd_P && num_nonzeros_offd>0 )
         P_offd_i[i1] = new_P_offd_i[i1];
   }
   hypre_TFree( new_P_diag_i );
   if ( num_rows_offd_P>0 ) hypre_TFree( new_P_offd_i );

   hypre_CSRMatrixNumNonzeros(P_diag) = num_nonzeros_diag;
   hypre_CSRMatrixNumNonzeros(P_offd) = num_nonzeros_offd;
   hypre_ParCSRMatrixSetDNumNonzeros( P );
   hypre_ParCSRMatrixSetNumNonzeros( P );
}
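/* Worked example (illustrative numbers) for the thresholds above: if the
   local extremes are max P = 0.8 and min P = -0.4, with eps = 0.2 and
   dlt = 0.1, then eps*max = 0.16, eps*min = -0.08, wmax = 0.04, and
   wmin = -0.08.  A positive entry in a fine row survives only if
   v >= 0.16, a negative one only if v <= -0.08, and each row is then
   rescaled so that its row sum is unchanged. */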
hypre_ParCSRMatrix *
hypre_ParCSRBlockMatrixConvertToParCSRMatrix(hypre_ParCSRBlockMatrix *matrix)
{
   MPI_Comm comm = hypre_ParCSRBlockMatrixComm(matrix);
   hypre_CSRBlockMatrix *diag = hypre_ParCSRBlockMatrixDiag(matrix);
   hypre_CSRBlockMatrix *offd = hypre_ParCSRBlockMatrixOffd(matrix);
   HYPRE_Int block_size = hypre_ParCSRBlockMatrixBlockSize(matrix);
   HYPRE_Int global_num_rows = hypre_ParCSRBlockMatrixGlobalNumRows(matrix);
   HYPRE_Int global_num_cols = hypre_ParCSRBlockMatrixGlobalNumCols(matrix);
   HYPRE_Int *row_starts = hypre_ParCSRBlockMatrixRowStarts(matrix);
   HYPRE_Int *col_starts = hypre_ParCSRBlockMatrixColStarts(matrix);
   HYPRE_Int num_cols_offd = hypre_CSRBlockMatrixNumCols(offd);
   HYPRE_Int num_nonzeros_diag = hypre_CSRBlockMatrixNumNonzeros(diag);
   HYPRE_Int num_nonzeros_offd = hypre_CSRBlockMatrixNumNonzeros(offd);

   hypre_ParCSRMatrix *matrix_C;
   HYPRE_Int *matrix_C_row_starts;
   HYPRE_Int *matrix_C_col_starts;

   HYPRE_Int *counter, *new_j_map;
   HYPRE_Int size_j, size_map, index, new_num_cols, removed = 0;
   HYPRE_Int *offd_j, *col_map_offd, *new_col_map_offd;
   HYPRE_Int num_procs, i, j;

   hypre_CSRMatrix *diag_nozeros, *offd_nozeros;

   hypre_MPI_Comm_size(comm, &num_procs);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, 2);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, 2);
   for (i = 0; i < 2; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#else
   matrix_C_row_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   matrix_C_col_starts = hypre_CTAlloc(HYPRE_Int, num_procs + 1);
   for (i = 0; i < num_procs + 1; i++)
   {
      matrix_C_row_starts[i] = row_starts[i]*block_size;
      matrix_C_col_starts[i] = col_starts[i]*block_size;
   }
#endif

   matrix_C = hypre_ParCSRMatrixCreate(comm,
                                       global_num_rows*block_size,
                                       global_num_cols*block_size,
                                       matrix_C_row_starts,
                                       matrix_C_col_starts,
                                       num_cols_offd*block_size,
                                       num_nonzeros_diag*block_size*block_size,
                                       num_nonzeros_offd*block_size*block_size);
   hypre_ParCSRMatrixInitialize(matrix_C);

   /* DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
   hypre_ParCSRMatrixDiag(matrix_C) =
      hypre_CSRBlockMatrixConvertToCSRMatrix(diag);
   /* AB - added to delete zeros */
   diag_nozeros = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixDiag(matrix_C),
                                             1e-14);
   if (diag_nozeros)
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixDiag(matrix_C));
      hypre_ParCSRMatrixDiag(matrix_C) = diag_nozeros;
   }

   /* OFF-DIAG */
   hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
   hypre_ParCSRMatrixOffd(matrix_C) =
      hypre_CSRBlockMatrixConvertToCSRMatrix(offd);
   /* AB - added to delete zeros - this just deletes from data and j arrays */
   offd_nozeros = hypre_CSRMatrixDeleteZeros(hypre_ParCSRMatrixOffd(matrix_C),
                                             1e-14);
   if (offd_nozeros)
   {
      hypre_CSRMatrixDestroy(hypre_ParCSRMatrixOffd(matrix_C));
      hypre_ParCSRMatrixOffd(matrix_C) = offd_nozeros;
      removed = 1;
   }

   /* now convert the col_map_offd */
   for (i = 0; i < num_cols_offd; i++)
      for (j = 0; j < block_size; j++)
         hypre_ParCSRMatrixColMapOffd(matrix_C)[i*block_size + j] =
            hypre_ParCSRBlockMatrixColMapOffd(matrix)[i]*block_size + j;

   /* if we deleted zeros, then it is possible that col_map_offd can be
      compressed as well - this requires some amount of work that could be
      skipped... */
   if (removed)
   {
      size_map = num_cols_offd*block_size;
      counter = hypre_CTAlloc(HYPRE_Int, size_map);
      new_j_map = hypre_CTAlloc(HYPRE_Int, size_map);

      offd_j = hypre_CSRMatrixJ(hypre_ParCSRMatrixOffd(matrix_C));
      col_map_offd = hypre_ParCSRMatrixColMapOffd(matrix_C);
      size_j = hypre_CSRMatrixNumNonzeros(hypre_ParCSRMatrixOffd(matrix_C));

      /* mark which offd entries are found in j */
      for (i = 0; i < size_j; i++)
      {
         counter[offd_j[i]] = 1;
      }
      /* now find a new numbering for columns (we will delete the cols
         where counter = 0) */
      index = 0;
      for (i = 0; i < size_map; i++)
      {
         if (counter[i]) new_j_map[i] = index++;
      }
      new_num_cols = index;
      /* if there are some col entries to remove: */
      if (!(index == size_map))
      {
         /* go through j and adjust entries */
         for (i = 0; i < size_j; i++)
         {
            offd_j[i] = new_j_map[offd_j[i]];
         }
         /* now go through the col map and get rid of non-needed entries */
         new_col_map_offd = hypre_CTAlloc(HYPRE_Int, new_num_cols);
         index = 0;
         for (i = 0; i < size_map; i++)
         {
            if (counter[i])
            {
               new_col_map_offd[index++] = col_map_offd[i];
            }
         }
         /* set the new col map */
         hypre_TFree(col_map_offd);
         hypre_ParCSRMatrixColMapOffd(matrix_C) = new_col_map_offd;
         /* modify the number of cols */
         hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(matrix_C)) = new_num_cols;
      }
      hypre_TFree(new_j_map);
      hypre_TFree(counter);
   }

   hypre_ParCSRMatrixSetNumNonzeros( matrix_C );
   hypre_ParCSRMatrixSetDNumNonzeros( matrix_C );

   /* we will not copy the comm package */
   hypre_ParCSRMatrixCommPkg(matrix_C) = NULL;

   return matrix_C;
}
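/* Index-mapping illustration for the conversion above: with block_size = 2,
   block column c expands to scalar columns 2*c and 2*c+1, which is exactly
   the col_map_offd expansion performed before the compression step:
   new_map[i*2 + j] = old_map[i]*2 + j. */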
void hypre_CSRMatrixSplit(hypre_CSRMatrix *A,
                          HYPRE_Int nr, HYPRE_Int nc,
                          HYPRE_Int *row_block_num, HYPRE_Int *col_block_num,
                          hypre_CSRMatrix **blocks)
{
   HYPRE_Int i, j, k, r, bi, bj;
   HYPRE_Int* A_i = hypre_CSRMatrixI(A);
   HYPRE_Int* A_j = hypre_CSRMatrixJ(A);
   HYPRE_Complex* A_data = hypre_CSRMatrixData(A);
   HYPRE_Int A_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int A_cols = hypre_CSRMatrixNumCols(A);

   HYPRE_Int *num_rows = hypre_CTAlloc(HYPRE_Int, nr);
   HYPRE_Int *num_cols = hypre_CTAlloc(HYPRE_Int, nc);

   HYPRE_Int *block_row = hypre_TAlloc(HYPRE_Int, A_rows);
   HYPRE_Int *block_col = hypre_TAlloc(HYPRE_Int, A_cols);

   /* local row/column index of each row/column within its block */
   for (i = 0; i < A_rows; i++)
   {
      block_row[i] = num_rows[row_block_num[i]]++;
   }
   for (j = 0; j < A_cols; j++)
   {
      block_col[j] = num_cols[col_block_num[j]]++;
   }

   /* allocate the blocks */
   for (i = 0; i < nr; i++)
   {
      for (j = 0; j < nc; j++)
      {
         hypre_CSRMatrix *B = hypre_CSRMatrixCreate(num_rows[i], num_cols[j], 0);
         hypre_CSRMatrixI(B) = hypre_CTAlloc(HYPRE_Int, num_rows[i] + 1);
         blocks[i*nc + j] = B;
      }
   }

   /* count block row nnz */
   for (i = 0; i < A_rows; i++)
   {
      bi = row_block_num[i];
      for (j = A_i[i]; j < A_i[i+1]; j++)
      {
         bj = col_block_num[A_j[j]];
         hypre_CSRMatrix *B = blocks[bi*nc + bj];
         hypre_CSRMatrixI(B)[block_row[i] + 1]++;
      }
   }

   /* count block nnz: turn the (shifted) per-row counts into shifted row
      starts; the populate pass below advances them into the final CSR row
      pointers */
   for (k = 0; k < nr*nc; k++)
   {
      hypre_CSRMatrix *B = blocks[k];
      HYPRE_Int* B_i = hypre_CSRMatrixI(B);
      HYPRE_Int nnz = 0, rs;

      for (r = 1; r <= hypre_CSRMatrixNumRows(B); r++)
      {
         rs = B_i[r];     /* this row's count */
         B_i[r] = nnz;    /* becomes the (shifted) row start */
         nnz += rs;
      }

      hypre_CSRMatrixJ(B) = hypre_TAlloc(HYPRE_Int, nnz);
      hypre_CSRMatrixData(B) = hypre_TAlloc(HYPRE_Complex, nnz);
      hypre_CSRMatrixNumNonzeros(B) = nnz;
   }

   /* populate blocks */
   for (i = 0; i < A_rows; i++)
   {
      bi = row_block_num[i];
      for (j = A_i[i]; j < A_i[i+1]; j++)
      {
         k = A_j[j];
         bj = col_block_num[k];
         hypre_CSRMatrix *B = blocks[bi*nc + bj];
         HYPRE_Int *bii = hypre_CSRMatrixI(B) + block_row[i] + 1;
         hypre_CSRMatrixJ(B)[*bii] = block_col[k];
         hypre_CSRMatrixData(B)[*bii] = A_data[j];
         (*bii)++;
      }
   }

   hypre_TFree(block_col);
   hypre_TFree(block_row);
   hypre_TFree(num_cols);
   hypre_TFree(num_rows);
}
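/* Usage sketch (illustrative, not part of the library): split a matrix into
   2 x 2 blocks by assigning the first half of the rows/columns to block 0
   and the rest to block 1.  blocks[] receives nr*nc = 4 freshly allocated
   CSR matrices; assumes the matrix has at least two rows and columns. */
static void example_split(hypre_CSRMatrix *A)
{
   HYPRE_Int i;
   HYPRE_Int n_rows = hypre_CSRMatrixNumRows(A);
   HYPRE_Int n_cols = hypre_CSRMatrixNumCols(A);
   HYPRE_Int *rb = hypre_TAlloc(HYPRE_Int, n_rows);
   HYPRE_Int *cb = hypre_TAlloc(HYPRE_Int, n_cols);
   hypre_CSRMatrix *blocks[4];

   for (i = 0; i < n_rows; i++) { rb[i] = (i < n_rows/2) ? 0 : 1; }
   for (i = 0; i < n_cols; i++) { cb[i] = (i < n_cols/2) ? 0 : 1; }

   hypre_CSRMatrixSplit(A, 2, 2, rb, cb, blocks);

   hypre_TFree(cb);
   hypre_TFree(rb);
}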
HYPRE_Int hypre_AMGSolve( void *amg_vdata,
                          hypre_CSRMatrix *A,
                          hypre_Vector *f,
                          hypre_Vector *u )
{
   hypre_AMGData *amg_data = amg_vdata;

   /* Data Structure variables */
   HYPRE_Int amg_ioutdat;
   HYPRE_Int *num_coeffs;
   HYPRE_Int *num_variables;
   /* HYPRE_Int cycle_op_count; */
   HYPRE_Int num_levels;
   /* HYPRE_Int num_functions; */
   double tol;
   /* char *file_name; */
   hypre_CSRMatrix **A_array;
   hypre_Vector **F_array;
   hypre_Vector **U_array;

   /* Local variables */
   HYPRE_Int j;
   HYPRE_Int Solve_err_flag;
   HYPRE_Int max_iter;
   HYPRE_Int cycle_count;
   HYPRE_Int total_coeffs;
   HYPRE_Int total_variables;

   double alpha = 1.0;
   double beta = -1.0;
   /* double cycle_cmplxty; */
   double operat_cmplxty;
   double grid_cmplxty;
   double conv_factor;
   double resid_nrm;
   double resid_nrm_init;
   double relative_resid;
   double rhs_norm;
   double old_resid;

   hypre_Vector *Vtemp;

   amg_ioutdat = hypre_AMGDataIOutDat(amg_data);
   /* file_name = hypre_AMGDataLogFileName(amg_data); */
   /* num_functions = hypre_AMGDataNumFunctions(amg_data); */
   num_levels = hypre_AMGDataNumLevels(amg_data);
   A_array = hypre_AMGDataAArray(amg_data);
   F_array = hypre_AMGDataFArray(amg_data);
   U_array = hypre_AMGDataUArray(amg_data);
   tol = hypre_AMGDataTol(amg_data);
   max_iter = hypre_AMGDataMaxIter(amg_data);

   num_coeffs = hypre_CTAlloc(HYPRE_Int, num_levels);
   num_variables = hypre_CTAlloc(HYPRE_Int, num_levels);
   num_coeffs[0] = hypre_CSRMatrixNumNonzeros(A_array[0]);
   num_variables[0] = hypre_CSRMatrixNumRows(A_array[0]);

   A_array[0] = A;
   F_array[0] = f;
   U_array[0] = u;

   Vtemp = hypre_SeqVectorCreate(num_variables[0]);
   hypre_SeqVectorInitialize(Vtemp);
   hypre_AMGDataVtemp(amg_data) = Vtemp;

   for (j = 1; j < num_levels; j++)
   {
      num_coeffs[j] = hypre_CSRMatrixNumNonzeros(A_array[j]);
      num_variables[j] = hypre_CSRMatrixNumRows(A_array[j]);
   }

   /*-----------------------------------------------------------------------
    *    Write the solver parameters
    *-----------------------------------------------------------------------*/

   /* if (amg_ioutdat > 0)
      hypre_WriteSolverParams(amg_data); */

   /*-----------------------------------------------------------------------
    *    Initialize the solver error flag and assorted bookkeeping variables
    *-----------------------------------------------------------------------*/

   Solve_err_flag = 0;
   total_coeffs = 0;
   total_variables = 0;
   cycle_count = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;

   /*-----------------------------------------------------------------------
    *    open the log file and write some initial info
    *-----------------------------------------------------------------------*/

   if (amg_ioutdat > 1)
   {
      hypre_printf("\n\nAMG SOLUTION INFO:\n");
   }

   /*-----------------------------------------------------------------------
    *    Compute initial fine-grid residual and print to logfile
    *-----------------------------------------------------------------------*/

   hypre_SeqVectorCopy(F_array[0], Vtemp);
   hypre_CSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Vtemp);
   resid_nrm = sqrt(hypre_SeqVectorInnerProd(Vtemp, Vtemp));

   resid_nrm_init = resid_nrm;
   rhs_norm = sqrt(hypre_SeqVectorInnerProd(f, f));
   relative_resid = 9999;
   if (rhs_norm)
   {
      relative_resid = resid_nrm_init / rhs_norm;
   }
   else
   {
      relative_resid = resid_nrm_init;
   }

   if (amg_ioutdat == 2 || amg_ioutdat == 3)
   {
      hypre_printf("                                           relative\n");
      hypre_printf("              residual        factor       residual\n");
      hypre_printf("              --------        ------       --------\n");
      hypre_printf("    Initial   %e                %e\n",
                   resid_nrm_init, relative_resid);
   }

   /*-----------------------------------------------------------------------
    *    Main V-cycle loop
    *-----------------------------------------------------------------------*/

   while (relative_resid >= tol && cycle_count < max_iter
          && Solve_err_flag == 0)
   {
      hypre_AMGDataCycleOpCount(amg_data) = 0;
      /* Op count only needed for one cycle */

      Solve_err_flag = hypre_AMGCycle(amg_data, F_array, U_array);

      old_resid = resid_nrm;

      /*---------------------------------------------------------------
       * Compute fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      hypre_SeqVectorCopy(F_array[0], Vtemp);
      hypre_CSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, Vtemp);
      resid_nrm = sqrt(hypre_SeqVectorInnerProd(Vtemp, Vtemp));

      conv_factor = resid_nrm / old_resid;
      relative_resid = 9999;
      if (rhs_norm)
      {
         relative_resid = resid_nrm / rhs_norm;
      }
      else
      {
         relative_resid = resid_nrm;
      }

      ++cycle_count;

      if (amg_ioutdat == 2 || amg_ioutdat == 3)
      {
         hypre_printf("    Cycle %2d   %e    %f     %e \n",
                      cycle_count, resid_nrm, conv_factor, relative_resid);
      }
   }

   if (cycle_count == max_iter) Solve_err_flag = 1;

   /*-----------------------------------------------------------------------
    *    Compute closing statistics
    *-----------------------------------------------------------------------*/

   conv_factor = pow((resid_nrm/resid_nrm_init), (1.0/((double) cycle_count)));

   for (j = 0; j < hypre_AMGDataNumLevels(amg_data); j++)
   {
      total_coeffs += num_coeffs[j];
      total_variables += num_variables[j];
   }

   /* cycle_op_count = hypre_AMGDataCycleOpCount(amg_data); */

   grid_cmplxty = ((double) total_variables) / ((double) num_variables[0]);
   operat_cmplxty = ((double) total_coeffs) / ((double) num_coeffs[0]);
   /* cycle_cmplxty = ((double) cycle_op_count) / ((double) num_coeffs[0]); */

   if (amg_ioutdat > 1)
   {
      if (Solve_err_flag == 1)
      {
         hypre_printf("\n\n==============================================");
         hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
         hypre_printf("      within the allowed %d V-cycles\n", max_iter);
         hypre_printf("==============================================");
      }
      hypre_printf("\n\n Average Convergence Factor = %f", conv_factor);
      hypre_printf("\n\n     Complexity:   grid = %f\n", grid_cmplxty);
      hypre_printf("                operator = %f\n", operat_cmplxty);
      /* hypre_printf("                   cycle = %f\n\n", cycle_cmplxty); */
   }

   /*----------------------------------------------------------
    * Close the output file (if open)
    *----------------------------------------------------------*/

   hypre_TFree(num_coeffs);
   hypre_TFree(num_variables);

   return(Solve_err_flag);
}
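/* Worked example (illustrative numbers) for the closing statistic above: if
   the residual norm drops from 1.0e0 to 1.0e-6 over 10 V-cycles, the average
   convergence factor reported is (1e-6/1e0)^(1/10) ~= 0.25. */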
/******************************************************************************
 * hypre_BoomerAMGFitInterpVectors
 *
 * This routine updates the interp operator to interpolate the supplied
 * smooth vectors with a least-squares fitting.  This code (variant 0) was
 * used for the Baker, Kolev and Yang elasticity paper (section 3) to
 * evaluate the least-squares fitting method proposed by Stuben in his talk
 * (see the paper for details).  So this code is basically a post-processing
 * step that performs the LS fit (the size and sparsity of P do not change).
 *
 * Note: truncation only works correctly for 1 processor - needs to just use
 * the other truncation routine.
 *
 * Variant = 0: do LS fit to existing interp weights (default)
 * Variant = 1: extends the neighborhood to incl. other unknowns on the same
 *              node - ASSUMES A NODAL COARSENING, ASSUMES VARIABLES ORDERED
 *              GRID POINT, THEN UNKNOWN (e.g., u0, v0, u1, v1, etc.), AND AT
 *              MOST 3 FCNS (NOTE: **only** works with 1 processor)
 *
 * This code is not compiled or accessible through hypre at this time (it
 * was not particularly effective compared to the LN and GM approaches), but
 * is checked in in case there is interest in the future.
 ******************************************************************************/

HYPRE_Int
hypre_BoomerAMGFitInterpVectors( hypre_ParCSRMatrix *A,
                                 hypre_ParCSRMatrix **P,
                                 HYPRE_Int num_smooth_vecs,
                                 hypre_ParVector **smooth_vecs,
                                 hypre_ParVector **coarse_smooth_vecs,
                                 double delta,
                                 HYPRE_Int num_functions,
                                 HYPRE_Int *dof_func,
                                 HYPRE_Int *CF_marker,
                                 HYPRE_Int max_elmts,
                                 double trunc_factor,
                                 HYPRE_Int variant,
                                 HYPRE_Int level)
{
   HYPRE_Int i, j, k;
   HYPRE_Int one_i = 1;
   HYPRE_Int info;
   HYPRE_Int coarse_index;
   HYPRE_Int num_coarse_diag;
   HYPRE_Int num_coarse_offd;
   HYPRE_Int num_nonzeros = 0;
   HYPRE_Int coarse_point = 0;
   HYPRE_Int k_size;
   HYPRE_Int k_alloc;
   HYPRE_Int counter;
   HYPRE_Int *piv;
   HYPRE_Int tmp_int;
   HYPRE_Int num_sends;

   double *alpha;
   double *Beta;
   double *w;
   double *w_old;
   double *B_s;
   double tmp_double;
   double one = 1.0;
   double mone = -1.0;
   double *vec_data;

   hypre_CSRMatrix *P_diag = hypre_ParCSRMatrixDiag(*P);
   hypre_CSRMatrix *P_offd = hypre_ParCSRMatrixOffd(*P);
   double    *P_diag_data = hypre_CSRMatrixData(P_diag);
   HYPRE_Int *P_diag_i = hypre_CSRMatrixI(P_diag);
   HYPRE_Int *P_diag_j = hypre_CSRMatrixJ(P_diag);
   double    *P_offd_data = hypre_CSRMatrixData(P_offd);
   HYPRE_Int *P_offd_i = hypre_CSRMatrixI(P_offd);
   HYPRE_Int *P_offd_j = hypre_CSRMatrixJ(P_offd);
   HYPRE_Int num_rows_P = hypre_CSRMatrixNumRows(P_diag);
   HYPRE_Int P_diag_size = P_diag_i[num_rows_P];
   HYPRE_Int P_offd_size = P_offd_i[num_rows_P];
   HYPRE_Int num_cols_P_offd = hypre_CSRMatrixNumCols(P_offd);
   HYPRE_Int *col_map_offd_P;

   hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
   HYPRE_Int num_cols_A_offd = hypre_CSRMatrixNumCols(A_offd);

   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(*P);
   hypre_ParCSRCommHandle *comm_handle;
   MPI_Comm comm;

   double *dbl_buf_data;
   double *smooth_vec_offd = NULL;
   double *offd_vec_data;

   HYPRE_Int index, start;
   HYPRE_Int *P_marker;
   HYPRE_Int num_procs;

   hypre_ParVector *vector;

   HYPRE_Int new_nnz, orig_start, j_pos, fcn_num, num_elements;
   HYPRE_Int *P_diag_j_new;
   double *P_diag_data_new;

   HYPRE_Int adjust_3D[] = {1, 2, -1, 1, -2, -1};
   HYPRE_Int adjust_2D[] = {1, -1};
   HYPRE_Int *adjust_list;

   if (variant == 1 && num_functions > 1)
   {
      /* First add new entries to P with value 0.0 corresponding to weights
         from other unknowns on the same grid point */
      /* Loop through each row */

      new_nnz = P_diag_size*num_functions; /* this is an over-estimate */
      P_diag_j_new = hypre_CTAlloc(HYPRE_Int, new_nnz);
      P_diag_data_new = hypre_CTAlloc(double, new_nnz);

      if (num_functions == 2)
         adjust_list = adjust_2D;
      else if (num_functions == 3)
         adjust_list = adjust_3D;

      j_pos = 0;
      orig_start = 0;
      /* loop through rows */
      for (i = 0; i < num_rows_P; i++)
      {
         fcn_num = (HYPRE_Int) fmod(i, num_functions);
         if (fcn_num != dof_func[i])
            printf("WARNING - ROWS incorrectly ordered!\n");

         /* loop through elements */
         num_elements = P_diag_i[i+1] - orig_start;

         /* add zeros corresponding to other unknowns */
         if (num_elements > 1)
         {
            for (j = 0; j < num_elements; j++)
            {
               P_diag_j_new[j_pos] = P_diag_j[orig_start+j];
               P_diag_data_new[j_pos++] = P_diag_data[orig_start+j];

               for (k = 0; k < num_functions-1; k++)
               {
                  P_diag_j_new[j_pos] = P_diag_j[orig_start+j] + ADJUST(fcn_num,k);
                  P_diag_data_new[j_pos++] = 0.0;
               }
            }
         }
         else if (num_elements == 1) /* only one element - just copy to new */
         {
            P_diag_j_new[j_pos] = P_diag_j[orig_start];
            P_diag_data_new[j_pos++] = P_diag_data[orig_start];
         }
         orig_start = P_diag_i[i+1];
         if (num_elements > 1)
            P_diag_i[i+1] = P_diag_i[i] + num_elements*num_functions;
         else
            P_diag_i[i+1] = P_diag_i[i] + num_elements;

         if (j_pos != P_diag_i[i+1]) printf("Problem!\n");

      } /* end loop through rows */

      /* modify P */
      hypre_TFree(P_diag_j);
      hypre_TFree(P_diag_data);
      hypre_CSRMatrixJ(P_diag) = P_diag_j_new;
      hypre_CSRMatrixData(P_diag) = P_diag_data_new;
      hypre_CSRMatrixNumNonzeros(P_diag) = P_diag_i[num_rows_P];
      P_diag_j = P_diag_j_new;
      P_diag_data = P_diag_data_new;

      /* check if there is already a comm pkg - if so, destroy */
      if (comm_pkg)
      {
         hypre_MatvecCommPkgDestroy(comm_pkg);
         comm_pkg = NULL;
      }

   } /* end variant == 1 and num_functions > 1 */
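/* Offset illustration for the adjust lists above (an interpretation; the
   ADJUST macro itself is not shown in this excerpt, and is assumed here to
   index adjust_list[fcn_num*(num_functions-1) + k]): with unknowns
   interleaved per grid point (u0, v0, u1, v1, ...), adjust_2D = {1, -1}
   reaches the other unknown at the same point - from a u-row the matching
   v-column is at +1, from a v-row the u-column is at -1.  Likewise
   adjust_3D gives {+1, +2} from a u-row, {-1, +1} from a v-row, and
   {-2, -1} from a w-row. */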
HYPRE_Int hypre_ParCSRMatrixToParChordMatrix( hypre_ParCSRMatrix *Ap,
                                              MPI_Comm comm,
                                              hypre_ParChordMatrix **pAc )
{
   HYPRE_Int       *row_starts = hypre_ParCSRMatrixRowStarts(Ap);
   HYPRE_Int       *col_starts = hypre_ParCSRMatrixColStarts(Ap);
   hypre_CSRMatrix *diag = hypre_ParCSRMatrixDiag(Ap);
   hypre_CSRMatrix *offd = hypre_ParCSRMatrixOffd(Ap);
   HYPRE_Int       *offd_j = hypre_CSRMatrixJ(offd);
   HYPRE_Int       *diag_j = hypre_CSRMatrixJ(diag);
   HYPRE_Int       *col_map_offd = hypre_ParCSRMatrixColMapOffd(Ap);
   HYPRE_Int        first_col_diag = hypre_ParCSRMatrixFirstColDiag(Ap);
   hypre_ParChordMatrix *Ac;
   hypre_NumbersNode    *rdofs, *offd_cols_me;
   hypre_NumbersNode   **offd_cols;
   HYPRE_Int  **offd_col_array;
   HYPRE_Int   *len_offd_col_array, *offd_col_array_me;
   HYPRE_Int    len_offd_col_array_me;
   HYPRE_Int    num_idofs, num_rdofs, j_local, j_global, row_global;
   HYPRE_Int    i, j, jj, p, pto, q, qto, my_id, my_q, row, ireq;
   HYPRE_Int    num_inprocessors, num_toprocessors, num_procs,
                len_num_rdofs_toprocessor;
   HYPRE_Int   *inprocessor, *toprocessor, *pcr, *qcr, *num_inchords,
               *chord, *chordto;
   HYPRE_Int   *inproc, *toproc, *num_rdofs_toprocessor;
   HYPRE_Int  **inchord_idof, **inchord_rdof, **rdof_toprocessor;
   double     **inchord_data;
   double       data;
   HYPRE_Int   *first_index_idof, *first_index_rdof;
   hypre_MPI_Request *request;
   hypre_MPI_Status  *status;

   hypre_MPI_Comm_rank(comm, &my_id);
   hypre_MPI_Comm_size(comm, &num_procs);
   num_idofs = row_starts[my_id+1] - row_starts[my_id];
   num_rdofs = col_starts[my_id+1] - col_starts[my_id];

   hypre_ParChordMatrixCreate( pAc, comm, num_idofs, num_rdofs );
   Ac = *pAc;

   /* The following block sets Inprocessor:
      On each proc. my_id, we find the columns in the offd and diag blocks
      (global no.s).  The columns are rdofs (contrary to what I wrote in
      ChordMatrix.txt).
      For each such col/rdof r, find the proc. p which owns row/idof r.
      We set the temporary array pcr[p]=1 for such p.
      An MPI all-to-all will exchange such arrays so my_id's array qcr has
      qcr[q]=1 iff, on proc. q, pcr[my_id]=1.  In other words, qcr[q]=1 if
      my_id owns a row/idof i which is the same as a col/rdof owned by q.
      Collect all such q's into the array Inprocessor.
      While constructing pcr, we also construct pj such that for any index jj
      into offd_j,offd_data, pj[jj] is the processor which owns jj as a row
      (idof) (the number jj is local to this processor).
   */
   pcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   qcr = hypre_CTAlloc( HYPRE_Int, num_procs );
   for ( p=0; p<num_procs; ++p ) pcr[p]=0;
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(offd); ++jj )
   {
      j_local = offd_j[jj];
      j_global = col_map_offd[j_local];
      for ( p=0; p<num_procs; ++p )
      {
         if ( j_global >= row_starts[p] && j_global < row_starts[p+1] )
         {
            pcr[p]=1;
            /* not used yet... pj[jj] = p; */
            break;
         }
      }
   }
   /* jjd = jj; ...not used yet */

   /* pcr[my_id] = 1; ...for square matrices (with nonzero diag block) this
      one line would do the job of the following nested loop.  For non-square
      matrices, the data distribution is too arbitrary. */
   for ( jj=0; jj<hypre_CSRMatrixNumNonzeros(diag); ++jj )
   {
      j_local = diag_j[jj];
      j_global = j_local + first_col_diag;
      for ( p=0; p<num_procs; ++p )
      {
         if ( j_global >= row_starts[p] && j_global < row_starts[p+1] )
         {
            pcr[p]=1;
            /* not used yet... pj[jj+jjd] = p; */
            break;
         }
      }
   }

   /* Now pcr[p]=1 iff my_id owns a col/rdof r which proc. p owns as a
      row/idof */
   hypre_MPI_Alltoall( pcr, 1, HYPRE_MPI_INT, qcr, 1, HYPRE_MPI_INT, comm );
   /* Now qcr[q]==1 if my_id owns a row/idof i which is a col/rdof of proc. q.
      The array of such q's is the array Inprocessor. */

   num_inprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) ++num_inprocessors;
   inprocessor = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( qcr[q]==1 ) inprocessor[ p++ ] = q;
   num_toprocessors = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) ++num_toprocessors;
   toprocessor = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   p = 0;
   for ( q=0; q<num_procs; ++q ) if ( pcr[q]==1 ) toprocessor[ p++ ] = q;

   hypre_ParChordMatrixNumInprocessors(Ac) = num_inprocessors;
   hypre_ParChordMatrixInprocessor(Ac) = inprocessor;
   hypre_ParChordMatrixNumToprocessors(Ac) = num_toprocessors;
   hypre_ParChordMatrixToprocessor(Ac) = toprocessor;
   hypre_TFree( qcr );

   /* FirstIndexIdof[p] is the global index of proc. p's row 0 */
   /* FirstIndexRdof[p] is the global index of proc. p's col 0 */
   /* For FirstIndexIdof, we copy the array row_starts rather than its
      pointer, because the chord matrix will think it's free to delete
      FirstIndexIdof */
   /* col_starts[p] contains the global index of the first column in the diag
      block of p.  But for first_index_rdof we want the global index of the
      first column in p (whether that's in the diag or offd block).  So it's
      more involved than row/idof: we also check the offd block, and have to
      do a gather to get first_index_rdof for every proc. on every proc. */
   first_index_idof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   first_index_rdof = hypre_CTAlloc( HYPRE_Int, num_procs+1 );
   for ( p=0; p<=num_procs; ++p )
   {
      first_index_idof[p] = row_starts[p];
      first_index_rdof[p] = col_starts[p];
   }
   if ( hypre_CSRMatrixNumRows(offd) > 0 && hypre_CSRMatrixNumCols(offd) > 0 )
      first_index_rdof[my_id] =
         col_starts[my_id] < col_map_offd[0] ? col_starts[my_id] : col_map_offd[0];
   hypre_MPI_Allgather( &first_index_rdof[my_id], 1, HYPRE_MPI_INT,
                        first_index_rdof, 1, HYPRE_MPI_INT, comm );
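   /* Aside (illustrative, not hypre API): the owner searches above scan all
      num_procs partitions linearly, and the "TO DO: find faster ways" note
      later in this routine points at the same cost.  Since row_starts is
      monotone, the same lookup can be done by bisection; a minimal sketch,
      assuming only row_starts[0] <= g < row_starts[num_procs]: */
#if 0
   static HYPRE_Int example_owner_of_row( HYPRE_Int g,
                                          const HYPRE_Int *row_starts,
                                          HYPRE_Int num_procs )
   {
      HYPRE_Int lo = 0, hi = num_procs - 1;
      while (lo < hi)
      {
         HYPRE_Int mid = lo + (hi - lo + 1) / 2;
         if (row_starts[mid] <= g) lo = mid;  /* owner is at or above mid */
         else                      hi = mid - 1;
      }
      return lo;  /* row_starts[lo] <= g < row_starts[lo+1] */
   }
#endif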
   /* Set num_inchords: num_inchords[p] is the no. of chords on my_id
      connected to p.  Set each chord (idof,rdof,data).  We go through each
      matrix element in the diag block, find what processor owns its column
      no. as a row, then update num_inchords[p], inchord_idof[p],
      inchord_rdof[p], inchord_data[p]. */
   inchord_idof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_rdof = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   inchord_data = hypre_CTAlloc( double*, num_inprocessors );
   num_inchords = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chord = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   chordto = hypre_CTAlloc( HYPRE_Int, num_toprocessors );
   num_rdofs = 0;
   for ( q=0; q<num_inprocessors; ++q ) num_inchords[q] = 0;
   my_q = -1;
   for ( q=0; q<num_inprocessors; ++q ) if ( inprocessor[q]==my_id ) my_q = q;
   hypre_assert( my_q >= 0 );

   /* diag block: first count chords (from my_id to my_id), then set them
      from the diag block's CSR data structure */
   num_idofs = hypre_CSRMatrixNumRows(diag);
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row )
   {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(diag)[i];
         hypre_NumbersEnter( rdofs, j_local );
         ++num_inchords[my_q];
      }
   }
   num_rdofs = hypre_NumbersNEntered( rdofs );
   inchord_idof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_rdof[my_q] = hypre_CTAlloc( HYPRE_Int, num_inchords[my_q] );
   inchord_data[my_q] = hypre_CTAlloc( double, num_inchords[my_q] );
   chord[0] = 0;
   for ( row=0; row<hypre_CSRMatrixNumRows(diag); ++row )
   {
      for ( i=hypre_CSRMatrixI(diag)[row]; i<hypre_CSRMatrixI(diag)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(diag)[i];
         data = hypre_CSRMatrixData(diag)[i];
         inchord_idof[my_q][chord[0]] = row;
         /* Here we need to convert from j_local - a column local to the diag
            of this proc. - to a j which is local only to this processor: a
            column (rdof) numbering scheme to be shared by the diag and offd
            blocks... */
         j_global = j_local + hypre_ParCSRMatrixColStarts(Ap)[my_q];
         j = j_global - first_index_rdof[my_q];
         inchord_rdof[my_q][chord[0]] = j;
         inchord_data[my_q][chord[0]] = data;
         hypre_assert( chord[0] < num_inchords[my_q] );
         ++chord[0];
      }
   }
   hypre_NumbersDeleteNode(rdofs);

   /* offd block: */
   /* >>> offd_cols_me duplicates rdofs */
   offd_cols_me = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row )
   {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global = col_map_offd[j_local];
         hypre_NumbersEnter( offd_cols_me, j_global );
      }
   }
   offd_col_array = hypre_CTAlloc( HYPRE_Int*, num_inprocessors );
   len_offd_col_array = hypre_CTAlloc( HYPRE_Int, num_inprocessors );
   offd_col_array_me = hypre_NumbersArray( offd_cols_me );
   len_offd_col_array_me = hypre_NumbersNEntered( offd_cols_me );
   request = hypre_CTAlloc( hypre_MPI_Request, 2*num_procs );
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( &len_offd_col_array[q], 1, HYPRE_MPI_INT,
                       inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 )
   {
      hypre_MPI_Isend( &len_offd_col_array_me, 1, HYPRE_MPI_INT,
                       p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc( hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
   hypre_TFree(status);
   ireq = 0;
   for ( q=0; q<num_inprocessors; ++q )
      offd_col_array[q] = hypre_CTAlloc( HYPRE_Int, len_offd_col_array[q] );
   for ( q=0; q<num_inprocessors; ++q )
      hypre_MPI_Irecv( offd_col_array[q], len_offd_col_array[q], HYPRE_MPI_INT,
                       inprocessor[q], 0, comm, &request[ireq++] );
   for ( p=0; p<num_procs; ++p ) if ( pcr[p]==1 )
   {
      hypre_MPI_Isend( offd_col_array_me, len_offd_col_array_me, HYPRE_MPI_INT,
                       p, 0, comm, &request[ireq++] );
   }
   status = hypre_CTAlloc( hypre_MPI_Status, ireq );
   hypre_MPI_Waitall( ireq, request, status );
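   /* The exchange just completed is a two-round handshake: round one ships
      only the list lengths so receivers can size their buffers; round two
      ships the column lists themselves, reusing the same request array.  A
      reduced, self-contained sketch of the pattern (names hypothetical, not
      hypre API): */
#if 0
   static void example_two_round_exchange( MPI_Comm comm,
                                           HYPRE_Int nrecv, HYPRE_Int *senders,
                                           HYPRE_Int nsend, HYPRE_Int *receivers,
                                           HYPRE_Int *my_data, HYPRE_Int my_len,
                                           HYPRE_Int **bufs, HYPRE_Int *lens )
   {
      hypre_MPI_Request *req = hypre_CTAlloc( hypre_MPI_Request, nrecv + nsend );
      hypre_MPI_Status  *st;
      HYPRE_Int q, r = 0;

      /* round 1: lengths */
      for ( q=0; q<nrecv; ++q )
         hypre_MPI_Irecv( &lens[q], 1, HYPRE_MPI_INT, senders[q], 0, comm, &req[r++] );
      for ( q=0; q<nsend; ++q )
         hypre_MPI_Isend( &my_len, 1, HYPRE_MPI_INT, receivers[q], 0, comm, &req[r++] );
      st = hypre_CTAlloc( hypre_MPI_Status, r );
      hypre_MPI_Waitall( r, req, st );
      hypre_TFree(st);

      /* round 2: payloads, now that buffer sizes are known */
      r = 0;
      for ( q=0; q<nrecv; ++q )
      {
         bufs[q] = hypre_CTAlloc( HYPRE_Int, lens[q] );
         hypre_MPI_Irecv( bufs[q], lens[q], HYPRE_MPI_INT, senders[q], 0, comm, &req[r++] );
      }
      for ( q=0; q<nsend; ++q )
         hypre_MPI_Isend( my_data, my_len, HYPRE_MPI_INT, receivers[q], 0, comm, &req[r++] );
      st = hypre_CTAlloc( hypre_MPI_Status, r );
      hypre_MPI_Waitall( r, req, st );
      hypre_TFree(st);
      hypre_TFree(req);
   }
#endif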
   hypre_TFree(request);
   hypre_TFree(status);
   offd_cols = hypre_CTAlloc( hypre_NumbersNode *, num_inprocessors );
   for ( q=0; q<num_inprocessors; ++q )
   {
      offd_cols[q] = hypre_NumbersNewNode();
      for ( i=0; i<len_offd_col_array[q]; ++i )
         hypre_NumbersEnter( offd_cols[q], offd_col_array[q][i] );
   }

   len_num_rdofs_toprocessor =
      1 + hypre_CSRMatrixI(offd)[hypre_CSRMatrixNumRows(offd)];
   inproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   toproc = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   num_rdofs_toprocessor = hypre_CTAlloc( HYPRE_Int, len_num_rdofs_toprocessor );
   for ( qto=0; qto<len_num_rdofs_toprocessor; ++qto )
   {
      inproc[qto] = -1;
      toproc[qto] = -1;
      num_rdofs_toprocessor[qto] = 0;
   }
   rdofs = hypre_NumbersNewNode();
   for ( row=0; row<hypre_CSRMatrixNumRows(offd); ++row )
   {
      for ( i=hypre_CSRMatrixI(offd)[row]; i<hypre_CSRMatrixI(offd)[row+1]; ++i )
      {
         j_local = hypre_CSRMatrixJ(offd)[i];
         j_global = col_map_offd[j_local];
         hypre_NumbersEnter( rdofs, j_local );

         /* TO DO: find faster ways to do the two processor lookups below. */

         /* Find a processor p (local index q) from the inprocessor list,
            which owns the column (rdof) which is the same as this
            processor's row (idof) row.  Update num_inchords for p.  Save q
            as inproc[i] for quick recall later.  It represents an
            inprocessor (not unique) connected to a chord i. */
         inproc[i] = -1;
         for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
         {
            p = inprocessor[q];
            if ( hypre_NumbersQuery( offd_cols[q],
                                     row+hypre_ParCSRMatrixFirstRowIndex(Ap) )
                 == 1 )
            {
               /* row is one of the offd columns of p */
               ++num_inchords[q];
               inproc[i] = q;
               break;
            }
         }
         if ( inproc[i]<0 )
         {
            /* For square matrices, we would have found the column in some
               other processor's offd.  But for non-square matrices it could
               exist only in some other processor's diag... */
            /* Note that all data in a diag block is stored.  We don't check
               whether the value of a data entry is zero. */
            for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
            {
               p = inprocessor[q];
               row_global = row+hypre_ParCSRMatrixFirstRowIndex(Ap);
               if ( row_global>=col_starts[p] && row_global<col_starts[p+1] )
               {
                  /* row is one of the diag columns of p */
                  ++num_inchords[q];
                  inproc[i] = q;
                  break;
               }
            }
         }
         hypre_assert( inproc[i]>=0 );

         /* Find the processor pto (local index qto) from the toprocessor
            list, which owns the row (idof) which is the same as this
            processor's column (rdof) j_global.  Update num_rdofs_toprocessor
            for pto.  Save pto as toproc[i] for quick recall later.  It
            represents the toprocessor connected to a chord i. */
         for ( qto=0; qto<num_toprocessors; ++qto )
         {
            pto = toprocessor[qto];
            if ( j_global >= row_starts[pto] && j_global<row_starts[pto+1] )
            {
               hypre_assert( qto < len_num_rdofs_toprocessor );
               ++num_rdofs_toprocessor[qto];
               /* ... an overestimate: if two chords share an rdof, that rdof
                  will be counted twice in num_rdofs_toprocessor.  It can be
                  fixed up later. */
               toproc[i] = qto;
               break;
            }
         }
      }
   }
   num_rdofs += hypre_NumbersNEntered(rdofs);
   hypre_NumbersDeleteNode(rdofs);

   for ( q=0; q<num_inprocessors; ++q ) if (q!=my_q)
   {
      inchord_idof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_rdof[q] = hypre_CTAlloc( HYPRE_Int, num_inchords[q] );
      inchord_data[q] = hypre_CTAlloc( double, num_inchords[q] );
      chord[q] = 0;
   }
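   /* A note on the hypre_Numbers calls used throughout this routine: they
      act as an integer-set container.  As used above (and assuming no
      semantics beyond what this routine itself relies on): NewNode creates
      an empty set, Enter inserts with duplicates collapsing (the
      NEntered-based counts depend on this), NEntered returns the number of
      distinct entries, Query returns 1 iff a value is present, Array
      extracts the distinct entries, and DeleteNode frees the set.  A
      hypothetical usage sketch mirroring the calls above: */
#if 0
   static void example_numbers_usage(void)
   {
      hypre_NumbersNode *set = hypre_NumbersNewNode();
      hypre_NumbersEnter( set, 7 );
      hypre_NumbersEnter( set, 7 );   /* duplicate: still one distinct entry */
      hypre_assert( hypre_NumbersNEntered( set ) == 1 );
      hypre_assert( hypre_NumbersQuery( set, 7 ) == 1 );
      hypre_NumbersDeleteNode( set );
   }
#endif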