void hypre_sort_and_create_inverse_map( HYPRE_Int *in, HYPRE_Int len, HYPRE_Int **out, hypre_UnorderedIntMap *inverse_map) { if (len == 0) { return; } #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] -= hypre_MPI_Wtime(); #endif HYPRE_Int *temp = hypre_TAlloc(HYPRE_Int, len); hypre_merge_sort(in, temp, len, out); hypre_UnorderedIntMapCreate(inverse_map, 2*len, 16*hypre_NumThreads()); HYPRE_Int i; #pragma omp parallel for HYPRE_SMP_SCHEDULE for (i = 0; i < len; i++) { HYPRE_Int old = hypre_UnorderedIntMapPutIfAbsent(inverse_map, (*out)[i], i); assert(old == HYPRE_HOPSCOTCH_HASH_EMPTY); #ifdef DBG_MERGE_SORT if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i) { fprintf(stderr, "%d %d\n", i, (*out)[i]); assert(false); } #endif } #ifdef DBG_MERGE_SORT std::unordered_map<HYPRE_Int, HYPRE_Int> inverse_map2(len); for (HYPRE_Int i = 0; i < len; ++i) { inverse_map2[(*out)[i]] = i; if (hypre_UnorderedIntMapGet(inverse_map, (*out)[i]) != i) { fprintf(stderr, "%d %d\n", i, (*out)[i]); assert(false); } } assert(hypre_UnorderedIntMapSize(inverse_map) == len); #endif if (*out == in) { hypre_TFree(temp); } else { hypre_TFree(in); } #ifdef HYPRE_PROFILE hypre_profile_times[HYPRE_TIMER_ID_MERGE] += hypre_MPI_Wtime(); #endif }
HYPRE_Int *hypre_UnorderedIntSetCopyToArray( hypre_UnorderedIntSet *s, HYPRE_Int *len ) { HYPRE_Int prefix_sum_workspace[hypre_NumThreads() + 1]; HYPRE_Int *ret_array = NULL; #ifdef HYPRE_CONCURRENT_HOPSCOTCH #pragma omp parallel #endif { HYPRE_Int n = s->bucketMask + HYPRE_HOPSCOTCH_HASH_INSERT_RANGE; HYPRE_Int i_begin, i_end; hypre_GetSimpleThreadPartition(&i_begin, &i_end, n); HYPRE_Int cnt = 0; HYPRE_Int i; for (i = i_begin; i < i_end; i++) { if (HYPRE_HOPSCOTCH_HASH_EMPTY != s->hash[i]) cnt++; } hypre_prefix_sum(&cnt, len, prefix_sum_workspace); #ifdef HYPRE_CONCURRENT_HOPSCOTCH #pragma omp barrier #pragma omp master #endif { ret_array = hypre_TAlloc(HYPRE_Int, *len); } #ifdef HYPRE_CONCURRENT_HOPSCOTCH #pragma omp barrier #endif for (i = i_begin; i < i_end; i++) { if (HYPRE_HOPSCOTCH_HASH_EMPTY != s->hash[i]) ret_array[cnt++] = s->key[i]; } } return ret_array; }
int hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParAMGData *amg_data = (hypre_ParAMGData*)amg_vdata; /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/ /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_CSRMatrix *A_diag; double *A_diag_data; int *A_diag_i; hypre_CSRMatrix *A_offd; double *A_offd_data; int *A_offd_i; hypre_CSRMatrix *P_diag; double *P_diag_data; int *P_diag_i; hypre_CSRMatrix *P_offd; double *P_offd_data; int *P_offd_i; int numrows; HYPRE_BigInt *row_starts; int num_levels; int coarsen_type; int interp_type; int measure_type; double global_nonzeros; double *send_buff; double *gather_buff; /* Local variables */ int level; int j; HYPRE_BigInt fine_size; int min_entries; int max_entries; int num_procs,my_id, num_threads; double min_rowsum; double max_rowsum; double sparse; int i; HYPRE_BigInt coarse_size; int entries; double avg_entries; double rowsum; double min_weight; double max_weight; int global_min_e; int global_max_e; double global_min_rsum; double global_max_rsum; double global_min_wt; double global_max_wt; double *num_coeffs; double *num_variables; double total_variables; double operat_cmplxty; double grid_cmplxty; /* amg solve params */ int max_iter; int cycle_type; int *num_grid_sweeps; int *grid_relax_type; int relax_order; int **grid_relax_points; double *relax_weight; double *omega; double tol; int one = 1; int minus_one = -1; int zero = 0; int smooth_type; int smooth_num_levels; int agg_num_levels; /*int seq_cg = 0;*/ /*if (seq_data) seq_cg = 1;*/ MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm,&my_id); num_threads = hypre_NumThreads(); if (my_id == 0) printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); coarsen_type = hypre_ParAMGDataCoarsenType(amg_data); interp_type = hypre_ParAMGDataInterpType(amg_data); measure_type = hypre_ParAMGDataMeasureType(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data); /*---------------------------------------------------------- * Get the amg_data data *----------------------------------------------------------*/ num_levels = hypre_ParAMGDataNumLevels(amg_data); max_iter = hypre_ParAMGDataMaxIter(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); omega = hypre_ParAMGDataOmega(amg_data); tol = hypre_ParAMGDataTol(amg_data); /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/ send_buff = hypre_CTAlloc(double, 6); #ifdef HYPRE_NO_GLOBAL_PARTITION gather_buff = hypre_CTAlloc(double,6); #else gather_buff = hypre_CTAlloc(double,6*num_procs); #endif if (my_id==0) { printf("\nBoomerAMG SETUP PARAMETERS:\n\n"); printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data)); printf(" Num levels = %d\n\n",num_levels); printf(" Strength Threshold = %f\n", hypre_ParAMGDataStrongThreshold(amg_data)); printf(" Interpolation Truncation Factor = %f\n", hypre_ParAMGDataTruncFactor(amg_data)); printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", hypre_ParAMGDataMaxRowSum(amg_data)); if (coarsen_type == 0) { printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n"); } else if (abs(coarsen_type) == 1) { printf(" Coarsening Type = Ruge\n"); } else if (abs(coarsen_type) == 2) { printf(" Coarsening Type = Ruge2B\n"); } else if (abs(coarsen_type) == 3) { printf(" Coarsening Type = Ruge3\n"); } else if (abs(coarsen_type) == 4) { printf(" Coarsening Type = Ruge 3c \n"); } else if (abs(coarsen_type) == 5) { printf(" Coarsening Type = Ruge relax special points \n"); } else if (abs(coarsen_type) == 6) { printf(" Coarsening Type = Falgout-CLJP \n"); } else if (abs(coarsen_type) == 8) { printf(" Coarsening Type = PMIS \n"); } else if (abs(coarsen_type) == 10) { printf(" Coarsening Type = HMIS \n"); } else if (abs(coarsen_type) == 11) { printf(" Coarsening Type = Ruge 1st pass only \n"); } else if (abs(coarsen_type) == 9) { printf(" Coarsening Type = PMIS fixed random \n"); } else if (abs(coarsen_type) == 7) { printf(" Coarsening Type = CLJP, fixed random \n"); } if (coarsen_type > 0) { printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n"); } if (coarsen_type) printf(" measures are determined %s\n\n", (measure_type ? "globally" : "locally")); if (agg_num_levels) printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels); #ifdef HYPRE_NO_GLOBAL_PARTITION printf( "\n No global partition option chosen.\n\n"); #endif if (interp_type == 0) { printf(" Interpolation = modified classical interpolation\n"); } else if (interp_type == 1) { printf(" Interpolation = LS interpolation \n"); } else if (interp_type == 2) { printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n"); } else if (interp_type == 3) { printf(" Interpolation = direct interpolation with separation of weights\n"); } else if (interp_type == 4) { printf(" Interpolation = multipass interpolation\n"); } else if (interp_type == 5) { printf(" Interpolation = multipass interpolation with separation of weights\n"); } else if (interp_type == 6) { printf(" Interpolation = extended+i interpolation\n"); } else if (interp_type == 7) { printf(" Interpolation = extended+i interpolation (only when needed)\n"); } else if (interp_type == 8) { printf(" Interpolation = standard interpolation\n"); } else if (interp_type == 9) { printf(" Interpolation = standard interpolation with separation of weights\n"); } else if (interp_type == 12) { printf(" FF interpolation \n"); } else if (interp_type == 13) { printf(" FF1 interpolation \n"); } { printf( "\nOperator Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("==================================\n"); #else printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("============================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_coeffs = hypre_CTAlloc(double,num_levels); num_variables = hypre_CTAlloc(double,num_levels); for (level = 0; level < num_levels; level++) { { A_diag = hypre_ParCSRMatrixDiag(A_array[level]); A_diag_data = hypre_CSRMatrixData(A_diag); A_diag_i = hypre_CSRMatrixI(A_diag); A_offd = hypre_ParCSRMatrixOffd(A_array[level]); A_offd_data = hypre_CSRMatrixData(A_offd); A_offd_i = hypre_CSRMatrixI(A_offd); row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); num_coeffs[level] = global_nonzeros; num_variables[level] = (double) fine_size; sparse = global_nonzeros /((double) fine_size * (double) fine_size); min_entries = 0; max_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; if (hypre_CSRMatrixNumRows(A_diag)) { min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]); for (j = A_diag_i[0]; j < A_diag_i[1]; j++) min_rowsum += A_diag_data[j]; for (j = A_offd_i[0]; j < A_offd_i[1]; j++) min_rowsum += A_offd_data[j]; max_rowsum = min_rowsum; for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++) { entries = (A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++) rowsum += A_diag_data[i]; for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++) rowsum += A_offd_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = global_nonzeros / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id ==0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = - gather_buff[2]; global_max_rsum = gather_buff[3]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1]-row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #endif } if (my_id == 0) { { printf( "\n\nInterpolation Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows x cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("======================================\n"); #else printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("==========================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { { P_diag = hypre_ParCSRMatrixDiag(P_array[level]); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_offd = hypre_ParCSRMatrixOffd(P_array[level]); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]); coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]); global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]); min_weight = 1.0; max_weight = 0.0; max_rowsum = 0.0; min_rowsum = 0.0; min_entries = 0; max_entries = 0; if (hypre_CSRMatrixNumRows(P_diag)) { if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0]; for (j = P_diag_i[0]; j < P_diag_i[1]; j++) { min_weight = hypre_min(min_weight, P_diag_data[j]); if (P_diag_data[j] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[j]); min_rowsum += P_diag_data[j]; } for (j = P_offd_i[0]; j < P_offd_i[1]; j++) { min_weight = hypre_min(min_weight, P_offd_data[j]); if (P_offd_data[j] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[j]); min_rowsum += P_offd_data[j]; } max_rowsum = min_rowsum; min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); max_entries = 0; for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++) { entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_diag_data[i]); if (P_diag_data[i] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[i]); rowsum += P_diag_data[i]; } for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_offd_data[i]); if (P_offd_data[i] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[i]); rowsum += P_offd_data[i]; } min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = ((double) global_nonzeros) / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; min_weight = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = - min_weight; send_buff[5] = max_weight; MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id == 0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = -gather_buff[2]; global_max_rsum = gather_buff[3]; global_min_wt = -gather_buff[4]; global_max_wt = gather_buff[5]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = min_weight; send_buff[5] = max_weight; MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; global_min_wt = 1.0e7; global_max_wt = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1] - row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]); global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]); global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #endif } total_variables = 0; operat_cmplxty = 0; for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++) { operat_cmplxty += num_coeffs[j] / num_coeffs[0]; total_variables += num_variables[j]; } if (num_variables[0] != 0) grid_cmplxty = total_variables / num_variables[0]; if (my_id == 0 ) { printf("\n\n Complexity: grid = %f\n",grid_cmplxty); printf(" operator = %f\n",operat_cmplxty); } if (my_id == 0) printf("\n\n"); if (my_id == 0) { printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n"); printf( " Maximum number of cycles: %d \n",max_iter); printf( " Stopping Tolerance: %e \n",tol); printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type); printf( " Relaxation Parameters:\n"); printf( " Visiting Grid: down up coarse\n"); printf( " Number of partial sweeps: %4d %2d %4d \n", num_grid_sweeps[1], num_grid_sweeps[2],num_grid_sweeps[3]); printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n", grid_relax_type[1], grid_relax_type[2],grid_relax_type[3]); #if 1 /* TO DO: may not want this to print if CG in the coarse grid */ printf( " Point types, partial sweeps (1=C, -1=F):\n"); if (grid_relax_points) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", grid_relax_points[1][j]); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", grid_relax_points[2][j]); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", grid_relax_points[3][j]); printf( "\n\n"); } else if (relax_order == 1) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d %2d", one, minus_one); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d %2d", minus_one, one); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } else { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", zero); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", zero); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } #endif if (smooth_type == 6) for (j=0; j < smooth_num_levels; j++) printf( " Schwarz Relaxation Weight %f level %d\n", hypre_ParAMGDataSchwarzRlxWeight(amg_data),j); for (j=0; j < num_levels; j++) if (relax_weight[j] != 1) printf( " Relaxation Weight %f level %d\n",relax_weight[j],j); for (j=0; j < num_levels; j++) if (omega[j] != 1) printf( " Outer relaxation weight %f level %d\n",omega[j],j); } /*if (seq_cg) { hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], operat_cmplxty, grid_cmplxty ); }*/ hypre_TFree(num_coeffs); hypre_TFree(num_variables); hypre_TFree(send_buff); hypre_TFree(gather_buff); return(0); }
int hypre_CSRMatrixMatvecT( double alpha, hypre_CSRMatrix *A, hypre_Vector *x, double beta, hypre_Vector *y ) { double *A_data = hypre_CSRMatrixData(A); int *A_i = hypre_CSRMatrixI(A); int *A_j = hypre_CSRMatrixJ(A); int num_rows = hypre_CSRMatrixNumRows(A); int num_cols = hypre_CSRMatrixNumCols(A); double *x_data = hypre_VectorData(x); double *y_data = hypre_VectorData(y); int x_size = hypre_VectorSize(x); int y_size = hypre_VectorSize(y); int num_vectors = hypre_VectorNumVectors(x); int idxstride_y = hypre_VectorIndexStride(y); int vecstride_y = hypre_VectorVectorStride(y); int idxstride_x = hypre_VectorIndexStride(x); int vecstride_x = hypre_VectorVectorStride(x); double temp; int i, i1, j, jv, jj, ns, ne, size, rest; int num_threads; int ierr = 0; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( num_vectors == hypre_VectorNumVectors(y) ); if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS *-----------------------------------------------------------------------*/ if (alpha == 0.0) { for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= beta; return ierr; } /*----------------------------------------------------------------------- * y = (beta/alpha)*y *-----------------------------------------------------------------------*/ temp = beta / alpha; if (temp != 1.0) { if (temp == 0.0) { for (i = 0; i < num_cols*num_vectors; i++) y_data[i] = 0.0; } else { for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= temp; } } /*----------------------------------------------------------------- * y += A^T*x *-----------------------------------------------------------------*/ num_threads = hypre_NumThreads(); if (num_threads > 1) { for (i1 = 0; i1 < num_threads; i1++) { size = num_cols/num_threads; rest = num_cols - size*num_threads; if (i1 < rest) { ns = i1*size+i1-1; ne = (i1+1)*size+i1+1; } else { ns = i1*size+rest-1; ne = (i1+1)*size+rest; } if ( num_vectors==1 ) { for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; if (j > ns && j < ne) y_data[j] += A_data[jj] * x_data[i]; } } } else { for (i = 0; i < num_rows; i++) { for ( jv=0; jv<num_vectors; ++jv ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; if (j > ns && j < ne) y_data[ j*idxstride_y + jv*vecstride_y ] += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x]; } } } } } } else { for (i = 0; i < num_rows; i++) { if ( num_vectors==1 ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[j] += A_data[jj] * x_data[i]; } } else { for ( jv=0; jv<num_vectors; ++jv ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[ j*idxstride_y + jv*vecstride_y ] += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ]; } } } } } /*----------------------------------------------------------------- * y = alpha*y *-----------------------------------------------------------------*/ if (alpha != 1.0) { for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= alpha; } return ierr; }
HYPRE_Int hypre_MaxwellSolve2( void * maxwell_vdata, hypre_SStructMatrix * A_in, hypre_SStructVector * f, hypre_SStructVector * u ) { hypre_MaxwellData *maxwell_data = maxwell_vdata; hypre_ParVector *f_edge; hypre_ParVector *u_edge; HYPRE_Int max_iter = maxwell_data-> max_iter; double tol = maxwell_data-> tol; HYPRE_Int rel_change = maxwell_data-> rel_change; HYPRE_Int zero_guess = maxwell_data-> zero_guess; HYPRE_Int npre_relax = maxwell_data-> num_pre_relax; HYPRE_Int npost_relax = maxwell_data-> num_post_relax; hypre_ParCSRMatrix **Ann_l = maxwell_data-> Ann_l; hypre_ParCSRMatrix **Pn_l = maxwell_data-> Pn_l; hypre_ParCSRMatrix **RnT_l = maxwell_data-> RnT_l; hypre_ParVector **bn_l = maxwell_data-> bn_l; hypre_ParVector **xn_l = maxwell_data-> xn_l; hypre_ParVector **resn_l = maxwell_data-> resn_l; hypre_ParVector **en_l = maxwell_data-> en_l; hypre_ParVector **nVtemp2_l = maxwell_data-> nVtemp2_l; HYPRE_Int **nCF_marker_l = maxwell_data-> nCF_marker_l; double *nrelax_weight= maxwell_data-> nrelax_weight; double *nomega = maxwell_data-> nomega; HYPRE_Int nrelax_type = maxwell_data-> nrelax_type; HYPRE_Int node_numlevs = maxwell_data-> node_numlevels; hypre_ParCSRMatrix *Tgrad = maxwell_data-> Tgrad; hypre_ParCSRMatrix *T_transpose = maxwell_data-> T_transpose; hypre_ParCSRMatrix **Aee_l = maxwell_data-> Aee_l; hypre_IJMatrix **Pe_l = maxwell_data-> Pe_l; hypre_IJMatrix **ReT_l = maxwell_data-> ReT_l; hypre_ParVector **be_l = maxwell_data-> be_l; hypre_ParVector **xe_l = maxwell_data-> xe_l; hypre_ParVector **rese_l = maxwell_data-> rese_l; hypre_ParVector **ee_l = maxwell_data-> ee_l; hypre_ParVector **eVtemp2_l = maxwell_data-> eVtemp2_l; HYPRE_Int **eCF_marker_l = maxwell_data-> eCF_marker_l; double *erelax_weight= maxwell_data-> erelax_weight; double *eomega = maxwell_data-> eomega; HYPRE_Int erelax_type = maxwell_data-> erelax_type; HYPRE_Int edge_numlevs = maxwell_data-> edge_numlevels; HYPRE_Int **BdryRanks_l = maxwell_data-> BdryRanks_l; HYPRE_Int *BdryRanksCnts_l= maxwell_data-> BdryRanksCnts_l; HYPRE_Int logging = maxwell_data-> logging; double *norms = maxwell_data-> norms; double *rel_norms = maxwell_data-> rel_norms; HYPRE_Int Solve_err_flag; HYPRE_Int relax_local, cycle_param; double b_dot_b = 0, r_dot_r, eps = 0; double e_dot_e, x_dot_x; HYPRE_Int i, j; HYPRE_Int level; HYPRE_Int ierr= 0; /* added for the relaxation routines */ hypre_ParVector *ze = NULL; if (hypre_NumThreads() > 1) { /* Aee is always bigger than Ann */ ze = hypre_ParVectorCreate(hypre_ParCSRMatrixComm(Aee_l[0]), hypre_ParCSRMatrixGlobalNumRows(Aee_l[0]), hypre_ParCSRMatrixRowStarts(Aee_l[0])); hypre_ParVectorInitialize(ze); hypre_ParVectorSetPartitioningOwner(ze,0); } hypre_BeginTiming(maxwell_data-> time_index); hypre_SStructVectorConvert(f, &f_edge); hypre_SStructVectorConvert(u, &u_edge); hypre_ParVectorZeroBCValues(f_edge, BdryRanks_l[0], BdryRanksCnts_l[0]); hypre_ParVectorZeroBCValues(u_edge, BdryRanks_l[0], BdryRanksCnts_l[0]); be_l[0]= f_edge; xe_l[0]= u_edge; /* the nodal fine vectors: xn= 0. bn= T'*(be- Aee*xe) is updated in the cycle. */ hypre_ParVectorSetConstantValues(xn_l[0], 0.0); relax_local= 0; cycle_param= 0; (maxwell_data-> num_iterations) = 0; /* if max_iter is zero, return */ if (max_iter == 0) { /* if using a zero initial guess, return zero */ if (zero_guess) { hypre_ParVectorSetConstantValues(xe_l[0], 0.0); } hypre_EndTiming(maxwell_data -> time_index); return ierr; } /* part of convergence check */ if (tol > 0.0) { /* eps = (tol^2) */ b_dot_b= hypre_ParVectorInnerProd(be_l[0], be_l[0]); eps = tol*tol; /* if rhs is zero, return a zero solution */ if (b_dot_b == 0.0) { hypre_ParVectorSetConstantValues(xe_l[0], 0.0); if (logging > 0) { norms[0] = 0.0; rel_norms[0] = 0.0; } hypre_EndTiming(maxwell_data -> time_index); return ierr; } } /*----------------------------------------------------- * Do V-cycles: * For each index l, "fine" = l, "coarse" = (l-1) * * solution update: * edge_sol= edge_sol + T*node_sol *-----------------------------------------------------*/ for (i = 0; i < max_iter; i++) { /* compute fine grid residual & nodal rhs. */ hypre_ParVectorCopy(be_l[0], rese_l[0]); hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]); hypre_ParVectorZeroBCValues(rese_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]); hypre_ParCSRMatrixMatvec(1.0, T_transpose, rese_l[0], 0.0, bn_l[0]); /* convergence check */ if (tol > 0.0) { r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]); if (logging > 0) { norms[i] = sqrt(r_dot_r); if (b_dot_b > 0) rel_norms[i] = sqrt(r_dot_r/b_dot_b); else rel_norms[i] = 0.0; } /* always do at least 1 V-cycle */ if ((r_dot_r/b_dot_b < eps) && (i > 0)) { if (rel_change) { if ((e_dot_e/x_dot_x) < eps) break; } else { break; } } } hypre_ParVectorCopy(bn_l[0], resn_l[0]); hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]); r_dot_r= hypre_ParVectorInnerProd(resn_l[0], resn_l[0]); for (level= 0; level<= node_numlevs-2; level++) { /*----------------------------------------------- * Down cycle *-----------------------------------------------*/ for (j= 0; j< npre_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level], bn_l[level], nCF_marker_l[level], nrelax_type, relax_local, cycle_param, nrelax_weight[level], nomega[level], NULL, xn_l[level], nVtemp2_l[level], ze); } /*for (j= 0; j< npre_relax; j++) */ /* compute residuals */ hypre_ParVectorCopy(bn_l[level], resn_l[level]); hypre_ParCSRMatrixMatvec(-1.0, Ann_l[level], xn_l[level], 1.0, resn_l[level]); /* restrict residuals */ hypre_ParCSRMatrixMatvecT(1.0, RnT_l[level], resn_l[level], 0.0, bn_l[level+1]); /* zero off initial guess for the next level */ hypre_ParVectorSetConstantValues(xn_l[level+1], 0.0); } /* for (level= 0; level<= node_numlevs-2; level++) */ /* coarsest node solve */ level= node_numlevs-1; Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level], bn_l[level], nCF_marker_l[level], nrelax_type, relax_local, cycle_param, nrelax_weight[level], nomega[level], NULL, xn_l[level], nVtemp2_l[level], ze); /*--------------------------------------------------------------------- * Cycle up the levels. *---------------------------------------------------------------------*/ for (level= (node_numlevs - 2); level>= 1; level--) { hypre_ParCSRMatrixMatvec(1.0, Pn_l[level], xn_l[level+1], 0.0, en_l[level]); hypre_ParVectorAxpy(1.0, en_l[level], xn_l[level]); /* post smooth */ for (j= 0; j< npost_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[level], bn_l[level], nCF_marker_l[level], nrelax_type, relax_local, cycle_param, nrelax_weight[level], nomega[level], NULL, xn_l[level], nVtemp2_l[level], ze); } } /* for (level= (en_numlevs - 2); level>= 1; level--) */ /* interpolate error and correct on finest grids */ hypre_ParCSRMatrixMatvec(1.0, Pn_l[0], xn_l[1], 0.0, en_l[0]); hypre_ParVectorAxpy(1.0, en_l[0], xn_l[0]); for (j= 0; j< npost_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Ann_l[0], bn_l[0], nCF_marker_l[0], nrelax_type, relax_local, cycle_param, nrelax_weight[0], nomega[0], NULL, xn_l[0], nVtemp2_l[0], ze); } /* for (j= 0; j< npost_relax; j++) */ hypre_ParVectorCopy(bn_l[0], resn_l[0]); hypre_ParCSRMatrixMatvec(-1.0, Ann_l[0], xn_l[0], 1.0, resn_l[0]); /* add the gradient solution component to xe_l[0] */ hypre_ParCSRMatrixMatvec(1.0, Tgrad, xn_l[0], 1.0, xe_l[0]); hypre_ParVectorCopy(be_l[0], rese_l[0]); hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]); r_dot_r= hypre_ParVectorInnerProd(rese_l[0], rese_l[0]); for (level= 0; level<= edge_numlevs-2; level++) { /*----------------------------------------------- * Down cycle *-----------------------------------------------*/ for (j= 0; j< npre_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level], be_l[level], eCF_marker_l[level], erelax_type, relax_local, cycle_param, erelax_weight[level], eomega[level], NULL, xe_l[level], eVtemp2_l[level], ze); } /*for (j= 0; j< npre_relax; j++) */ /* compute residuals */ hypre_ParVectorCopy(be_l[level], rese_l[level]); hypre_ParCSRMatrixMatvec(-1.0, Aee_l[level], xe_l[level], 1.0, rese_l[level]); /* restrict residuals */ hypre_ParCSRMatrixMatvecT(1.0, (hypre_ParCSRMatrix *) hypre_IJMatrixObject(ReT_l[level]), rese_l[level], 0.0, be_l[level+1]); hypre_ParVectorZeroBCValues(be_l[level+1], BdryRanks_l[level+1], BdryRanksCnts_l[level+1]); /* zero off initial guess for the next level */ hypre_ParVectorSetConstantValues(xe_l[level+1], 0.0); } /* for (level= 1; level<= edge_numlevels-2; level++) */ /* coarsest edge solve */ level= edge_numlevs-1; for (j= 0; j< npre_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level], be_l[level], eCF_marker_l[level], erelax_type, relax_local, cycle_param, erelax_weight[level], eomega[level], NULL, xe_l[level], eVtemp2_l[level], ze); } /*--------------------------------------------------------------------- * Up cycle. *---------------------------------------------------------------------*/ for (level= (edge_numlevs - 2); level>= 1; level--) { hypre_ParCSRMatrixMatvec(1.0, (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[level]), xe_l[level+1], 0.0, ee_l[level]); hypre_ParVectorZeroBCValues(ee_l[level], BdryRanks_l[level], BdryRanksCnts_l[level]); hypre_ParVectorAxpy(1.0, ee_l[level], xe_l[level]); /* post smooth */ for (j= 0; j< npost_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[level], be_l[level], eCF_marker_l[level], erelax_type, relax_local, cycle_param, erelax_weight[level], eomega[level], NULL, xe_l[level], eVtemp2_l[level], ze); } } /* for (level= (edge_numlevs - 2); level>= 1; level--) */ /* interpolate error and correct on finest grids */ hypre_ParCSRMatrixMatvec(1.0, (hypre_ParCSRMatrix *) hypre_IJMatrixObject(Pe_l[0]), xe_l[1], 0.0, ee_l[0]); hypre_ParVectorZeroBCValues(ee_l[0], BdryRanks_l[0], BdryRanksCnts_l[0]); hypre_ParVectorAxpy(1.0, ee_l[0], xe_l[0]); for (j= 0; j< npost_relax; j++) { Solve_err_flag = hypre_BoomerAMGRelaxIF(Aee_l[0], be_l[0], eCF_marker_l[0], erelax_type, relax_local, cycle_param, erelax_weight[0], eomega[0], NULL, xe_l[0], eVtemp2_l[0], ze); } /* for (j= 0; j< npost_relax; j++) */ e_dot_e= hypre_ParVectorInnerProd(ee_l[0], ee_l[0]); x_dot_x= hypre_ParVectorInnerProd(xe_l[0], xe_l[0]); hypre_ParVectorCopy(be_l[0], rese_l[0]); hypre_ParCSRMatrixMatvec(-1.0, Aee_l[0], xe_l[0], 1.0, rese_l[0]); (maxwell_data -> num_iterations) = (i + 1); } hypre_EndTiming(maxwell_data -> time_index); if (ze) hypre_ParVectorDestroy(ze); return ierr; }
int hypre_CSRMatrixMatvecT( double alpha, hypre_CSRMatrix *A, hypre_Vector *x, double beta, hypre_Vector *y ) { double *A_data = hypre_CSRMatrixData(A); int *A_i = hypre_CSRMatrixI(A); int *A_j = hypre_CSRMatrixJ(A); int num_rows = hypre_CSRMatrixNumRows(A); int num_cols = hypre_CSRMatrixNumCols(A); double *x_data = hypre_VectorData(x); double *y_data = hypre_VectorData(y); int x_size = hypre_VectorSize(x); int y_size = hypre_VectorSize(y); int num_vectors = hypre_VectorNumVectors(x); int idxstride_y = hypre_VectorIndexStride(y); int vecstride_y = hypre_VectorVectorStride(y); int idxstride_x = hypre_VectorIndexStride(x); int vecstride_x = hypre_VectorVectorStride(x); double temp; double *y_data_expand = NULL; int offset = 0; #ifdef HYPRE_USING_OPENMP int my_thread_num = 0; #endif int i, j, jv, jj; int num_threads; int ierr = 0; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( num_vectors == hypre_VectorNumVectors(y) ); if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS *-----------------------------------------------------------------------*/ if (alpha == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) schedule(static) #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= beta; return ierr; } /*----------------------------------------------------------------------- * y = (beta/alpha)*y *-----------------------------------------------------------------------*/ temp = beta / alpha; if (temp != 1.0) { if (temp == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) schedule(static) #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] = 0.0; } else { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) schedule(static) #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= temp; } } /*----------------------------------------------------------------- * y += A^T*x *-----------------------------------------------------------------*/ num_threads = hypre_NumThreads(); if (num_threads > 1) { y_data_expand = hypre_CTAlloc(double, num_threads*y_size); if ( num_vectors==1 ) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel private(i,jj,j, my_thread_num, offset) { my_thread_num = omp_get_thread_num(); offset = y_size*my_thread_num; #pragma omp for schedule(static) #endif for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data_expand[offset + j] += A_data[jj] * x_data[i]; } } #ifdef HYPRE_USING_OPENMP /* implied barrier */ #pragma omp for schedule(static) #endif for (i = 0; i < y_size; i++) { for (j = 0; j < num_threads; j++) { y_data[i] += y_data_expand[j*y_size + i]; /*y_data_expand[j*y_size + i] = 0; //zero out for next time */ } } #ifdef HYPRE_USING_OPENMP } /* end parallel region */ #endif hypre_TFree(y_data_expand); } else { /* MULTIPLE VECTORS NOT THREADED YET */ for (i = 0; i < num_rows; i++) { for ( jv=0; jv<num_vectors; ++jv ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[ j*idxstride_y + jv*vecstride_y ] += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x]; } } } } hypre_TFree(y_data_expand); }
HYPRE_Int hypre_CSRMatrixMatvecT( HYPRE_Complex alpha, hypre_CSRMatrix *A, hypre_Vector *x, HYPRE_Complex beta, hypre_Vector *y ) { HYPRE_Complex *A_data = hypre_CSRMatrixData(A); HYPRE_Int *A_i = hypre_CSRMatrixI(A); HYPRE_Int *A_j = hypre_CSRMatrixJ(A); HYPRE_Int num_rows = hypre_CSRMatrixNumRows(A); HYPRE_Int num_cols = hypre_CSRMatrixNumCols(A); HYPRE_Complex *x_data = hypre_VectorData(x); HYPRE_Complex *y_data = hypre_VectorData(y); HYPRE_Int x_size = hypre_VectorSize(x); HYPRE_Int y_size = hypre_VectorSize(y); HYPRE_Int num_vectors = hypre_VectorNumVectors(x); HYPRE_Int idxstride_y = hypre_VectorIndexStride(y); HYPRE_Int vecstride_y = hypre_VectorVectorStride(y); HYPRE_Int idxstride_x = hypre_VectorIndexStride(x); HYPRE_Int vecstride_x = hypre_VectorVectorStride(x); HYPRE_Complex temp; HYPRE_Complex *y_data_expand; HYPRE_Int my_thread_num = 0, offset = 0; HYPRE_Int i, j, jv, jj; HYPRE_Int num_threads; HYPRE_Int ierr = 0; hypre_Vector *x_tmp = NULL; /*--------------------------------------------------------------------- * Check for size compatibility. MatvecT returns ierr = 1 if * length of X doesn't equal the number of rows of A, * ierr = 2 if the length of Y doesn't equal the number of * columns of A, and ierr = 3 if both are true. * * Because temporary vectors are often used in MatvecT, none of * these conditions terminates processing, and the ierr flag * is informational only. *--------------------------------------------------------------------*/ hypre_assert( num_vectors == hypre_VectorNumVectors(y) ); if (num_rows != x_size) ierr = 1; if (num_cols != y_size) ierr = 2; if (num_rows != x_size && num_cols != y_size) ierr = 3; /*----------------------------------------------------------------------- * Do (alpha == 0.0) computation - RDF: USE MACHINE EPS *-----------------------------------------------------------------------*/ if (alpha == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= beta; return ierr; } if (x == y) { x_tmp = hypre_SeqVectorCloneDeep(x); x_data = hypre_VectorData(x_tmp); } /*----------------------------------------------------------------------- * y = (beta/alpha)*y *-----------------------------------------------------------------------*/ temp = beta / alpha; if (temp != 1.0) { if (temp == 0.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] = 0.0; } else { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= temp; } } /*----------------------------------------------------------------- * y += A^T*x *-----------------------------------------------------------------*/ num_threads = hypre_NumThreads(); if (num_threads > 1) { y_data_expand = hypre_CTAlloc(HYPRE_Complex, num_threads*y_size); if ( num_vectors==1 ) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel private(i,jj,j,my_thread_num,offset) #endif { my_thread_num = hypre_GetThreadNum(); offset = y_size*my_thread_num; #ifdef HYPRE_USING_OPENMP #pragma omp for HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data_expand[offset + j] += A_data[jj] * x_data[i]; } } /* implied barrier (for threads)*/ #ifdef HYPRE_USING_OPENMP #pragma omp for HYPRE_SMP_SCHEDULE #endif for (i = 0; i < y_size; i++) { for (j = 0; j < num_threads; j++) { y_data[i] += y_data_expand[j*y_size + i]; } } } /* end parallel threaded region */ } else { /* multiple vector case is not threaded */ for (i = 0; i < num_rows; i++) { for ( jv=0; jv<num_vectors; ++jv ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[ j*idxstride_y + jv*vecstride_y ] += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x]; } } } } hypre_TFree(y_data_expand); } else { for (i = 0; i < num_rows; i++) { if ( num_vectors==1 ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[j] += A_data[jj] * x_data[i]; } } else { for ( jv=0; jv<num_vectors; ++jv ) { for (jj = A_i[i]; jj < A_i[i+1]; jj++) { j = A_j[jj]; y_data[ j*idxstride_y + jv*vecstride_y ] += A_data[jj] * x_data[ i*idxstride_x + jv*vecstride_x ]; } } } } } /*----------------------------------------------------------------- * y = alpha*y *-----------------------------------------------------------------*/ if (alpha != 1.0) { #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_cols*num_vectors; i++) y_data[i] *= alpha; } if (x == y) hypre_SeqVectorDestroy(x_tmp); return ierr; }
HYPRE_Int hypre_BoomerAMGCycle( void *amg_vdata, hypre_ParVector **F_array, hypre_ParVector **U_array ) { hypre_ParAMGData *amg_data = amg_vdata; HYPRE_Solver *smoother; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_ParCSRMatrix **R_array; hypre_ParVector *Utemp; hypre_ParVector *Vtemp; hypre_ParVector *Rtemp; hypre_ParVector *Ptemp; hypre_ParVector *Ztemp; hypre_ParVector *Aux_U; hypre_ParVector *Aux_F; hypre_ParCSRBlockMatrix **A_block_array; hypre_ParCSRBlockMatrix **P_block_array; hypre_ParCSRBlockMatrix **R_block_array; HYPRE_Real *Ztemp_data; HYPRE_Real *Ptemp_data; HYPRE_Int **CF_marker_array; /* HYPRE_Int **unknown_map_array; HYPRE_Int **point_map_array; HYPRE_Int **v_at_point_array; */ HYPRE_Real cycle_op_count; HYPRE_Int cycle_type; HYPRE_Int num_levels; HYPRE_Int max_levels; HYPRE_Real *num_coeffs; HYPRE_Int *num_grid_sweeps; HYPRE_Int *grid_relax_type; HYPRE_Int **grid_relax_points; HYPRE_Int block_mode; HYPRE_Real *max_eig_est; HYPRE_Real *min_eig_est; HYPRE_Int cheby_order; HYPRE_Real cheby_fraction; /* Local variables */ HYPRE_Int *lev_counter; HYPRE_Int Solve_err_flag; HYPRE_Int k; HYPRE_Int i, j, jj; HYPRE_Int level; HYPRE_Int cycle_param; HYPRE_Int coarse_grid; HYPRE_Int fine_grid; HYPRE_Int Not_Finished; HYPRE_Int num_sweep; HYPRE_Int cg_num_sweep = 1; HYPRE_Int relax_type; HYPRE_Int relax_points; HYPRE_Int relax_order; HYPRE_Int relax_local; HYPRE_Int old_version = 0; HYPRE_Real *relax_weight; HYPRE_Real *omega; HYPRE_Real alfa, beta, gammaold; HYPRE_Real gamma = 1.0; HYPRE_Int local_size; /* HYPRE_Int *smooth_option; */ HYPRE_Int smooth_type; HYPRE_Int smooth_num_levels; HYPRE_Int num_threads, my_id; HYPRE_Real alpha; HYPRE_Real **l1_norms = NULL; HYPRE_Real *l1_norms_level; HYPRE_Int seq_cg = 0; MPI_Comm comm; #if 0 HYPRE_Real *D_mat; HYPRE_Real *S_vec; #endif /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); R_array = hypre_ParAMGDataRArray(amg_data); CF_marker_array = hypre_ParAMGDataCFMarkerArray(amg_data); Vtemp = hypre_ParAMGDataVtemp(amg_data); Rtemp = hypre_ParAMGDataRtemp(amg_data); Ptemp = hypre_ParAMGDataPtemp(amg_data); Ztemp = hypre_ParAMGDataZtemp(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); max_levels = hypre_ParAMGDataMaxLevels(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); A_block_array = hypre_ParAMGDataABlockArray(amg_data); P_block_array = hypre_ParAMGDataPBlockArray(amg_data); R_block_array = hypre_ParAMGDataRBlockArray(amg_data); block_mode = hypre_ParAMGDataBlockMode(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); omega = hypre_ParAMGDataOmega(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); l1_norms = hypre_ParAMGDataL1Norms(amg_data); /* smooth_option = hypre_ParAMGDataSmoothOption(amg_data); */ max_eig_est = hypre_ParAMGDataMaxEigEst(amg_data); min_eig_est = hypre_ParAMGDataMinEigEst(amg_data); cheby_order = hypre_ParAMGDataChebyOrder(amg_data); cheby_fraction = hypre_ParAMGDataChebyFraction(amg_data); cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data); lev_counter = hypre_CTAlloc(HYPRE_Int, num_levels); if (hypre_ParAMGDataParticipate(amg_data)) seq_cg = 1; /* Initialize */ Solve_err_flag = 0; if (grid_relax_points) old_version = 1; num_coeffs = hypre_CTAlloc(HYPRE_Real, num_levels); num_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A_array[0]); comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_rank(comm,&my_id); if (block_mode) { for (j = 1; j < num_levels; j++) num_coeffs[j] = hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[j]); } else { for (j = 1; j < num_levels; j++) num_coeffs[j] = hypre_ParCSRMatrixDNumNonzeros(A_array[j]); } /*--------------------------------------------------------------------- * Initialize cycling control counter * * Cycling is controlled using a level counter: lev_counter[k] * * Each time relaxation is performed on level k, the * counter is decremented by 1. If the counter is then * negative, we go to the next finer level. If non- * negative, we go to the next coarser level. The * following actions control cycling: * * a. lev_counter[0] is initialized to 1. * b. lev_counter[k] is initialized to cycle_type for k>0. * * c. During cycling, when going down to level k, lev_counter[k] * is set to the max of (lev_counter[k],cycle_type) *---------------------------------------------------------------------*/ Not_Finished = 1; lev_counter[0] = 1; for (k = 1; k < num_levels; ++k) { lev_counter[k] = cycle_type; } level = 0; cycle_param = 1; smoother = hypre_ParAMGDataSmoother(amg_data); if (smooth_num_levels > 0) { if (smooth_type == 7 || smooth_type == 8 || smooth_type == 17 || smooth_type == 18 || smooth_type == 9 || smooth_type == 19) { HYPRE_Int actual_local_size = hypre_ParVectorActualLocalSize(Vtemp); Utemp = hypre_ParVectorCreate(comm,hypre_ParVectorGlobalSize(Vtemp), hypre_ParVectorPartitioning(Vtemp)); hypre_ParVectorOwnsPartitioning(Utemp) = 0; local_size = hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)); if (local_size < actual_local_size) { hypre_VectorData(hypre_ParVectorLocalVector(Utemp)) = hypre_CTAlloc(HYPRE_Complex, actual_local_size); hypre_ParVectorActualLocalSize(Utemp) = actual_local_size; } else hypre_ParVectorInitialize(Utemp); } } /*--------------------------------------------------------------------- * Main loop of cycling *--------------------------------------------------------------------*/ while (Not_Finished) { if (num_levels > 1) { local_size = hypre_VectorSize(hypre_ParVectorLocalVector(F_array[level])); hypre_VectorSize(hypre_ParVectorLocalVector(Vtemp)) = local_size; if (smooth_num_levels <= level) { cg_num_sweep = 1; num_sweep = num_grid_sweeps[cycle_param]; Aux_U = U_array[level]; Aux_F = F_array[level]; } else if (smooth_type > 9) { hypre_VectorSize(hypre_ParVectorLocalVector(Ztemp)) = local_size; hypre_VectorSize(hypre_ParVectorLocalVector(Rtemp)) = local_size; hypre_VectorSize(hypre_ParVectorLocalVector(Ptemp)) = local_size; Ztemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ztemp)); Ptemp_data = hypre_VectorData(hypre_ParVectorLocalVector(Ptemp)); hypre_ParVectorSetConstantValues(Ztemp,0); alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], U_array[level], beta, F_array[level], Rtemp); cg_num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data); num_sweep = num_grid_sweeps[cycle_param]; Aux_U = Ztemp; Aux_F = Rtemp; } else { cg_num_sweep = 1; num_sweep = hypre_ParAMGDataSmoothNumSweeps(amg_data); Aux_U = U_array[level]; Aux_F = F_array[level]; } relax_type = grid_relax_type[cycle_param]; } else /* AB: 4/08: removed the max_levels > 1 check - should do this when max-levels = 1 also */ { /* If no coarsening occurred, apply a simple smoother once */ Aux_U = U_array[level]; Aux_F = F_array[level]; num_sweep = 1; /* TK: Use the user relax type (instead of 0) to allow for setting a convergent smoother (e.g. in the solution of singular problems). */ relax_type = hypre_ParAMGDataUserRelaxType(amg_data); } if (l1_norms != NULL) l1_norms_level = l1_norms[level]; else l1_norms_level = NULL; if (cycle_param == 3 && seq_cg) { hypre_seqAMGCycle(amg_data, level, F_array, U_array); } else { /*------------------------------------------------------------------ * Do the relaxation num_sweep times *-----------------------------------------------------------------*/ for (jj = 0; jj < cg_num_sweep; jj++) { if (smooth_num_levels > level && smooth_type > 9) hypre_ParVectorSetConstantValues(Aux_U,0); for (j = 0; j < num_sweep; j++) { if (num_levels == 1 && max_levels > 1) { relax_points = 0; relax_local = 0; } else { if (old_version) relax_points = grid_relax_points[cycle_param][j]; relax_local = relax_order; } /*----------------------------------------------- * VERY sloppy approximation to cycle complexity *-----------------------------------------------*/ if (old_version && level < num_levels -1) { switch (relax_points) { case 1: cycle_op_count += num_coeffs[level+1]; break; case -1: cycle_op_count += (num_coeffs[level]-num_coeffs[level+1]); break; } } else { cycle_op_count += num_coeffs[level]; } /*----------------------------------------------- Choose Smoother -----------------------------------------------*/ if (smooth_num_levels > level && (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || smooth_type == 19 || smooth_type == 17 || smooth_type == 18)) { hypre_VectorSize(hypre_ParVectorLocalVector(Utemp)) = local_size; alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[level], U_array[level], beta, Aux_F, Vtemp); if (smooth_type == 8 || smooth_type == 18) HYPRE_ParCSRParaSailsSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); else if (smooth_type == 7 || smooth_type == 17) HYPRE_ParCSRPilutSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); else if (smooth_type == 9 || smooth_type == 19) HYPRE_EuclidSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Vtemp, (HYPRE_ParVector) Utemp); hypre_ParVectorAxpy(relax_weight[level],Utemp,Aux_U); } else if (smooth_num_levels > level && (smooth_type == 6 || smooth_type == 16)) { HYPRE_SchwarzSolve(smoother[level], (HYPRE_ParCSRMatrix) A_array[level], (HYPRE_ParVector) Aux_F, (HYPRE_ParVector) Aux_U); } /*else if (relax_type == 99)*/ else if (relax_type == 9 || relax_type == 99) { /* Gaussian elimination */ hypre_GaussElimSolve(amg_data, level, relax_type); } else if (relax_type == 18) { /* L1 - Jacobi*/ if (relax_order == 1 && cycle_param < 3) { /* need to do CF - so can't use the AMS one */ HYPRE_Int i; HYPRE_Int loc_relax_points[2]; if (cycle_type < 2) { loc_relax_points[0] = 1; loc_relax_points[1] = -1; } else { loc_relax_points[0] = -1; loc_relax_points[1] = 1; } for (i=0; i < 2; i++) hypre_ParCSRRelax_L1_Jacobi(A_array[level], Aux_F, CF_marker_array[level], loc_relax_points[i], relax_weight[level], l1_norms[level], Aux_U, Vtemp); } else /* not CF - so use through AMS */ { if (num_threads == 1) hypre_ParCSRRelax(A_array[level], Aux_F, 1, 1, l1_norms_level, relax_weight[level], omega[level],0,0,0,0, Aux_U, Vtemp, Ztemp); else hypre_ParCSRRelaxThreads(A_array[level], Aux_F, 1, 1, l1_norms_level, relax_weight[level], omega[level], Aux_U, Vtemp, Ztemp); } } else if (relax_type == 15) { /* CG */ if (j ==0) /* do num sweep iterations of CG */ hypre_ParCSRRelax_CG( smoother[level], A_array[level], Aux_F, Aux_U, num_sweep); } else if (relax_type == 16) { /* scaled Chebyshev */ HYPRE_Int scale = 1; HYPRE_Int variant = 0; hypre_ParCSRRelax_Cheby(A_array[level], Aux_F, max_eig_est[level], min_eig_est[level], cheby_fraction, cheby_order, scale, variant, Aux_U, Vtemp, Ztemp ); } else if (relax_type ==17) { hypre_BoomerAMGRelax_FCFJacobi(A_array[level], Aux_F, CF_marker_array[level], relax_weight[level], Aux_U, Vtemp); } else if (old_version) { Solve_err_flag = hypre_BoomerAMGRelax(A_array[level], Aux_F, CF_marker_array[level], relax_type, relax_points, relax_weight[level], omega[level], l1_norms_level, Aux_U, Vtemp, Ztemp); } else { /* smoother than can have CF ordering */ if (block_mode) { Solve_err_flag = hypre_BoomerAMGBlockRelaxIF(A_block_array[level], Aux_F, CF_marker_array[level], relax_type, relax_local, cycle_param, relax_weight[level], omega[level], Aux_U, Vtemp); } else { Solve_err_flag = hypre_BoomerAMGRelaxIF(A_array[level], Aux_F, CF_marker_array[level], relax_type, relax_local, cycle_param, relax_weight[level], omega[level], l1_norms_level, Aux_U, Vtemp, Ztemp); } } if (Solve_err_flag != 0) return(Solve_err_flag); } if (smooth_num_levels > level && smooth_type > 9) { gammaold = gamma; gamma = hypre_ParVectorInnerProd(Rtemp,Ztemp); if (jj == 0) hypre_ParVectorCopy(Ztemp,Ptemp); else { beta = gamma/gammaold; for (i=0; i < local_size; i++) Ptemp_data[i] = Ztemp_data[i] + beta*Ptemp_data[i]; } hypre_ParCSRMatrixMatvec(1.0,A_array[level],Ptemp,0.0,Vtemp); alfa = gamma /hypre_ParVectorInnerProd(Ptemp,Vtemp); hypre_ParVectorAxpy(alfa,Ptemp,U_array[level]); hypre_ParVectorAxpy(-alfa,Vtemp,Rtemp); } } } /*------------------------------------------------------------------ * Decrement the control counter and determine which grid to visit next *-----------------------------------------------------------------*/ --lev_counter[level]; if (lev_counter[level] >= 0 && level != num_levels-1) { /*--------------------------------------------------------------- * Visit coarser level next. * Compute residual using hypre_ParCSRMatrixMatvec. * Perform restriction using hypre_ParCSRMatrixMatvecT. * Reset counters and cycling parameters for coarse level *--------------------------------------------------------------*/ fine_grid = level; coarse_grid = level + 1; hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); alpha = -1.0; beta = 1.0; if (block_mode) { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); hypre_ParCSRBlockMatrixMatvec(alpha, A_block_array[fine_grid], U_array[fine_grid], beta, Vtemp); } else { // JSP: avoid unnecessary copy using out-of-place version of SpMV hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[fine_grid], U_array[fine_grid], beta, F_array[fine_grid], Vtemp); } alpha = 1.0; beta = 0.0; if (block_mode) { hypre_ParCSRBlockMatrixMatvecT(alpha,R_block_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } else { hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } ++level; lev_counter[level] = hypre_max(lev_counter[level],cycle_type); cycle_param = 1; if (level == num_levels-1) cycle_param = 3; } else if (level != 0) { /*--------------------------------------------------------------- * Visit finer level next. * Interpolate and add correction using hypre_ParCSRMatrixMatvec. * Reset counters and cycling parameters for finer level. *--------------------------------------------------------------*/ fine_grid = level - 1; coarse_grid = level; alpha = 1.0; beta = 1.0; if (block_mode) { hypre_ParCSRBlockMatrixMatvec(alpha, P_block_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } else { hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } --level; cycle_param = 2; } else { Not_Finished = 0; } } hypre_ParAMGDataCycleOpCount(amg_data) = cycle_op_count; hypre_TFree(lev_counter); hypre_TFree(num_coeffs); if (smooth_num_levels > 0) { if (smooth_type == 7 || smooth_type == 8 || smooth_type == 9 || smooth_type == 17 || smooth_type == 18 || smooth_type == 19 ) hypre_ParVectorDestroy(Utemp); } return(Solve_err_flag); }
HYPRE_Int hypre_CreateDinv(void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParCSRMatrix *A_tmp; hypre_CSRMatrix *A_tmp_diag; hypre_ParVector *Xtilde; hypre_ParVector *Rtilde; hypre_Vector *Xtilde_local; hypre_Vector *Rtilde_local; HYPRE_Real *x_data; HYPRE_Real *r_data; HYPRE_Real *tmp_data; HYPRE_Real *D_inv = NULL; HYPRE_Real *relax_weight = NULL; HYPRE_Real relax_type; HYPRE_Int addlvl; HYPRE_Int num_levels; HYPRE_Int num_add_lvls; HYPRE_Int num_rows_L; HYPRE_Int num_rows_A; HYPRE_Int num_rows_tmp; HYPRE_Int level, i; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_threads; HYPRE_Real **l1_norms_ptr = NULL; HYPRE_Real *l1_norms; HYPRE_Int l1_start; /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); addlvl = hypre_ParAMGDataSimple(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_type = hypre_ParAMGDataGridRelaxType(amg_data)[1]; num_rows_A = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[0])); l1_norms_ptr = hypre_ParAMGDataL1Norms(amg_data); /* smooth_option = hypre_ParAMGDataSmoothOption(amg_data); */ num_add_lvls = num_levels+1-addlvl; num_rows_L = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); num_rows_L += num_rows_tmp; } Rtilde = hypre_CTAlloc(hypre_ParVector, 1); Rtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Rtilde_local); hypre_ParVectorLocalVector(Rtilde) = Rtilde_local; hypre_ParVectorOwnsData(Rtilde) = 1; Xtilde = hypre_CTAlloc(hypre_ParVector, 1); Xtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Xtilde_local); hypre_ParVectorLocalVector(Xtilde) = Xtilde_local; hypre_ParVectorOwnsData(Xtilde) = 1; x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); D_inv = hypre_CTAlloc(HYPRE_Real, num_rows_L); l1_start = 0; for (level=addlvl; level < num_levels; level++) { if (level != 0) { tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[l1_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0; tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[l1_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0; } A_tmp = A_array[level]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); if (relax_type == 0) { HYPRE_Real rlx_wt = relax_weight[level]; HYPRE_Int *A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); HYPRE_Real *A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag); #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) D_inv[l1_start+i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]]; } else { l1_norms = l1_norms_ptr[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) D_inv[l1_start+i] = 1.0/l1_norms[i]; } l1_start += num_rows_tmp; } hypre_ParAMGDataDinv(amg_data) = D_inv; hypre_ParAMGDataRtilde(amg_data) = Rtilde; hypre_ParAMGDataXtilde(amg_data) = Xtilde; return Solve_err_flag; }
HYPRE_Int hypre_BoomerAMGAdditiveCycle( void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_ParCSRMatrix **R_array; hypre_ParCSRMatrix *Lambda; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParVector *Vtemp; hypre_ParVector *Ztemp; hypre_ParVector *Xtilde, *Rtilde; HYPRE_Int **CF_marker_array; HYPRE_Int num_levels; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int simple; HYPRE_Int i, num_rows; HYPRE_Int n_global; HYPRE_Int rlx_order; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int level; HYPRE_Int coarse_grid; HYPRE_Int fine_grid; HYPRE_Int relax_type; HYPRE_Int rlx_down; HYPRE_Int rlx_up; HYPRE_Int *grid_relax_type; HYPRE_Real **l1_norms; HYPRE_Real alpha, beta; HYPRE_Int num_threads; HYPRE_Real *u_data; HYPRE_Real *f_data; HYPRE_Real *v_data; HYPRE_Real *l1_norms_lvl; HYPRE_Real *D_inv; HYPRE_Real *x_global; HYPRE_Real *r_global; HYPRE_Real *relax_weight; HYPRE_Real *omega; #if 0 HYPRE_Real *D_mat; HYPRE_Real *S_vec; #endif /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); R_array = hypre_ParAMGDataRArray(amg_data); CF_marker_array = hypre_ParAMGDataCFMarkerArray(amg_data); Vtemp = hypre_ParAMGDataVtemp(amg_data); Ztemp = hypre_ParAMGDataZtemp(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); simple = hypre_ParAMGDataSimple(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); Lambda = hypre_ParAMGDataLambda(amg_data); Xtilde = hypre_ParAMGDataXtilde(amg_data); Rtilde = hypre_ParAMGDataRtilde(amg_data); l1_norms = hypre_ParAMGDataL1Norms(amg_data); D_inv = hypre_ParAMGDataDinv(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); omega = hypre_ParAMGDataOmega(amg_data); rlx_order = hypre_ParAMGDataRelaxOrder(amg_data); /* Initialize */ addlvl = hypre_max(additive, mult_additive); addlvl = hypre_max(addlvl, simple); Solve_err_flag = 0; /*--------------------------------------------------------------------- * Main loop of cycling --- multiplicative version --- V-cycle *--------------------------------------------------------------------*/ /* down cycle */ relax_type = grid_relax_type[1]; rlx_down = grid_relax_type[1]; rlx_up = grid_relax_type[2]; for (level = 0; level < num_levels-1; level++) { fine_grid = level; coarse_grid = level + 1; u_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[fine_grid])); f_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[fine_grid])); v_data = hypre_VectorData(hypre_ParVectorLocalVector(Vtemp)); l1_norms_lvl = l1_norms[level]; hypre_ParVectorSetConstantValues(U_array[coarse_grid], 0.0); if (level < addlvl) /* multiplicative version */ { /* smoothing step */ if (rlx_down == 0) { HYPRE_Real *A_data = hypre_CSRMatrixData(hypre_ParCSRMatrixDiag(A_array[fine_grid])); HYPRE_Int *A_i = hypre_CSRMatrixI(hypre_ParCSRMatrixDiag(A_array[fine_grid])); hypre_ParVectorCopy(F_array[fine_grid],Vtemp); num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid])); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) u_data[i] = relax_weight[level]*v_data[i] / A_data[A_i[i]]; } else if (rlx_down != 18) { /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_down,0,*/ hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], rlx_down,rlx_order,1, relax_weight[fine_grid], omega[fine_grid], l1_norms_lvl, U_array[fine_grid], Vtemp, Ztemp); hypre_ParVectorCopy(F_array[fine_grid],Vtemp); } else { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); num_rows = hypre_CSRMatrixNumRows(hypre_ParCSRMatrixDiag(A_array[fine_grid])); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i = 0; i < num_rows; i++) u_data[i] += v_data[i] / l1_norms_lvl[i]; } alpha = -1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, A_array[fine_grid], U_array[fine_grid], beta, Vtemp); alpha = 1.0; beta = 0.0; hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } else /* additive version */ { hypre_ParVectorCopy(F_array[fine_grid],Vtemp); if (level == 0) /* compute residual */ { hypre_ParVectorCopy(Vtemp, Rtilde); hypre_ParVectorCopy(U_array[fine_grid],Xtilde); } alpha = 1.0; beta = 0.0; hypre_ParCSRMatrixMatvecT(alpha,R_array[fine_grid],Vtemp, beta,F_array[coarse_grid]); } } /* solve coarse grid */ if (addlvl < num_levels) { if (simple > -1) { x_global = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_global = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); n_global = hypre_VectorSize(hypre_ParVectorLocalVector(Xtilde)); #ifdef HYPRE_USING_OPENMP #pragma omp parallel for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < n_global; i++) x_global[i] += D_inv[i]*r_global[i]; } else hypre_ParCSRMatrixMatvec(1.0, Lambda, Rtilde, 1.0, Xtilde); if (addlvl == 0) hypre_ParVectorCopy(Xtilde, U_array[0]); } else { fine_grid = num_levels -1; hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid], 1, 1, l1_norms[fine_grid], 1.0, 1.0 ,0,0,0,0, U_array[fine_grid], Vtemp, Ztemp); } /* up cycle */ relax_type = grid_relax_type[2]; for (level = num_levels-1; level > 0; level--) { fine_grid = level - 1; coarse_grid = level; if (level <= addlvl) /* multiplicative version */ { alpha = 1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); if (rlx_up != 18) /*hypre_BoomerAMGRelax(A_array[fine_grid],F_array[fine_grid],NULL,rlx_up,0,*/ hypre_BoomerAMGRelaxIF(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], rlx_up,rlx_order,2, relax_weight[fine_grid], omega[fine_grid], l1_norms[fine_grid], U_array[fine_grid], Vtemp, Ztemp); else if (rlx_order) { HYPRE_Int loc_relax_points[2]; loc_relax_points[0] = -1; loc_relax_points[1] = 1; for (i=0; i < 2; i++) hypre_ParCSRRelax_L1_Jacobi(A_array[fine_grid],F_array[fine_grid], CF_marker_array[fine_grid], loc_relax_points[i], 1.0, l1_norms[fine_grid], U_array[fine_grid], Vtemp); } else hypre_ParCSRRelax(A_array[fine_grid], F_array[fine_grid], 1, 1, l1_norms[fine_grid], 1.0, 1.0 ,0,0,0,0, U_array[fine_grid], Vtemp, Ztemp); } else /* additive version */ { alpha = 1.0; beta = 1.0; hypre_ParCSRMatrixMatvec(alpha, P_array[fine_grid], U_array[coarse_grid], beta, U_array[fine_grid]); } } return(Solve_err_flag); }
HYPRE_Int hypre_CreateLambda(void *amg_vdata) { hypre_ParAMGData *amg_data = amg_vdata; /* Data Structure variables */ MPI_Comm comm; hypre_ParCSRMatrix **A_array; hypre_ParVector **F_array; hypre_ParVector **U_array; hypre_ParCSRMatrix *A_tmp; hypre_ParCSRMatrix *Lambda; hypre_CSRMatrix *L_diag; hypre_CSRMatrix *L_offd; hypre_CSRMatrix *A_tmp_diag; hypre_CSRMatrix *A_tmp_offd; hypre_ParVector *Xtilde; hypre_ParVector *Rtilde; hypre_Vector *Xtilde_local; hypre_Vector *Rtilde_local; hypre_ParCSRCommPkg *comm_pkg; hypre_ParCSRCommPkg *L_comm_pkg = NULL; hypre_ParCSRCommHandle *comm_handle; HYPRE_Real *L_diag_data; HYPRE_Real *L_offd_data; HYPRE_Real *buf_data = NULL; HYPRE_Real *tmp_data; HYPRE_Real *x_data; HYPRE_Real *r_data; HYPRE_Real *l1_norms; HYPRE_Real *A_tmp_diag_data; HYPRE_Real *A_tmp_offd_data; HYPRE_Real *D_data = NULL; HYPRE_Real *D_data_offd = NULL; HYPRE_Int *L_diag_i; HYPRE_Int *L_diag_j; HYPRE_Int *L_offd_i; HYPRE_Int *L_offd_j; HYPRE_Int *A_tmp_diag_i; HYPRE_Int *A_tmp_offd_i; HYPRE_Int *A_tmp_diag_j; HYPRE_Int *A_tmp_offd_j; HYPRE_Int *L_recv_ptr = NULL; HYPRE_Int *L_send_ptr = NULL; HYPRE_Int *L_recv_procs = NULL; HYPRE_Int *L_send_procs = NULL; HYPRE_Int *L_send_map_elmts = NULL; HYPRE_Int *recv_procs; HYPRE_Int *send_procs; HYPRE_Int *send_map_elmts; HYPRE_Int *send_map_starts; HYPRE_Int *recv_vec_starts; HYPRE_Int *all_send_procs = NULL; HYPRE_Int *all_recv_procs = NULL; HYPRE_Int *remap = NULL; HYPRE_Int *level_start; HYPRE_Int addlvl; HYPRE_Int additive; HYPRE_Int mult_additive; HYPRE_Int num_levels; HYPRE_Int num_add_lvls; HYPRE_Int num_procs; HYPRE_Int num_sends, num_recvs; HYPRE_Int num_sends_L = 0; HYPRE_Int num_recvs_L = 0; HYPRE_Int send_data_L = 0; HYPRE_Int num_rows_L = 0; HYPRE_Int num_rows_tmp = 0; HYPRE_Int num_cols_offd_L = 0; HYPRE_Int num_cols_offd = 0; HYPRE_Int level, i, j, k; HYPRE_Int this_proc, cnt, cnt_diag, cnt_offd; HYPRE_Int cnt_recv, cnt_send, cnt_row, row_start; HYPRE_Int start_diag, start_offd, indx, cnt_map; HYPRE_Int start, j_indx, index, cnt_level; HYPRE_Int max_sends, max_recvs; /* Local variables */ HYPRE_Int Solve_err_flag = 0; HYPRE_Int num_threads; HYPRE_Int num_nonzeros_diag; HYPRE_Int num_nonzeros_offd; HYPRE_Real **l1_norms_ptr = NULL; HYPRE_Real *relax_weight = NULL; HYPRE_Real relax_type; /* Acquire data and allocate storage */ num_threads = hypre_NumThreads(); A_array = hypre_ParAMGDataAArray(amg_data); F_array = hypre_ParAMGDataFArray(amg_data); U_array = hypre_ParAMGDataUArray(amg_data); additive = hypre_ParAMGDataAdditive(amg_data); mult_additive = hypre_ParAMGDataMultAdditive(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_type = hypre_ParAMGDataGridRelaxType(amg_data)[1]; comm = hypre_ParCSRMatrixComm(A_array[0]); hypre_MPI_Comm_size(comm,&num_procs); l1_norms_ptr = hypre_ParAMGDataL1Norms(amg_data); addlvl = hypre_max(additive, mult_additive); num_add_lvls = num_levels+1-addlvl; level_start = hypre_CTAlloc(HYPRE_Int, num_add_lvls+1); send_data_L = 0; num_rows_L = 0; num_cols_offd_L = 0; num_nonzeros_diag = 0; num_nonzeros_offd = 0; level_start[0] = 0; cnt = 1; max_sends = 0; max_recvs = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); num_cols_offd = hypre_CSRMatrixNumCols(A_tmp_offd); num_rows_L += num_rows_tmp; level_start[cnt] = level_start[cnt-1] + num_rows_tmp; cnt++; num_cols_offd_L += num_cols_offd; num_nonzeros_diag += A_tmp_diag_i[num_rows_tmp]; num_nonzeros_offd += A_tmp_offd_i[num_rows_tmp]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); max_sends += num_sends; if (num_sends) send_data_L += hypre_ParCSRCommPkgSendMapStart(comm_pkg,num_sends); max_recvs += hypre_ParCSRCommPkgNumRecvs(comm_pkg); } } if (max_sends >= num_procs ||max_recvs >= num_procs) { max_sends = num_procs; max_recvs = num_procs; } if (max_sends) all_send_procs = hypre_CTAlloc(HYPRE_Int, max_sends); if (max_recvs) all_recv_procs = hypre_CTAlloc(HYPRE_Int, max_recvs); cnt_send = 0; cnt_recv = 0; if (max_sends || max_recvs) { if (max_sends < num_procs && max_recvs < num_procs) { for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); for (j = 0; j < num_sends; j++) all_send_procs[cnt_send++] = send_procs[j]; for (j = 0; j < num_recvs; j++) all_recv_procs[cnt_recv++] = recv_procs[j]; } } if (max_sends) { qsort0(all_send_procs, 0, max_sends-1); num_sends_L = 1; this_proc = all_send_procs[0]; for (i=1; i < max_sends; i++) { if (all_send_procs[i] > this_proc) { this_proc = all_send_procs[i]; all_send_procs[num_sends_L++] = this_proc; } } L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); for (j=0; j < num_sends_L; j++) L_send_procs[j] = all_send_procs[j]; hypre_TFree(all_send_procs); } if (max_recvs) { qsort0(all_recv_procs, 0, max_recvs-1); num_recvs_L = 1; this_proc = all_recv_procs[0]; for (i=1; i < max_recvs; i++) { if (all_recv_procs[i] > this_proc) { this_proc = all_recv_procs[i]; all_recv_procs[num_recvs_L++] = this_proc; } } L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); for (j=0; j < num_recvs_L; j++) L_recv_procs[j] = all_recv_procs[j]; hypre_TFree(all_recv_procs); } L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } for (k = 0; k < num_sends; k++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[k],num_sends_L); L_send_ptr[this_proc+1] += send_map_starts[k+1]-send_map_starts[k]; } for (k = 0; k < num_recvs; k++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[k],num_recvs_L); L_recv_ptr[this_proc+1] += recv_vec_starts[k+1]-recv_vec_starts[k]; } } L_recv_ptr[0] = 0; for (i=1; i < num_recvs_L; i++) L_recv_ptr[i+1] += L_recv_ptr[i]; L_send_ptr[0] = 0; for (i=1; i < num_sends_L; i++) L_send_ptr[i+1] += L_send_ptr[i]; } else { num_recvs_L = 0; num_sends_L = 0; for (i=addlvl; i < num_levels; i++) { A_tmp = A_array[i]; comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); for (j = 0; j < num_sends; j++) { this_proc = send_procs[j]; if (all_send_procs[this_proc] == 0) num_sends_L++; all_send_procs[this_proc] += send_map_starts[j+1]-send_map_starts[j]; } for (j = 0; j < num_recvs; j++) { this_proc = recv_procs[j]; if (all_recv_procs[this_proc] == 0) num_recvs_L++; all_recv_procs[this_proc] += recv_vec_starts[j+1]-recv_vec_starts[j]; } } } if (max_sends) { L_send_procs = hypre_CTAlloc(HYPRE_Int, num_sends_L); L_send_ptr = hypre_CTAlloc(HYPRE_Int, num_sends_L+1); num_sends_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_send_procs[j]; if (this_proc) { L_send_procs[num_sends_L++] = j; L_send_ptr[num_sends_L] = this_proc + L_send_ptr[num_sends_L-1]; } } } if (max_recvs) { L_recv_procs = hypre_CTAlloc(HYPRE_Int, num_recvs_L); L_recv_ptr = hypre_CTAlloc(HYPRE_Int, num_recvs_L+1); num_recvs_L = 0; for (j=0; j < num_procs; j++) { this_proc = all_recv_procs[j]; if (this_proc) { L_recv_procs[num_recvs_L++] = j; L_recv_ptr[num_recvs_L] = this_proc + L_recv_ptr[num_recvs_L-1]; } } } } } if (max_sends) hypre_TFree(all_send_procs); if (max_recvs) hypre_TFree(all_recv_procs); L_diag = hypre_CSRMatrixCreate(num_rows_L, num_rows_L, num_nonzeros_diag); L_offd = hypre_CSRMatrixCreate(num_rows_L, num_cols_offd_L, num_nonzeros_offd); hypre_CSRMatrixInitialize(L_diag); hypre_CSRMatrixInitialize(L_offd); if (num_nonzeros_diag) { L_diag_data = hypre_CSRMatrixData(L_diag); L_diag_j = hypre_CSRMatrixJ(L_diag); } L_diag_i = hypre_CSRMatrixI(L_diag); if (num_nonzeros_offd) { L_offd_data = hypre_CSRMatrixData(L_offd); L_offd_j = hypre_CSRMatrixJ(L_offd); } L_offd_i = hypre_CSRMatrixI(L_offd); if (num_rows_L) D_data = hypre_CTAlloc(HYPRE_Real,num_rows_L); if (send_data_L) { L_send_map_elmts = hypre_CTAlloc(HYPRE_Int, send_data_L); buf_data = hypre_CTAlloc(HYPRE_Real,send_data_L); } if (num_cols_offd_L) { D_data_offd = hypre_CTAlloc(HYPRE_Real,num_cols_offd_L); /*L_col_map_offd = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L);*/ remap = hypre_CTAlloc(HYPRE_Int, num_cols_offd_L); } Rtilde = hypre_CTAlloc(hypre_ParVector, 1); Rtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Rtilde_local); hypre_ParVectorLocalVector(Rtilde) = Rtilde_local; hypre_ParVectorOwnsData(Rtilde) = 1; Xtilde = hypre_CTAlloc(hypre_ParVector, 1); Xtilde_local = hypre_SeqVectorCreate(num_rows_L); hypre_SeqVectorInitialize(Xtilde_local); hypre_ParVectorLocalVector(Xtilde) = Xtilde_local; hypre_ParVectorOwnsData(Xtilde) = 1; x_data = hypre_VectorData(hypre_ParVectorLocalVector(Xtilde)); r_data = hypre_VectorData(hypre_ParVectorLocalVector(Rtilde)); cnt = 0; cnt_level = 0; cnt_diag = 0; cnt_offd = 0; cnt_row = 1; L_diag_i[0] = 0; L_offd_i[0] = 0; for (level=addlvl; level < num_levels; level++) { row_start = level_start[cnt_level]; if (level != 0) { tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(F_array[level])) = &r_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(F_array[level])) = 0; tmp_data = hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])); if (tmp_data) hypre_TFree(tmp_data); hypre_VectorData(hypre_ParVectorLocalVector(U_array[level])) = &x_data[row_start]; hypre_VectorOwnsData(hypre_ParVectorLocalVector(U_array[level])) = 0; } cnt_level++; start_diag = L_diag_i[cnt_row-1]; start_offd = L_offd_i[cnt_row-1]; A_tmp = A_array[level]; A_tmp_diag = hypre_ParCSRMatrixDiag(A_tmp); A_tmp_offd = hypre_ParCSRMatrixOffd(A_tmp); comm_pkg = hypre_ParCSRMatrixCommPkg(A_tmp); A_tmp_diag_i = hypre_CSRMatrixI(A_tmp_diag); A_tmp_offd_i = hypre_CSRMatrixI(A_tmp_offd); A_tmp_diag_j = hypre_CSRMatrixJ(A_tmp_diag); A_tmp_offd_j = hypre_CSRMatrixJ(A_tmp_offd); A_tmp_diag_data = hypre_CSRMatrixData(A_tmp_diag); A_tmp_offd_data = hypre_CSRMatrixData(A_tmp_offd); num_rows_tmp = hypre_CSRMatrixNumRows(A_tmp_diag); if (comm_pkg) { num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); num_recvs = hypre_ParCSRCommPkgNumRecvs(comm_pkg); send_procs = hypre_ParCSRCommPkgSendProcs(comm_pkg); recv_procs = hypre_ParCSRCommPkgRecvProcs(comm_pkg); send_map_starts = hypre_ParCSRCommPkgSendMapStarts(comm_pkg); send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); recv_vec_starts = hypre_ParCSRCommPkgRecvVecStarts(comm_pkg); } else { num_sends = 0; num_recvs = 0; } /* Compute new combined communication package */ for (i=0; i < num_sends; i++) { this_proc = hypre_BinarySearch(L_send_procs,send_procs[i],num_sends_L); indx = L_send_ptr[this_proc]; for (j=send_map_starts[i]; j < send_map_starts[i+1]; j++) { L_send_map_elmts[indx++] = row_start + send_map_elmts[j]; } L_send_ptr[this_proc] = indx; } cnt_map = 0; for (i = 0; i < num_recvs; i++) { this_proc = hypre_BinarySearch(L_recv_procs,recv_procs[i],num_recvs_L); indx = L_recv_ptr[this_proc]; for (j=recv_vec_starts[i]; j < recv_vec_starts[i+1]; j++) { remap[cnt_map++] = indx++; } L_recv_ptr[this_proc] = indx; } /* Compute Lambda */ if (relax_type == 0) { HYPRE_Real rlx_wt = relax_weight[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = rlx_wt/A_tmp_diag_data[A_tmp_diag_i[i]]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } else { l1_norms = l1_norms_ptr[level]; #ifdef HYPRE_USING_OPENMP #pragma omp for private(i) HYPRE_SMP_SCHEDULE #endif for (i=0; i < num_rows_tmp; i++) { D_data[i] = 1.0/l1_norms[i]; L_diag_i[cnt_row+i] = start_diag + A_tmp_diag_i[i+1]; L_offd_i[cnt_row+i] = start_offd + A_tmp_offd_i[i+1]; } } if (num_procs > 1) { index = 0; for (i=0; i < num_sends; i++) { start = send_map_starts[i]; for (j=start; j < send_map_starts[i+1]; j++) buf_data[index++] = D_data[send_map_elmts[j]]; } comm_handle = hypre_ParCSRCommHandleCreate(1, comm_pkg, buf_data, D_data_offd); hypre_ParCSRCommHandleDestroy(comm_handle); } for (i = 0; i < num_rows_tmp; i++) { j_indx = A_tmp_diag_i[i]; L_diag_data[cnt_diag] = (2.0 - A_tmp_diag_data[j_indx]*D_data[i])*D_data[i]; L_diag_j[cnt_diag++] = i+row_start; for (j=A_tmp_diag_i[i]+1; j < A_tmp_diag_i[i+1]; j++) { j_indx = A_tmp_diag_j[j]; L_diag_data[cnt_diag] = (- A_tmp_diag_data[j]*D_data[j_indx])*D_data[i]; L_diag_j[cnt_diag++] = j_indx+row_start; } for (j=A_tmp_offd_i[i]; j < A_tmp_offd_i[i+1]; j++) { j_indx = A_tmp_offd_j[j]; L_offd_data[cnt_offd] = (- A_tmp_offd_data[j]*D_data_offd[j_indx])*D_data[i]; L_offd_j[cnt_offd++] = remap[j_indx]; } } cnt_row += num_rows_tmp; } if (L_send_ptr) { for (i=num_sends_L-1; i > 0; i--) L_send_ptr[i] = L_send_ptr[i-1]; L_send_ptr[0] = 0; } else L_send_ptr = hypre_CTAlloc(HYPRE_Int,1); if (L_recv_ptr) { for (i=num_recvs_L-1; i > 0; i--) L_recv_ptr[i] = L_recv_ptr[i-1]; L_recv_ptr[0] = 0; } else L_recv_ptr = hypre_CTAlloc(HYPRE_Int,1); L_comm_pkg = hypre_CTAlloc(hypre_ParCSRCommPkg,1); hypre_ParCSRCommPkgNumRecvs(L_comm_pkg) = num_recvs_L; hypre_ParCSRCommPkgNumSends(L_comm_pkg) = num_sends_L; hypre_ParCSRCommPkgRecvProcs(L_comm_pkg) = L_recv_procs; hypre_ParCSRCommPkgSendProcs(L_comm_pkg) = L_send_procs; hypre_ParCSRCommPkgRecvVecStarts(L_comm_pkg) = L_recv_ptr; hypre_ParCSRCommPkgSendMapStarts(L_comm_pkg) = L_send_ptr; hypre_ParCSRCommPkgSendMapElmts(L_comm_pkg) = L_send_map_elmts; hypre_ParCSRCommPkgComm(L_comm_pkg) = comm; Lambda = hypre_CTAlloc(hypre_ParCSRMatrix, 1); hypre_ParCSRMatrixDiag(Lambda) = L_diag; hypre_ParCSRMatrixOffd(Lambda) = L_offd; hypre_ParCSRMatrixCommPkg(Lambda) = L_comm_pkg; hypre_ParCSRMatrixComm(Lambda) = comm; hypre_ParCSRMatrixOwnsData(Lambda) = 1; hypre_ParAMGDataLambda(amg_data) = Lambda; hypre_ParAMGDataRtilde(amg_data) = Rtilde; hypre_ParAMGDataXtilde(amg_data) = Xtilde; hypre_TFree(D_data_offd); hypre_TFree(D_data); if (num_procs > 1) hypre_TFree(buf_data); hypre_TFree(remap); hypre_TFree(buf_data); hypre_TFree(level_start); return Solve_err_flag; }