/* AmgCGCPrepare: shift the local coarse-grid markers into a global vertex
 * numbering and exchange them across the communication package, as
 * preparation for assembling the graph of connections between the grids
 * ================================================================================================
 * S              : the strength matrix
 * nlocal         : the number of locally created coarse grids
 * CF_marker      : the coarse/fine markers of the local rows; positive entries are
 *                  shifted in place by this rank's first vertex number
 * CF_marker_offd : (out) newly allocated array with one entry per off-diagonal column,
 *                  filled by the exchange when more than one process is present
 * coarsen_type   : the coarsening type; an even value means empty grids are allowed
 * vrange         : (out) newly allocated ranges of the vertices representing coarse grids
 * returns        : ierr, which is never changed from 0 in this routine
 * ================================================================================================*/
HYPRE_Int AmgCGCPrepare (hypre_ParCSRMatrix *S,HYPRE_Int nlocal,HYPRE_Int *CF_marker,HYPRE_Int **CF_marker_offd,HYPRE_Int coarsen_type,HYPRE_Int **vrange)
{
   HYPRE_Int ierr = 0;
   HYPRE_Int nprocs, rank;
   HYPRE_Int n_sends;
   HYPRE_Int *ranges = NULL;
   HYPRE_Int first_vertex, last_vertex;
   HYPRE_Int *send_buf;
   HYPRE_Int seg_end;
   HYPRE_Int shift;
   HYPRE_Int s, k, row, filled;
   HYPRE_Int n_rows = hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
   HYPRE_Int n_cols_offd = hypre_CSRMatrixNumCols (hypre_ParCSRMatrixOffd (S));
   MPI_Comm comm = hypre_ParCSRMatrixComm(S);
   hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S);
   hypre_ParCSRCommHandle *comm_handle;

   hypre_MPI_Comm_size (comm,&nprocs);
   hypre_MPI_Comm_rank (comm,&rank);

   /* build the communication package on demand */
   if (!comm_pkg)
   {
      hypre_MatvecCommPkgCreate (S);
      comm_pkg = hypre_ParCSRMatrixCommPkg (S);
   }
   n_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);

   /* even coarsen_type means allow_emptygrids: one extra vertex is counted */
   if (coarsen_type % 2 == 0)
   {
      nlocal += 1;
   }

#ifdef HYPRE_NO_GLOBAL_PARTITION
   {
      HYPRE_Int prefix_sum;

      ranges = hypre_CTAlloc(HYPRE_Int,2);
      hypre_MPI_Scan(&nlocal, &prefix_sum, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
      /* first point in my range */
      ranges[0] = prefix_sum - nlocal;
      /* first point in next proc's range */
      ranges[1] = prefix_sum;
      first_vertex = ranges[0];
      last_vertex = ranges[1];
   }
#else
   /* gather every rank's count into slots 1..nprocs, then prefix-sum */
   ranges = hypre_CTAlloc (HYPRE_Int,nprocs+1);
   hypre_MPI_Allgather (&nlocal,1,HYPRE_MPI_INT,ranges+1,1,HYPRE_MPI_INT,comm);
   ranges[0] = 0;
   for (k = 2; k <= nprocs; k++)
   {
      ranges[k] += ranges[k-1];
   }
   first_vertex = ranges[rank];
   last_vertex = ranges[rank+1];
#endif

   /* Note: first_vertex uses 0-based indexing, while CF_marker uses 1-based
      indexing.  When empty grids are allowed one more is added, because
      ranges[rank]+1 denotes the empty grid; hence ranges[rank]+2 is the
      first coarse grid denoted in global indices. */
   shift = first_vertex;
   if (coarsen_type % 2 != 1)
   {
      shift += 1;
   }
   for (row = 0; row < n_rows; row++)
   {
      if (CF_marker[row] > 0)
      {
         CF_marker[row] += shift;
      }
   }

   /* exchange data */
   *CF_marker_offd = hypre_CTAlloc (HYPRE_Int,n_cols_offd);
   send_buf = hypre_CTAlloc (HYPRE_Int,hypre_ParCSRCommPkgSendMapStart (comm_pkg,n_sends));

   filled = 0;
   for (s = 0; s < n_sends; s++)
   {
      seg_end = hypre_ParCSRCommPkgSendMapStart (comm_pkg,s+1);
      for (k = hypre_ParCSRCommPkgSendMapStart (comm_pkg,s); k < seg_end; k++)
      {
         send_buf[filled] = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,k)];
         filled++;
      }
   }

   if (nprocs > 1)
   {
      comm_handle = hypre_ParCSRCommHandleCreate (11,comm_pkg,send_buf,*CF_marker_offd);
      hypre_ParCSRCommHandleDestroy (comm_handle);
   }
   hypre_TFree (send_buf);

   *vrange = ranges;
   return (ierr);
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGSolve
 *
 * Runs AMG cycles on A u = f using the hierarchy stored in amg_vdata.
 *
 *   amg_vdata : the hypre_ParAMGData object (passed as void *)
 *   A, f, u   : installed as level 0 of the A/F/U arrays of amg_vdata
 *
 * Cycling continues while (relative residual >= tol or fewer than min_iter
 * cycles were done) and fewer than max_iter cycles were done.  The relative
 * residual is ||f - A u|| divided by ||f|| when ||f|| is nonzero; it is only
 * recomputed when the print level or logging exceeds 1, or tol > 0.
 *
 * Returns hypre_error_flag.  HYPRE_ERROR_GENERIC is raised when the initial
 * residual norm is a NaN/INF; HYPRE_ERROR_CONV is raised when max_iter
 * cycles were performed and tol > 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_BoomerAMGSolve( void *amg_vdata, hypre_ParCSRMatrix *A, hypre_ParVector *f, hypre_ParVector *u )
{
   MPI_Comm comm = hypre_ParCSRMatrixComm(A);
   hypre_ParAMGData *amg_data = amg_vdata;

   /* Solver settings and hierarchy taken from amg_data */
   HYPRE_Int print_level;
   HYPRE_Int logging;
   HYPRE_Int n_levels;
   HYPRE_Int min_iter;
   HYPRE_Int max_iter;
   HYPRE_Int additive;
   HYPRE_Int mult_additive;
   HYPRE_Int simple;
   HYPRE_Int block_mode;
   HYPRE_Real tol;
   hypre_ParCSRMatrix **A_array;
   hypre_ParVector **F_array;
   hypre_ParVector **U_array;
   hypre_ParCSRBlockMatrix **A_block_array;

   /* Iteration state */
   HYPRE_Int n_cycles;
   HYPRE_Int not_converged;
   HYPRE_Int plain_cycle;
   HYPRE_Int lev;
   HYPRE_Int num_procs, my_id;

   HYPRE_Real alpha = 1.0;
   HYPRE_Real beta = -1.0;
   HYPRE_Real conv_factor = 0.0;
   HYPRE_Real resid_nrm = 1.0;
   HYPRE_Real resid_nrm_init = 0.0;
   HYPRE_Real relative_resid;
   HYPRE_Real rhs_norm = 0.0;
   HYPRE_Real prev_resid;
   HYPRE_Real ieee_check = 0.;

   /* Closing statistics */
   HYPRE_Real cycle_op_count;
   HYPRE_Real sum_coeffs;
   HYPRE_Real sum_variables;
   HYPRE_Real *level_coeffs;
   HYPRE_Real *level_variables;
   HYPRE_Real cycle_cmplxty = 0.0;
   HYPRE_Real operat_cmplxty;
   HYPRE_Real grid_cmplxty;

   /* Vector receiving the residual: the logged Residual vector when
      logging > 1, the scratch vector Vtemp otherwise */
   hypre_ParVector *resid_vec;

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   print_level = hypre_ParAMGDataPrintLevel(amg_data);
   logging = hypre_ParAMGDataLogging(amg_data);
   n_levels = hypre_ParAMGDataNumLevels(amg_data);
   A_array = hypre_ParAMGDataAArray(amg_data);
   F_array = hypre_ParAMGDataFArray(amg_data);
   U_array = hypre_ParAMGDataUArray(amg_data);

   tol = hypre_ParAMGDataTol(amg_data);
   min_iter = hypre_ParAMGDataMinIter(amg_data);
   max_iter = hypre_ParAMGDataMaxIter(amg_data);
   additive = hypre_ParAMGDataAdditive(amg_data);
   simple = hypre_ParAMGDataSimple(amg_data);
   mult_additive = hypre_ParAMGDataMultAdditive(amg_data);

   /* install the caller's system as the finest level */
   A_array[0] = A;
   F_array[0] = f;
   U_array[0] = u;

   block_mode = hypre_ParAMGDataBlockMode(amg_data);
   A_block_array = hypre_ParAMGDataABlockArray(amg_data);

   if (logging > 1)
   {
      resid_vec = hypre_ParAMGDataResidual(amg_data);
   }
   else
   {
      resid_vec = hypre_ParAMGDataVtemp(amg_data);
   }

   /*-----------------------------------------------------------------------
    * Write the solver parameters
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && print_level > 1)
   {
      hypre_BoomerAMGWriteSolverParams(amg_data);
   }

   /*-----------------------------------------------------------------------
    * Initialize the solver error flag and assorted bookkeeping variables
    *-----------------------------------------------------------------------*/

   not_converged = 0;
   sum_coeffs = 0;
   sum_variables = 0;
   n_cycles = 0;
   operat_cmplxty = 0;
   grid_cmplxty = 0;

   /*-----------------------------------------------------------------------
    * write some initial info
    *-----------------------------------------------------------------------*/

   if (my_id == 0 && print_level > 1 && tol > 0.)
   {
      hypre_printf("\n\nAMG SOLUTION INFO:\n");
   }

   /*-----------------------------------------------------------------------
    * Compute initial fine-grid residual and print
    *-----------------------------------------------------------------------*/

   if (print_level > 1 || logging > 1)
   {
      /* start from f; subtract A*u only when a tolerance is in use */
      hypre_ParVectorCopy(F_array[0], resid_vec);
      if (tol > 0)
      {
         hypre_ParCSRMatrixMatvec(alpha, A_array[0], U_array[0], beta, resid_vec);
      }
      resid_nrm = sqrt(hypre_ParVectorInnerProd(resid_vec, resid_vec));

      /* Since it does not diminish performance, attempt to return an error
         flag and notify users when they supply bad input. */
      if (resid_nrm != 0.)
      {
         ieee_check = resid_nrm/resid_nrm; /* INF -> NaN conversion */
      }
      if (ieee_check != ieee_check)
      {
         /* ...INFs or NaNs in input can make ieee_check a NaN.  This test
            for ieee_check self-equality works on all IEEE-compliant
            compilers/machines, c.f. page 8 of "Lecture Notes on the Status
            of IEEE 754" by W. Kahan, May 31, 1996.  Currently (July 2002)
            this paper may be found at
            http://HTTP.CS.Berkeley.EDU/~wkahan/ieee754status/IEEE754.PDF */
         if (print_level > 0)
         {
            hypre_printf("\n\nERROR detected by Hypre ... BEGIN\n");
            hypre_printf("ERROR -- hypre_BoomerAMGSolve: INFs and/or NaNs detected in input.\n");
            hypre_printf("User probably placed non-numerics in supplied A, x_0, or b.\n");
            hypre_printf("ERROR detected by Hypre ... END\n\n\n");
         }
         hypre_error(HYPRE_ERROR_GENERIC);
         return hypre_error_flag;
      }

      resid_nrm_init = resid_nrm;
      rhs_norm = sqrt(hypre_ParVectorInnerProd(f, f));
      relative_resid = resid_nrm_init;
      if (rhs_norm)
      {
         relative_resid = resid_nrm_init / rhs_norm;
      }
   }
   else
   {
      relative_resid = 1.;
   }

   if (my_id == 0 && print_level > 1)
   {
      hypre_printf(" relative\n");
      hypre_printf(" residual factor residual\n");
      hypre_printf(" -------- ------ --------\n");
      hypre_printf(" Initial %e %e\n",resid_nrm_init, relative_resid);
   }

   /*-----------------------------------------------------------------------
    * Main V-cycle loop
    *-----------------------------------------------------------------------*/

   /* the multiplicative cycle is used only when none of the additive
      variants is switched on for a level of this hierarchy */
   plain_cycle = (additive < 0 || additive >= n_levels)
              && (mult_additive < 0 || mult_additive >= n_levels)
              && (simple < 0 || simple >= n_levels);

   while ((relative_resid >= tol || n_cycles < min_iter)
          && n_cycles < max_iter)
   {
      /* Op count only needed for one cycle */
      hypre_ParAMGDataCycleOpCount(amg_data) = 0;

      if (plain_cycle)
      {
         hypre_BoomerAMGCycle(amg_data, F_array, U_array);
      }
      else
      {
         hypre_BoomerAMGAdditiveCycle(amg_data);
      }

      /*---------------------------------------------------------------
       * Compute fine-grid residual and residual norm
       *----------------------------------------------------------------*/

      if (print_level > 1 || logging > 1 || tol > 0.)
      {
         prev_resid = resid_nrm;

         hypre_ParCSRMatrixMatvecOutOfPlace(alpha, A_array[0], U_array[0], beta, F_array[0], resid_vec);
         resid_nrm = sqrt(hypre_ParVectorInnerProd(resid_vec, resid_vec));

         conv_factor = resid_nrm;
         if (prev_resid)
         {
            conv_factor = resid_nrm / prev_resid;
         }

         relative_resid = resid_nrm;
         if (rhs_norm)
         {
            relative_resid = resid_nrm / rhs_norm;
         }

         hypre_ParAMGDataRelativeResidualNorm(amg_data) = relative_resid;
      }

      ++n_cycles;

      hypre_ParAMGDataNumIterations(amg_data) = n_cycles;
#ifdef CUMNUMIT
      ++hypre_ParAMGDataCumNumIterations(amg_data);
#endif

      if (my_id == 0 && print_level > 1)
      {
         hypre_printf(" Cycle %2d %e %f %e \n", n_cycles, resid_nrm, conv_factor, relative_resid);
      }
   }

   if (n_cycles == max_iter && tol > 0.)
   {
      not_converged = 1;
      hypre_error(HYPRE_ERROR_CONV);
   }

   /*-----------------------------------------------------------------------
    * Compute closing statistics
    *-----------------------------------------------------------------------*/

   conv_factor = 1.;
   if (n_cycles > 0 && resid_nrm_init)
   {
      conv_factor = pow((resid_nrm/resid_nrm_init),(1.0/(HYPRE_Real) n_cycles));
   }

   if (print_level > 1)
   {
      level_coeffs = hypre_CTAlloc(HYPRE_Real, n_levels);
      level_variables = hypre_CTAlloc(HYPRE_Real, n_levels);
      level_coeffs[0] = hypre_ParCSRMatrixDNumNonzeros(A);
      level_variables[0] = hypre_ParCSRMatrixGlobalNumRows(A);

      if (block_mode)
      {
         for (lev = 1; lev < n_levels; lev++)
         {
            level_coeffs[lev] = (HYPRE_Real) hypre_ParCSRBlockMatrixNumNonzeros(A_block_array[lev]);
            level_variables[lev] = (HYPRE_Real) hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[lev]);
         }
         /* in block mode the finest level is counted from the block matrix */
         level_coeffs[0] = hypre_ParCSRBlockMatrixDNumNonzeros(A_block_array[0]);
         level_variables[0] = hypre_ParCSRBlockMatrixGlobalNumRows(A_block_array[0]);
      }
      else
      {
         for (lev = 1; lev < n_levels; lev++)
         {
            level_coeffs[lev] = (HYPRE_Real) hypre_ParCSRMatrixNumNonzeros(A_array[lev]);
            level_variables[lev] = (HYPRE_Real) hypre_ParCSRMatrixGlobalNumRows(A_array[lev]);
         }
      }

      for (lev = 0; lev < hypre_ParAMGDataNumLevels(amg_data); lev++)
      {
         sum_coeffs += level_coeffs[lev];
         sum_variables += level_variables[lev];
      }

      cycle_op_count = hypre_ParAMGDataCycleOpCount(amg_data);

      if (level_variables[0])
      {
         grid_cmplxty = sum_variables / level_variables[0];
      }
      if (level_coeffs[0])
      {
         operat_cmplxty = sum_coeffs / level_coeffs[0];
         cycle_cmplxty = cycle_op_count / level_coeffs[0];
      }

      if (my_id == 0)
      {
         if (not_converged == 1)
         {
            hypre_printf("\n\n==============================================");
            hypre_printf("\n NOTE: Convergence tolerance was not achieved\n");
            hypre_printf(" within the allowed %d V-cycles\n",max_iter);
            hypre_printf("==============================================");
         }
         hypre_printf("\n\n Average Convergence Factor = %f",conv_factor);
         hypre_printf("\n\n Complexity: grid = %f\n",grid_cmplxty);
         hypre_printf(" operator = %f\n",operat_cmplxty);
         hypre_printf(" cycle = %f\n\n\n\n",cycle_cmplxty);
      }

      hypre_TFree(level_coeffs);
      hypre_TFree(level_variables);
   }

   return hypre_error_flag;
}
HYPRE_Int hypre_BlockTridiagDestroy(void *data) { hypre_BlockTridiagData *b_data = (hypre_BlockTridiagData *) data; if (b_data->F1) { hypre_ParVectorDestroy(b_data->F1); b_data->F1 = NULL; } if (b_data->F2) { hypre_ParVectorDestroy(b_data->F2); b_data->F2 = NULL; } if (b_data->U1) { hypre_ParVectorDestroy(b_data->U1); b_data->U1 = NULL; } if (b_data->U2) { hypre_ParVectorDestroy(b_data->U2); b_data->U2 = NULL; } if (b_data->index_set1) { hypre_TFree(b_data->index_set1); b_data->index_set1 = NULL; } if (b_data->index_set2) { hypre_TFree(b_data->index_set2); b_data->index_set2 = NULL; } if (b_data->A11) { hypre_ParCSRMatrixDestroy(b_data->A11); b_data->A11 = NULL; } if (b_data->A21) { hypre_ParCSRMatrixDestroy(b_data->A21); b_data->A21 = NULL; } if (b_data->A22) { hypre_ParCSRMatrixDestroy(b_data->A22); b_data->A22 = NULL; } if (b_data->precon1) { HYPRE_BoomerAMGDestroy(b_data->precon1); b_data->precon1 = NULL; } if (b_data->precon2) { HYPRE_BoomerAMGDestroy(b_data->precon2); b_data->precon2 = NULL; } hypre_TFree(b_data); return (0); }
/*--------------------------------------------------------------------------
 * hypre_GenerateSubComm
 *
 * Creates a communicator containing exactly those processes of comm that
 * passed a nonzero participate flag.  Must be called by every process of
 * comm, since it performs reductions and a communicator creation on comm.
 *
 *   comm         : the parent communicator
 *   participate  : nonzero if the calling process should be a member
 *   new_comm_ptr : (out) the new communicator; hypre_MPI_COMM_NULL is stored
 *                  when no process participates
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int hypre_GenerateSubComm(MPI_Comm comm, HYPRE_Int participate, MPI_Comm *new_comm_ptr)
{
   MPI_Comm new_comm;
   hypre_MPI_Group orig_group, new_group;
   hypre_MPI_Op hypre_MPI_MERGE;
   HYPRE_Int *info, *ranks, new_num_procs, my_info, my_id;
   HYPRE_Int list_len;

   hypre_MPI_Comm_rank(comm,&my_id);

   /* count the participating processes */
   my_info = participate ? 1 : 0;
   hypre_MPI_Allreduce(&my_info, &new_num_procs, 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);

   if (new_num_procs == 0)
   {
      new_comm = hypre_MPI_COMM_NULL;
      *new_comm_ptr = new_comm;
      return 0;
   }

   /* the participating ranks are collected starting at ranks[2]; the two
      leading slots are header space for the merged list */
   ranks = hypre_CTAlloc(HYPRE_Int, new_num_procs+2);

   if (new_num_procs == 1)
   {
      /* only one contributor: everyone else adds 0, so the sum is its rank */
      if (participate)
      {
         my_info = my_id;
      }
      hypre_MPI_Allreduce(&my_info, &ranks[2], 1, HYPRE_MPI_INT, hypre_MPI_SUM, comm);
   }
   else
   {
      /* each participant contributes the list {1, 1, my_id}; the others
         contribute a list whose first entry is 0.  The lists are combined
         with the user-defined reduction hypre_merge_lists.
         NOTE(review): the exact header layout is defined by
         hypre_merge_lists, which is not visible here. */
      info = hypre_CTAlloc(HYPRE_Int, new_num_procs+2);
      if (participate)
      {
         info[0] = 1;
         info[1] = 1;
         info[2] = my_id;
      }
      else
      {
         info[0] = 0;
      }
      list_len = new_num_procs + 2;

      hypre_MPI_Op_create((hypre_MPI_User_function *)hypre_merge_lists, 0, &hypre_MPI_MERGE);
      hypre_MPI_Allreduce(info, ranks, list_len, HYPRE_MPI_INT, hypre_MPI_MERGE, comm);
      hypre_MPI_Op_free (&hypre_MPI_MERGE);

      hypre_TFree(info);
   }

   /* build the sub-communicator from the collected ranks */
   hypre_MPI_Comm_group(comm, &orig_group);
   hypre_MPI_Group_incl(orig_group, new_num_procs, &ranks[2], &new_group);
   hypre_MPI_Comm_create(comm, new_group, &new_comm);
   hypre_MPI_Group_free(&new_group);
   hypre_MPI_Group_free(&orig_group);

   hypre_TFree(ranks);

   *new_comm_ptr = new_comm;
   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_SysPFMGCreateRAPOp
 *
 * Creates the coarse-grid PMatrix RAP on coarse_grid.  For every variable
 * vi an SStruct stencil is assembled by concatenating, over all variables
 * vj with a non-NULL block A(vi,vj), the struct stencil that
 * hypre_SemiCreateRAPOp produces for R(vi,vi), A(vi,vj), P(vj,vj).
 *
 *   R, A, P     : restriction, fine-grid operator and prolongation
 *   coarse_grid : the grid on which RAP lives
 *   cdir        : coarsening direction, forwarded to hypre_SemiCreateRAPOp
 *
 * Returns the created PMatrix.  The stencil array is handed to
 * hypre_SStructPMatrixCreate and is not freed here.
 *
 * Symmetry within a block is exploited, but not symmetry of the form
 * A_{vi,vj} = A_{vj,vi}^T.
 *--------------------------------------------------------------------------*/
hypre_SStructPMatrix * hypre_SysPFMGCreateRAPOp( hypre_SStructPMatrix *R, hypre_SStructPMatrix *A, hypre_SStructPMatrix *P, hypre_SStructPGrid *coarse_grid, HYPRE_Int cdir )
{
   hypre_SStructPMatrix *RAP;
   hypre_SStructStencil **RAP_stencils;
   hypre_SStructVariable vartype;
   hypre_StructGrid *cgrid;

   hypre_StructMatrix *probe;
   hypre_StructMatrix *R_s;
   hypre_StructMatrix *A_s;
   hypre_StructMatrix *P_s;
   hypre_StructStencil *probe_stencil;
   hypre_Index *probe_offsets;

   hypre_Index **block_shapes;
   HYPRE_Int *block_sizes;
   HYPRE_Int n_entries;
   HYPRE_Int total_entries;
   HYPRE_Int next_entry;

   HYPRE_Int ndim;
   HYPRE_Int nvars;
   HYPRE_Int row_var, col_var;
   HYPRE_Int e;

   HYPRE_Int P_stored_as_transpose = 0;

   ndim = hypre_StructStencilDim(hypre_SStructPMatrixSStencil(A, 0, 0));
   nvars = hypre_SStructPMatrixNVars(A);

   vartype = hypre_SStructPGridVarType(coarse_grid, 0);
   cgrid = hypre_SStructPGridVTSGrid(coarse_grid, vartype);

   RAP_stencils = hypre_CTAlloc(hypre_SStructStencil *, nvars);
   block_shapes = hypre_CTAlloc(hypre_Index *, nvars);
   block_sizes = hypre_CTAlloc(HYPRE_Int, nvars);

   for (row_var = 0; row_var < nvars; row_var++)
   {
      R_s = hypre_SStructPMatrixSMatrix(R, row_var, row_var);
      total_entries = 0;

      /* pass 1: collect the stencil shape of every existing block */
      for (col_var = 0; col_var < nvars; col_var++)
      {
         A_s = hypre_SStructPMatrixSMatrix(A, row_var, col_var);
         P_s = hypre_SStructPMatrixSMatrix(P, col_var, col_var);
         block_sizes[col_var] = 0;
         if (A_s == NULL)
         {
            continue;
         }

         /* Just want stencil for RAP */
         probe = hypre_SemiCreateRAPOp(R_s, A_s, P_s, cgrid, cdir, P_stored_as_transpose);
         hypre_StructMatrixInitializeShell(probe);

         probe_stencil = hypre_StructMatrixStencil(probe);
         probe_offsets = hypre_StructStencilShape(probe_stencil);
         n_entries = hypre_StructStencilSize(probe_stencil);

         block_sizes[col_var] = n_entries;
         total_entries += n_entries;

         block_shapes[col_var] = hypre_CTAlloc(hypre_Index, n_entries);
         for (e = 0; e < n_entries; e++)
         {
            hypre_CopyIndex(probe_offsets[e],block_shapes[col_var][e]);
         }

         hypre_StructMatrixDestroy(probe);
      }

      /* pass 2: concatenate the collected shapes into one SStruct stencil */
      HYPRE_SStructStencilCreate(ndim, total_entries, &RAP_stencils[row_var]);
      next_entry = 0;
      for (col_var = 0; col_var < nvars; col_var++)
      {
         if (block_sizes[col_var] <= 0)
         {
            continue;
         }
         for (e = 0; e < block_sizes[col_var]; e++)
         {
            HYPRE_SStructStencilSetEntry(RAP_stencils[row_var], next_entry, block_shapes[col_var][e], col_var);
            next_entry++;
         }
         hypre_TFree(block_shapes[col_var]);
      }
   }

   /* create RAP Pmatrix */
   hypre_SStructPMatrixCreate(hypre_SStructPMatrixComm(A), coarse_grid, RAP_stencils, &RAP);

   hypre_TFree(block_shapes);
   hypre_TFree(block_sizes);

   return RAP;
}
HYPRE_Int hypre_AMGNodalSchwarzSmoother( hypre_CSRMatrix *A, HYPRE_Int *dof_func, HYPRE_Int num_functions, HYPRE_Int option, HYPRE_Int **i_domain_dof_pointer, HYPRE_Int **j_domain_dof_pointer, HYPRE_Real **domain_matrixinverse_pointer, HYPRE_Int *num_domains_pointer) { /* option = 0: nodal symGS; 1: next to nodal symGS (overlapping Schwarz) */ HYPRE_Int *i_domain_dof, *j_domain_dof; HYPRE_Real *domain_matrixinverse; HYPRE_Int num_domains; HYPRE_Int *i_dof_node, *j_dof_node; HYPRE_Int *i_node_dof, *j_node_dof; HYPRE_Int *i_node_dof_dof, *j_node_dof_dof; HYPRE_Int *i_node_node, *j_node_node; HYPRE_Int num_nodes; HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A); HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A); HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A); HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A); HYPRE_Int ierr = 0; HYPRE_Int i,j,k, l_loc, i_loc, j_loc; HYPRE_Int i_dof, j_dof; HYPRE_Int *i_local_to_global; HYPRE_Int *i_global_to_local; HYPRE_Int *i_int; HYPRE_Int *i_int_to_local; HYPRE_Int int_dof_counter, local_dof_counter, max_local_dof_counter=0; HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0; HYPRE_Real *AE, *XE; /* PCG arrays: --------------------------------------------------- HYPRE_Real *x, *rhs, *v, *w, *d, *aux; HYPRE_Int max_iter; ------------------------------------------------------------------ */ /* build dof_node graph: ----------------------------------------- */ num_nodes = num_dofs / num_functions; hypre_printf("\nnum_nodes: %d, num_dofs: %d = %d x %d\n", num_nodes, num_dofs, num_nodes, num_functions); i_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs+1); j_dof_node = hypre_CTAlloc(HYPRE_Int, num_dofs); for (i=0; i < num_dofs+1; i++) i_dof_node[i] = i; for (j = 0; j < num_nodes; j++) for (k = 0; k < num_functions; k++) j_dof_node[j*num_functions+k] = j; /* build node_dof graph: ----------------------------------------- */ ierr = transpose_matrix_create(&i_node_dof, &j_node_dof, i_dof_node, j_dof_node, num_dofs, num_nodes); /* build 
node_node graph: ----------------------------------------- */ ierr = matrix_matrix_product(&i_node_dof_dof, &j_node_dof_dof, i_node_dof, j_node_dof, i_dof_dof, j_dof_dof, num_nodes, num_dofs, num_dofs); ierr = matrix_matrix_product(&i_node_node, &j_node_node, i_node_dof_dof, j_node_dof_dof, i_dof_node, j_dof_node, num_nodes, num_dofs, num_nodes); hypre_TFree(i_node_dof_dof); hypre_TFree(j_node_dof_dof); /* compute for each node the local information: -------------------- */ i_global_to_local = i_dof_node; for (i_dof =0; i_dof < num_dofs; i_dof++) i_global_to_local[i_dof] = -1; domain_matrixinverse_counter = 0; domain_dof_counter = 0; for (i=0; i < num_nodes; i++) { local_dof_counter = 0; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; if (i_global_to_local[j_dof] < 0) { i_global_to_local[j_dof] = local_dof_counter; local_dof_counter++; } } domain_matrixinverse_counter += local_dof_counter*local_dof_counter; domain_dof_counter += local_dof_counter; if (local_dof_counter > max_local_dof_counter) max_local_dof_counter = local_dof_counter; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; i_global_to_local[j_dof] = -1; } } num_domains = num_nodes; i_domain_dof = hypre_CTAlloc(HYPRE_Int, num_domains+1); if (option == 1) j_domain_dof = hypre_CTAlloc(HYPRE_Int, domain_dof_counter); else j_domain_dof = hypre_CTAlloc(HYPRE_Int, num_dofs); if (option == 1) domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter); else domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, num_dofs * num_functions); i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); AE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); XE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); i_int_to_local = hypre_CTAlloc(HYPRE_Int, 
max_local_dof_counter); i_int = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); for (l_loc=0; l_loc < max_local_dof_counter; l_loc++) i_int[l_loc] = -1; domain_dof_counter = 0; domain_matrixinverse_counter = 0; for (i=0; i < num_nodes; i++) { i_domain_dof[i] = domain_dof_counter; local_dof_counter = 0; for (j=i_node_node[i]; j < i_node_node[i+1]; j++) for (k=i_node_dof[j_node_node[j]]; k<i_node_dof[j_node_node[j]+1]; k++) { j_dof = j_node_dof[k]; if (i_global_to_local[j_dof] < 0) { i_global_to_local[j_dof] = local_dof_counter; i_local_to_global[local_dof_counter] = j_dof; local_dof_counter++; } } for (j=i_node_dof[i]; j < i_node_dof[i+1]; j++) for (k=i_dof_dof[j_node_dof[j]]; k < i_dof_dof[j_node_dof[j]+1]; k++) if (i_global_to_local[j_dof_dof[k]] < 0) hypre_printf("WRONG local indexing: ====================== \n"); int_dof_counter = 0; for (k=i_node_dof[i]; k < i_node_dof[i+1]; k++) { i_dof = j_node_dof[k]; i_loc = i_global_to_local[i_dof]; i_int[i_loc] = int_dof_counter; i_int_to_local[int_dof_counter] = i_loc; int_dof_counter++; } /* get local matrix AE: ======================================== */ if (option == 1) { for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) AE[i_loc + j_loc * local_dof_counter] = 0.e0; for (i_loc=0; i_loc < local_dof_counter; i_loc++) { i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j]; } } /* get block for Schwarz smoother: ============================= */ ierr = matinv(XE, AE, local_dof_counter); /* hypre_printf("ierr_AE_inv: %d\n", ierr); */ } if (option == 1) for (i_loc=0; i_loc < local_dof_counter; i_loc++) j_domain_dof[domain_dof_counter+i_loc] = i_local_to_global[i_loc]; if (option == 1) for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter 
+ i_loc + j_loc * local_dof_counter] = XE[i_loc + j_loc * local_dof_counter]; if (option == 0) { for (i_loc=0; i_loc < int_dof_counter; i_loc++) for (j_loc=0; j_loc < int_dof_counter; j_loc++) AE[i_loc + j_loc * int_dof_counter] = 0.e0; for (l_loc=0; l_loc < int_dof_counter; l_loc++) { i_loc = i_int_to_local[l_loc]; i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) if (i_int[j_loc] >=0) AE[i_loc + i_int[j_loc] * int_dof_counter] = a_dof_dof[j]; } } ierr = matinv(XE, AE, int_dof_counter); for (i_loc=0; i_loc < int_dof_counter; i_loc++) { j_domain_dof[domain_dof_counter + i_loc] = i_local_to_global[i_int_to_local[i_loc]]; for (j_loc=0; j_loc < int_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter + i_loc + j_loc * int_dof_counter] = XE[i_loc + j_loc * int_dof_counter]; } domain_dof_counter+=int_dof_counter; domain_matrixinverse_counter+=int_dof_counter*int_dof_counter; } else { domain_dof_counter+=local_dof_counter; domain_matrixinverse_counter+=local_dof_counter*local_dof_counter; } for (l_loc=0; l_loc < local_dof_counter; l_loc++) { i_int[l_loc] = -1; i_global_to_local[i_local_to_global[l_loc]] = -1; } } i_domain_dof[num_nodes] = domain_dof_counter; hypre_TFree(i_dof_node); hypre_TFree(j_dof_node); hypre_TFree(i_node_dof); hypre_TFree(j_node_dof); hypre_TFree(i_node_node); hypre_TFree(j_node_node); hypre_TFree(i_int); hypre_TFree(i_int_to_local); hypre_TFree(i_local_to_global); hypre_TFree(AE); hypre_TFree(XE); *i_domain_dof_pointer = i_domain_dof; *j_domain_dof_pointer = j_domain_dof; *num_domains_pointer = num_domains; *domain_matrixinverse_pointer = domain_matrixinverse; /* hypre_printf("exit *Schwarz*: ===============================\n\n"); */ /* ----------------------------------------------------------------- x = hypre_CTAlloc(HYPRE_Real, num_dofs); rhs = hypre_CTAlloc(HYPRE_Real, num_dofs); v = hypre_CTAlloc(HYPRE_Real, num_dofs); w = 
hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); aux = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) x[i] = 0.e0; for (i=0; i < num_dofs; i++) rhs[i] = rand(); max_iter = 1000; hypre_printf("\nenter SchwarzPCG: =======================================\n"); ierr = hypre_Schwarzpcg(x, rhs, a_dof_dof, i_dof_dof, j_dof_dof, i_domain_dof, j_domain_dof, domain_matrixinverse, num_domains, v, w, d, aux, max_iter, num_dofs); hypre_printf("\n\n=======================================================\n"); hypre_printf(" END test PCG solve: \n"); hypre_printf("===========================================================\n"); hypre_TFree(x); hypre_TFree(rhs); hypre_TFree(aux); hypre_TFree(v); hypre_TFree(w); hypre_TFree(d); ----------------------------------------------------------------------- */ return ierr; }
/*--------------------------------------------------------------------------
 * Test driver for structured vector migration.
 *
 * Builds two struct grids over the same blocks (the "to" grid extends each
 * block by one cell towards a neighbouring processor), fills a "from"
 * vector and a reference vector with 1.0 through AddValuesVector, migrates
 * the "from" vector onto the "to" grid and prints the squared norm of the
 * difference to the reference vector (0 means success).
 *
 * Options: -n, -istart, -P, -b (three integers each), -d <dim>, -print,
 * -help.  An option that is missing its values prints the usage text.
 * Exits with status 1 on usage/parameter errors, returns 0 otherwise.
 *--------------------------------------------------------------------------*/
hypre_int main( hypre_int argc, char *argv[] )
{
   HYPRE_Int arg_index;
   HYPRE_Int print_usage;
   HYPRE_Int nx, ny, nz;
   HYPRE_Int P, Q, R;
   HYPRE_Int bx, by, bz;

   HYPRE_StructGrid from_grid, to_grid;
   HYPRE_StructVector from_vector, to_vector, check_vector;
   HYPRE_CommPkg comm_pkg;

   HYPRE_Int time_index;
   HYPRE_Int num_procs, myid;

   /* initialized so that lower dimensions never read indeterminate values */
   HYPRE_Int p = 0, q = 0, r = 0;
   HYPRE_Int dim;
   HYPRE_Int nblocks = 0;
   HYPRE_Int **ilower, **iupper, **iupper2;
   HYPRE_Int istart[3];
   HYPRE_Int i, ix, iy, iz, ib;
   HYPRE_Int print_system = 0;

   HYPRE_Real check;

   /*-----------------------------------------------------------
    * Initialize some stuff
    *-----------------------------------------------------------*/

   /* Initialize MPI */
   hypre_MPI_Init(&argc, &argv);

   hypre_MPI_Comm_size(hypre_MPI_COMM_WORLD, &num_procs );
   hypre_MPI_Comm_rank(hypre_MPI_COMM_WORLD, &myid );

   /*-----------------------------------------------------------
    * Set defaults
    *-----------------------------------------------------------*/

   dim = 3;

   nx = 2;
   ny = 2;
   nz = 2;

   P = num_procs;
   Q = 1;
   R = 1;

   bx = 1;
   by = 1;
   bz = 1;

   istart[0] = 1;
   istart[1] = 1;
   istart[2] = 1;

   /*-----------------------------------------------------------
    * Parse command line
    *-----------------------------------------------------------*/

   print_usage = 0;
   arg_index = 1;
   while (arg_index < argc)
   {
      if ( strcmp(argv[arg_index], "-n") == 0 )
      {
         /* the option needs three values after it */
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         nx = atoi(argv[arg_index++]);
         ny = atoi(argv[arg_index++]);
         nz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-istart") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         istart[0] = atoi(argv[arg_index++]);
         istart[1] = atoi(argv[arg_index++]);
         istart[2] = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-P") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         P = atoi(argv[arg_index++]);
         Q = atoi(argv[arg_index++]);
         R = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-b") == 0 )
      {
         if (arg_index + 3 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         bx = atoi(argv[arg_index++]);
         by = atoi(argv[arg_index++]);
         bz = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-d") == 0 )
      {
         if (arg_index + 1 >= argc)
         {
            print_usage = 1;
            break;
         }
         arg_index++;
         dim = atoi(argv[arg_index++]);
      }
      else if ( strcmp(argv[arg_index], "-print") == 0 )
      {
         arg_index++;
         print_system = 1;
      }
      else if ( strcmp(argv[arg_index], "-help") == 0 )
      {
         print_usage = 1;
         break;
      }
      else
      {
         arg_index++;
      }
   }

   /*-----------------------------------------------------------
    * Print usage info
    *-----------------------------------------------------------*/

   if ( (print_usage) && (myid == 0) )
   {
      hypre_printf("\n");
      hypre_printf("Usage: %s [<options>]\n", argv[0]);
      hypre_printf("\n");
      hypre_printf(" -n <nx> <ny> <nz> : problem size per block\n");
      hypre_printf(" -istart <ix> <iy> <iz> : start of box\n");
      hypre_printf(" -P <Px> <Py> <Pz> : processor topology\n");
      hypre_printf(" -b <bx> <by> <bz> : blocking per processor\n");
      hypre_printf(" -d <dim> : problem dimension (1, 2, or 3)\n");
      hypre_printf(" -print : print vectors\n");
      hypre_printf("\n");
   }

   if ( print_usage )
   {
      exit(1);
   }

   /*-----------------------------------------------------------
    * Check a few things
    *-----------------------------------------------------------*/

   /* only dimensions 1..3 are set up below */
   if ((dim < 1) || (dim > 3))
   {
      if (myid == 0)
      {
         hypre_printf("Error: dim must be 1, 2, or 3\n");
      }
      exit(1);
   }

   /* P and Q are used as divisors when locating this process */
   if ((P < 1) || (Q < 1) || (R < 1))
   {
      if (myid == 0)
      {
         hypre_printf("Error: P, Q, and R must be positive\n");
      }
      exit(1);
   }

   if ((P*Q*R) > num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Error: PxQxR is more than the number of processors\n");
      }
      exit(1);
   }
   else if ((P*Q*R) < num_procs)
   {
      if (myid == 0)
      {
         hypre_printf("Warning: PxQxR is less than the number of processors\n");
      }
   }

   /*-----------------------------------------------------------
    * Print driver parameters
    *-----------------------------------------------------------*/

   if (myid == 0)
   {
      hypre_printf("Running with these driver parameters:\n");
      hypre_printf(" (nx, ny, nz) = (%d, %d, %d)\n", nx, ny, nz);
      hypre_printf(" (ix, iy, iz) = (%d, %d, %d)\n", istart[0],istart[1],istart[2]);
      hypre_printf(" (Px, Py, Pz) = (%d, %d, %d)\n", P, Q, R);
      hypre_printf(" (bx, by, bz) = (%d, %d, %d)\n", bx, by, bz);
      hypre_printf(" dim = %d\n", dim);
   }

   /*-----------------------------------------------------------
    * Set up the grid structure: number of local blocks and the
    * position (p,q,r) of this process in the processor topology
    *-----------------------------------------------------------*/

   switch (dim)
   {
      case 1:
         nblocks = bx;
         p = myid % P;
         break;
      case 2:
         nblocks = bx*by;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         break;
      case 3:
         nblocks = bx*by*bz;
         p = myid % P;
         q = (( myid - p)/P) % Q;
         r = ( myid - p - P*q)/( P*Q );
         break;
      default:
         break;
   }

   if (myid >= (P*Q*R))
   {
      /* My processor has no data on it */
      nblocks = bx = by = bz = 0;
   }

   /*-----------------------------------------------------------
    * prepare space for the extents
    *-----------------------------------------------------------*/

   ilower = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper = hypre_CTAlloc(HYPRE_Int*, nblocks);
   iupper2 = hypre_CTAlloc(HYPRE_Int*, nblocks);
   for (i = 0; i < nblocks; i++)
   {
      ilower[i] = hypre_CTAlloc(HYPRE_Int, dim);
      iupper[i] = hypre_CTAlloc(HYPRE_Int, dim);
      iupper2[i] = hypre_CTAlloc(HYPRE_Int, dim);
   }

   /* iupper2 equals iupper except that the last block in a direction is
      extended by one cell when another processor follows in that direction */
   ib = 0;
   switch (dim)
   {
      case 1:
         for (ix = 0; ix < bx; ix++)
         {
            ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
            iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
            iupper2[ib][0] = iupper[ib][0];
            if ( (ix == (bx-1)) && (p < (P-1)) )
               iupper2[ib][0] = iupper[ib][0] + 1;
            ib++;
         }
         break;
      case 2:
         for (iy = 0; iy < by; iy++)
            for (ix = 0; ix < bx; ix++)
            {
               ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
               iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
               ilower[ib][1] = istart[1]+ ny*(by*q+iy);
               iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
               iupper2[ib][0] = iupper[ib][0];
               iupper2[ib][1] = iupper[ib][1];
               if ( (ix == (bx-1)) && (p < (P-1)) )
                  iupper2[ib][0] = iupper[ib][0] + 1;
               if ( (iy == (by-1)) && (q < (Q-1)) )
                  iupper2[ib][1] = iupper[ib][1] + 1;
               ib++;
            }
         break;
      case 3:
         for (iz = 0; iz < bz; iz++)
            for (iy = 0; iy < by; iy++)
               for (ix = 0; ix < bx; ix++)
               {
                  ilower[ib][0] = istart[0]+ nx*(bx*p+ix);
                  iupper[ib][0] = istart[0]+ nx*(bx*p+ix+1) - 1;
                  ilower[ib][1] = istart[1]+ ny*(by*q+iy);
                  iupper[ib][1] = istart[1]+ ny*(by*q+iy+1) - 1;
                  ilower[ib][2] = istart[2]+ nz*(bz*r+iz);
                  iupper[ib][2] = istart[2]+ nz*(bz*r+iz+1) - 1;
                  iupper2[ib][0] = iupper[ib][0];
                  iupper2[ib][1] = iupper[ib][1];
                  iupper2[ib][2] = iupper[ib][2];
                  if ( (ix == (bx-1)) && (p < (P-1)) )
                     iupper2[ib][0] = iupper[ib][0] + 1;
                  if ( (iy == (by-1)) && (q < (Q-1)) )
                     iupper2[ib][1] = iupper[ib][1] + 1;
                  if ( (iz == (bz-1)) && (r < (R-1)) )
                     iupper2[ib][2] = iupper[ib][2] + 1;
                  ib++;
               }
         break;
      default:
         break;
   }

   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &from_grid);
   HYPRE_StructGridCreate(hypre_MPI_COMM_WORLD, dim, &to_grid);
   for (ib = 0; ib < nblocks; ib++)
   {
      HYPRE_StructGridSetExtents(from_grid, ilower[ib], iupper[ib]);
      HYPRE_StructGridSetExtents(to_grid, ilower[ib], iupper2[ib]);
   }
   HYPRE_StructGridAssemble(from_grid);
   HYPRE_StructGridAssemble(to_grid);

   /*-----------------------------------------------------------
    * Set up the vectors
    *-----------------------------------------------------------*/

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, from_grid, &from_vector);
   HYPRE_StructVectorInitialize(from_vector);
   AddValuesVector(from_grid, from_vector, 1.0);
   HYPRE_StructVectorAssemble(from_vector);

   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &to_vector);
   HYPRE_StructVectorInitialize(to_vector);
   AddValuesVector(to_grid, to_vector, 0.0);
   HYPRE_StructVectorAssemble(to_vector);

   /* Vector used to check the migration */
   HYPRE_StructVectorCreate(hypre_MPI_COMM_WORLD, to_grid, &check_vector);
   HYPRE_StructVectorInitialize(check_vector);
   AddValuesVector(to_grid, check_vector, 1.0);
   HYPRE_StructVectorAssemble(check_vector);

   /*-----------------------------------------------------------
    * Migrate
    *-----------------------------------------------------------*/

   time_index = hypre_InitializeTiming("Struct Migrate");
   hypre_BeginTiming(time_index);

   HYPRE_StructVectorGetMigrateCommPkg(from_vector, to_vector, &comm_pkg);
   HYPRE_StructVectorMigrate(comm_pkg, from_vector, to_vector);
   HYPRE_CommPkgDestroy(comm_pkg);

   hypre_EndTiming(time_index);
   hypre_PrintTiming("Struct Migrate", hypre_MPI_COMM_WORLD);
   hypre_FinalizeTiming(time_index);

   /*-----------------------------------------------------------
    * Check the migration and print the result
    *-----------------------------------------------------------*/

   hypre_StructAxpy(-1.0, to_vector, check_vector);
   check = hypre_StructInnerProd (check_vector, check_vector);

   if (myid == 0)
   {
      /* hypre_printf, like every other message of this driver */
      hypre_printf("\nCheck = %1.0f (success = 0)\n\n", check);
   }

   /*-----------------------------------------------------------
    * Print out the vectors
    *-----------------------------------------------------------*/

   if (print_system)
   {
      HYPRE_StructVectorPrint("struct_migrate.out.xfr", from_vector, 0);
      HYPRE_StructVectorPrint("struct_migrate.out.xto", to_vector, 0);
   }

   /*-----------------------------------------------------------
    * Finalize things
    *-----------------------------------------------------------*/

   HYPRE_StructGridDestroy(from_grid);
   HYPRE_StructGridDestroy(to_grid);

   for (i = 0; i < nblocks; i++)
   {
      hypre_TFree(ilower[i]);
      hypre_TFree(iupper[i]);
      hypre_TFree(iupper2[i]);
   }
   hypre_TFree(ilower);
   hypre_TFree(iupper);
   hypre_TFree(iupper2);

   HYPRE_StructVectorDestroy(from_vector);
   HYPRE_StructVectorDestroy(to_vector);
   HYPRE_StructVectorDestroy(check_vector);

   /* Finalize MPI */
   hypre_MPI_Finalize();

   return (0);
}
int hypre_ParCSRMatrixMatvecT( double alpha, hypre_ParCSRMatrix *A,
                               hypre_ParVector *x, double beta,
                               hypre_ParVector *y )
/*---------------------------------------------------------------------
 * Parallel transposed matrix-vector product.
 *
 * The diag block is applied with hypre_CSRMatrixMatvecT(alpha, ..., beta)
 * directly into the local part of y.  The offd block is applied (with
 * beta = 0) into a temporary vector of length num_cols_offd, which is
 * shipped through the communication package (job 2); the values that
 * come back are added to the local entries of y listed in the send map.
 * Presumably this realizes y = alpha*A^T*x + beta*y -- confirm against
 * hypre_CSRMatrixMatvecT.
 *
 * Return value (informational only, processing is never aborted):
 *   0  sizes are compatible
 *   1  global length of x differs from the global number of rows of A
 *   2  global length of y differs from the global number of columns of A
 *   3  both of the above
 *
 * NOTE(review): the temporary vector is created for a single vector of
 * length num_cols_offd, yet it is indexed with jv*num_cols_offd for
 * every jv < num_vectors.  This looks valid only for num_vectors == 1
 * -- confirm before using multivectors here.
 *--------------------------------------------------------------------*/
{
   hypre_ParCSRCommPkg     *pkg     = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix         *A_diag  = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *A_offd  = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_loc   = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_loc   = hypre_ParVectorLocalVector(y);

   HYPRE_BigInt  global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_BigInt  global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_BigInt  x_len       = hypre_ParVectorGlobalSize(x);
   HYPRE_BigInt  y_len       = hypre_ParVectorGlobalSize(y);

   int  vec_stride  = hypre_VectorVectorStride( y_loc );
   int  idx_stride  = hypre_VectorIndexStride( y_loc );
   int  n_offd_cols = hypre_CSRMatrixNumCols(A_offd);
   int  n_vecs      = hypre_VectorNumVectors(y_loc);

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *offd_result;
   double                  *offd_result_data;
   double                 **recv_buf;
   double                  *y_data;

   int  n_sends, s, k, v, pos, first, last;

   /* 1 = row/x mismatch, 2 = column/y mismatch, 3 = both */
   int  rows_differ = (global_rows != x_len);
   int  cols_differ = (global_cols != y_len);
   int  ierr        = rows_differ + 2*cols_differ;

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*, n_vecs);

   offd_result = hypre_SeqVectorCreate(n_offd_cols);
   hypre_SeqVectorInitialize(offd_result);

   /* Build the communication package on demand. */
   if (pkg == NULL)
   {
#ifdef HYPRE_NO_GLOBAL_PARTITION
      hypre_NewCommPkgCreate(A);
#else
      hypre_MatvecCommPkgCreate(A);
#endif
      pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   n_sends = hypre_ParCSRCommPkgNumSends(pkg);

   /* One buffer per vector, each as long as the complete send map. */
   recv_buf = hypre_CTAlloc( double*, n_vecs );
   for ( v = 0; v < n_vecs; ++v )
   {
      recv_buf[v] = hypre_CTAlloc(double,
                                  hypre_ParCSRCommPkgSendMapStart(pkg, n_sends));
   }

   offd_result_data = hypre_VectorData(offd_result);
   y_data           = hypre_VectorData(y_loc);

   /* only 'column' storage of multivectors is implemented so far */
   hypre_assert( idx_stride==1 );

   if (n_offd_cols)
   {
      hypre_CSRMatrixMatvecT(alpha, A_offd, x_loc, 0.0, offd_result);
   }

   /* Start the exchange; this is where 'column' storage is assumed. */
   for ( v = 0; v < n_vecs; ++v )
   {
      handles[v] = hypre_ParCSRCommHandleCreate
         ( 2, pkg, &(offd_result_data[v*n_offd_cols]), recv_buf[v] );
   }

   /* The diag product is issued between creating and destroying the handles. */
   hypre_CSRMatrixMatvecT(alpha, A_diag, x_loc, beta, y_loc);

   for ( v = 0; v < n_vecs; ++v )
   {
      hypre_ParCSRCommHandleDestroy(handles[v]);
      handles[v] = NULL;
   }
   hypre_TFree(handles);

   /* Add the received contributions to the entries named by the send map. */
   if ( n_vecs == 1 )
   {
      pos = 0;
      for (s = 0; s < n_sends; s++)
      {
         first = hypre_ParCSRCommPkgSendMapStart(pkg, s);
         last  = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
         for (k = first; k < last; k++)
         {
            y_data[hypre_ParCSRCommPkgSendMapElmt(pkg,k)] += recv_buf[0][pos];
            pos++;
         }
      }
   }
   else
   {
      for ( v = 0; v < n_vecs; ++v )
      {
         pos = 0;
         for (s = 0; s < n_sends; s++)
         {
            first = hypre_ParCSRCommPkgSendMapStart(pkg, s);
            last  = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
            for (k = first; k < last; k++)
            {
               y_data[ v*vec_stride +
                       idx_stride*hypre_ParCSRCommPkgSendMapElmt(pkg,k) ]
                  += recv_buf[v][pos];
               pos++;
            }
         }
      }
   }

   hypre_SeqVectorDestroy(offd_result);
   offd_result = NULL;

   for ( v = 0; v < n_vecs; ++v )
   {
      hypre_TFree(recv_buf[v]);
   }
   hypre_TFree(recv_buf);

   return ierr;
}
int hypre_ParCSRMatrixMatvec( double alpha, hypre_ParCSRMatrix *A,
                              hypre_ParVector *x, double beta,
                              hypre_ParVector *y )
/*---------------------------------------------------------------------
 * Parallel matrix-vector product.
 *
 * The local entries of x named by the send map are packed into a buffer
 * and shipped through the communication package (job 1) into a
 * temporary vector of length num_cols_offd.  The diag block is applied
 * with hypre_CSRMatrixMatvec(alpha, ..., beta) into the local part of y
 * between creating and destroying the communication handle; afterwards
 * the offd block is applied to the received values with beta = 1.
 * Presumably this realizes y = alpha*A*x + beta*y -- confirm against
 * hypre_CSRMatrixMatvec.
 *
 * Return value (informational only, processing is never aborted):
 *    0  sizes are compatible
 *   11  global length of x differs from the global number of columns of A
 *   12  global length of y differs from the global number of rows of A
 *   13  both of the above
 *
 * The number of vectors is fixed to 1 in this routine.  The original
 * author's note says that handling multivectors generally (at least
 * 'row' storage) would need either a stride inside CommHandleCreate or
 * a multivector variant of it.
 *--------------------------------------------------------------------*/
{
   const int n_vecs = 1;

   hypre_ParCSRCommPkg     *pkg     = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix         *A_diag  = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix         *A_offd  = hypre_ParCSRMatrixOffd(A);
   hypre_Vector            *x_loc   = hypre_ParVectorLocalVector(x);
   hypre_Vector            *y_loc   = hypre_ParVectorLocalVector(y);

   HYPRE_BigInt  global_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_BigInt  global_cols = hypre_ParCSRMatrixGlobalNumCols(A);
   HYPRE_BigInt  x_len       = hypre_ParVectorGlobalSize(x);
   HYPRE_BigInt  y_len       = hypre_ParVectorGlobalSize(y);

   int      n_offd_cols = hypre_CSRMatrixNumCols(A_offd);
   double  *x_data      = hypre_VectorData(x_loc);

   hypre_ParCSRCommHandle **handles;
   hypre_Vector            *ghost;
   double                  *ghost_data;
   double                 **send_buf;

   int  n_sends, s, k, v, pos, first, last;
   int  ierr = 0;

   /* 11 = column/x mismatch, 12 = row/y mismatch, 13 = both */
   if (global_cols != x_len)
   {
      ierr = (global_rows != y_len) ? 13 : 11;
   }
   else if (global_rows != y_len)
   {
      ierr = 12;
   }

   ghost = hypre_SeqVectorCreate( n_offd_cols );
   hypre_SeqVectorInitialize(ghost);
   ghost_data = hypre_VectorData(ghost);

   handles = hypre_CTAlloc(hypre_ParCSRCommHandle*, n_vecs);

   /* Build the communication package on demand. */
   if (pkg == NULL)
   {
#ifdef HYPRE_NO_GLOBAL_PARTITION
      hypre_NewCommPkgCreate(A);
#else
      hypre_MatvecCommPkgCreate(A);
#endif
      pkg = hypre_ParCSRMatrixCommPkg(A);
   }
   n_sends = hypre_ParCSRCommPkgNumSends(pkg);

   /* One buffer per vector, each as long as the complete send map. */
   send_buf = hypre_CTAlloc( double*, n_vecs );
   for ( v = 0; v < n_vecs; ++v )
   {
      send_buf[v] = hypre_CTAlloc(double,
                                  hypre_ParCSRCommPkgSendMapStart(pkg, n_sends));
   }

   /* Pack the entries of x that the send map names. */
   pos = 0;
   for (s = 0; s < n_sends; s++)
   {
      first = hypre_ParCSRCommPkgSendMapStart(pkg, s);
      last  = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);
      for (k = first; k < last; k++)
      {
         send_buf[0][pos] = x_data[hypre_ParCSRCommPkgSendMapElmt(pkg,k)];
         pos++;
      }
   }

   /* Start the exchange ('column' storage of a multivector is assumed). */
   for ( v = 0; v < n_vecs; ++v )
   {
      handles[v] = hypre_ParCSRCommHandleCreate
         ( 1, pkg, send_buf[v], &(ghost_data[v*n_offd_cols]) );
   }

   /* The diag product is issued between creating and destroying the handles. */
   hypre_CSRMatrixMatvec( alpha, A_diag, x_loc, beta, y_loc);

   for ( v = 0; v < n_vecs; ++v )
   {
      hypre_ParCSRCommHandleDestroy(handles[v]);
      handles[v] = NULL;
   }
   hypre_TFree(handles);

   /* Off-processor contribution is accumulated onto y (beta = 1). */
   if (n_offd_cols)
   {
      hypre_CSRMatrixMatvec( alpha, A_offd, ghost, 1.0, y_loc);
   }

   hypre_SeqVectorDestroy(ghost);
   ghost = NULL;

   for ( v = 0; v < n_vecs; ++v )
   {
      hypre_TFree(send_buf[v]);
   }
   hypre_TFree(send_buf);

   return ierr;
}
hypre_ParVector * hypre_VectorToParVector (MPI_Comm comm, hypre_Vector *v, HYPRE_Int *vec_starts)
/*---------------------------------------------------------------------
 * Distributes a sequential (multi)vector held on rank 0 over comm.
 *
 * comm       : communicator over which the vector is distributed
 * v          : source vector; it is only read on rank 0
 * vec_starts : partitioning handed to hypre_ParVectorCreate /
 *              hypre_ParMultiVectorCreate; afterwards the partitioning
 *              is re-read from the created vector
 *
 * Rank 0 broadcasts the global size, the number of vectors and the
 * vector stride, posts one non-blocking send per (process, vector)
 * pair and copies its own part.  Every other rank posts one blocking
 * receive per vector.
 *
 * Returns the newly created parallel vector.
 *
 * NOTE(review): vec_starts is indexed with my_id+1 and p+1, i.e. it is
 * treated as an array of num_procs+1 entries -- confirm this holds for
 * the build configuration in use.
 *--------------------------------------------------------------------*/
{
   HYPRE_Int          global_size = 0;
   HYPRE_Int          local_size;
   HYPRE_Int          num_vectors = 0;
   HYPRE_Int          num_procs, my_id;
   HYPRE_Int          global_vecstride = 0, vecstride, idxstride;
   hypre_ParVector   *par_vector;
   hypre_Vector      *local_vector;
   double            *v_data = NULL;
   double            *local_data;
   hypre_MPI_Request *requests;
   hypre_MPI_Status  *status, status0;
   HYPRE_Int          num_requests;
   HYPRE_Int          i, j, k, p;

   hypre_MPI_Comm_size(comm,&num_procs);
   hypre_MPI_Comm_rank(comm,&my_id);

   /* Only rank 0 looks at v; the others learn the sizes by broadcast. */
   if (my_id == 0)
   {
      global_size = hypre_VectorSize(v);
      v_data = hypre_VectorData(v);
      num_vectors = hypre_VectorNumVectors(v); /* for multivectors */
      global_vecstride = hypre_VectorVectorStride(v);
   }

   hypre_MPI_Bcast(&global_size,1,HYPRE_MPI_INT,0,comm);
   hypre_MPI_Bcast(&num_vectors,1,HYPRE_MPI_INT,0,comm);
   hypre_MPI_Bcast(&global_vecstride,1,HYPRE_MPI_INT,0,comm);

   if ( num_vectors==1 )
      par_vector = hypre_ParVectorCreate(comm, global_size, vec_starts);
   else
      par_vector = hypre_ParMultiVectorCreate(comm, global_size, vec_starts, num_vectors);

   vec_starts = hypre_ParVectorPartitioning(par_vector);

   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   hypre_ParVectorInitialize(par_vector);
   local_vector = hypre_ParVectorLocalVector(par_vector);
   local_data = hypre_VectorData(local_vector);
   vecstride = hypre_VectorVectorStride(local_vector);
   idxstride = hypre_VectorIndexStride(local_vector);
   /* so far the only implemented multivector StorageMethod is 0 */
   hypre_assert( idxstride==1 );

   if (my_id == 0)
   {
      /* One request per (remote process, vector) pair. */
      num_requests = num_vectors*(num_procs-1);
      requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
      status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

      k = 0;
      for ( p=1; p<num_procs; p++)
         for ( j=0; j<num_vectors; ++j )
         {
            hypre_MPI_Isend( &v_data[vec_starts[p]]+j*global_vecstride,
                             (vec_starts[p+1]-vec_starts[p]),
                             hypre_MPI_DOUBLE, p, 0, comm, &requests[k++] );
         }

      /* Rank 0 copies its own part directly. */
      if ( num_vectors==1 )
      {
         for (i=0; i < local_size; i++)
            local_data[i] = v_data[i];
      }
      else
         for ( j=0; j<num_vectors; ++j )
         {
            for (i=0; i < local_size; i++)
               local_data[i+j*vecstride] = v_data[i+j*global_vecstride];
         }

      /* Wait for ALL posted sends.  Waiting for only num_procs-1 of them
       * would leave multivector sends pending while their request and
       * status arrays are released below. */
      hypre_MPI_Waitall(num_requests, requests, status);
      hypre_TFree(requests);
      hypre_TFree(status);
   }
   else
   {
      /* All messages use tag 0 and come from rank 0, one per vector. */
      for ( j=0; j<num_vectors; ++j )
         hypre_MPI_Recv( local_data+j*vecstride, local_size, hypre_MPI_DOUBLE,
                         0, 0, comm,&status0 );
   }

   return par_vector;
}
hypre_Vector * hypre_ParVectorToVectorAll (hypre_ParVector *par_v)
/*---------------------------------------------------------------------
 * Gathers a parallel vector into a sequential vector of global size on
 * every process that owns at least one entry.  Processes that own no
 * entries return NULL.
 *
 * With HYPRE_NO_GLOBAL_PARTITION the set of owning processes and their
 * ranges are not known locally: every owner reports the last index of
 * its range to rank 0 (hypre_DataExchangeList), rank 0 sorts the owner
 * ids and the range starts and sends both lists to every other owner
 * (tag1).  The data itself is then exchanged among the owners (tag2);
 * in this branch each owner also sends to itself.
 *
 * Without HYPRE_NO_GLOBAL_PARTITION the owners are read from the global
 * partitioning, the data is exchanged with the other owners (tag 0) and
 * the local part is copied directly.
 *--------------------------------------------------------------------*/
{
   MPI_Comm comm = hypre_ParVectorComm(par_v);
   HYPRE_Int global_size = hypre_ParVectorGlobalSize(par_v);
#ifndef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int *vec_starts = hypre_ParVectorPartitioning(par_v);
#endif
   hypre_Vector *local_vector = hypre_ParVectorLocalVector(par_v);
   HYPRE_Int num_procs, my_id;
   HYPRE_Int num_vectors = hypre_ParVectorNumVectors(par_v);
   hypre_Vector *vector;
   double *vector_data;
   double *local_data;
   HYPRE_Int local_size;
   hypre_MPI_Request *requests;
   hypre_MPI_Status *status;
   HYPRE_Int i, j;
   HYPRE_Int *used_procs;
   HYPRE_Int num_types, num_requests;
   HYPRE_Int vec_len, proc_id;

#ifdef HYPRE_NO_GLOBAL_PARTITION
   HYPRE_Int *new_vec_starts;

   HYPRE_Int num_contacts;
   HYPRE_Int contact_proc_list[1];
   HYPRE_Int contact_send_buf[1];
   HYPRE_Int contact_send_buf_starts[2];
   HYPRE_Int max_response_size;
   HYPRE_Int *response_recv_buf=NULL;
   HYPRE_Int *response_recv_buf_starts = NULL;
   hypre_DataExchangeResponse response_obj;
   hypre_ProcListElements send_proc_obj;

   HYPRE_Int *send_info = NULL;
   hypre_MPI_Status status1;
   HYPRE_Int count, tag1 = 112, tag2 = 223;
   HYPRE_Int start;
#endif

   hypre_MPI_Comm_size(comm, &num_procs);
   hypre_MPI_Comm_rank(comm, &my_id);

#ifdef HYPRE_NO_GLOBAL_PARTITION

   local_size = hypre_ParVectorLastIndex(par_v) -
      hypre_ParVectorFirstIndex(par_v) + 1;

   /* Determine the procs which hold data of par_v and store their ids in
      used_procs.  This needs a data exchange: if I own rows I contact
      processor 0 with the end point of my local range. */
   if (local_size > 0)
   {
      num_contacts = 1;
      contact_proc_list[0] = 0;
      contact_send_buf[0] = hypre_ParVectorLastIndex(par_v);
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 1;
   }
   else
   {
      num_contacts = 0;
      contact_send_buf_starts[0] = 0;
      contact_send_buf_starts[1] = 0;
   }

   /* build the response object; send_proc_obj saves the info from contacts */
   send_proc_obj.length = 0;
   send_proc_obj.storage_length = 10;
   send_proc_obj.id = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length);
   send_proc_obj.vec_starts = hypre_CTAlloc(HYPRE_Int, send_proc_obj.storage_length + 1);
   send_proc_obj.vec_starts[0] = 0;
   send_proc_obj.element_storage_length = 10;
   send_proc_obj.elements = hypre_CTAlloc(HYPRE_Int, send_proc_obj.element_storage_length);

   max_response_size = 0; /* each response is null */
   response_obj.fill_response = hypre_FillResponseParToVectorAll;
   response_obj.data1 = NULL;
   response_obj.data2 = &send_proc_obj; /* this is where we keep info from contacts */

   hypre_DataExchangeList(num_contacts,
                          contact_proc_list, contact_send_buf,
                          contact_send_buf_starts, sizeof(HYPRE_Int),
                          sizeof(HYPRE_Int), &response_obj,
                          max_response_size, 1,
                          comm, (void**) &response_recv_buf,
                          &response_recv_buf_starts);

   /* Now processor 0 should have a list of ranges for the processors that
      have rows - these are in send_proc_obj.  It creates the new list of
      processors and an array of vec starts and sends both to those who
      own rows. */
   if (my_id)
   {
      if (local_size)
      {
         /* look for a message from processor 0 */
         hypre_MPI_Probe(0, tag1, comm, &status1);
         hypre_MPI_Get_count(&status1, HYPRE_MPI_INT, &count);

         send_info = hypre_CTAlloc(HYPRE_Int, count);
         hypre_MPI_Recv(send_info, count, HYPRE_MPI_INT, 0, tag1, comm, &status1);

         /* unpack: [num_types | used_procs[num_types] | new_vec_starts[...]] */
         num_types = send_info[0];
         used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
         new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);

         for (i=1; i<= num_types; i++)
         {
            used_procs[i-1] = send_info[i];
         }
         for (i=num_types+1; i< count; i++)
         {
            new_vec_starts[i-num_types-1] = send_info[i] ;
         }
      }
      else /* clean up and exit */
      {
         hypre_TFree(send_proc_obj.vec_starts);
         hypre_TFree(send_proc_obj.id);
         hypre_TFree(send_proc_obj.elements);
         if(response_recv_buf) hypre_TFree(response_recv_buf);
         if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);
         return NULL;
      }
   }
   else /* my_id ==0 */
   {
      num_types = send_proc_obj.length;
      used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
      new_vec_starts = hypre_CTAlloc(HYPRE_Int, num_types+1);

      new_vec_starts[0] = 0;
      for (i=0; i< num_types; i++)
      {
         used_procs[i] = send_proc_obj.id[i];
         new_vec_starts[i+1] = send_proc_obj.elements[i]+1;
      }
      qsort0(used_procs, 0, num_types-1);
      qsort0(new_vec_starts, 0, num_types);

      /* now we need to put into an array to send */
      count = 2*num_types+2;
      send_info = hypre_CTAlloc(HYPRE_Int, count);
      send_info[0] = num_types;
      for (i=1; i<= num_types; i++)
      {
         send_info[i] = used_procs[i-1];
      }
      for (i=num_types+1; i< count; i++)
      {
         send_info[i] = new_vec_starts[i-num_types-1];
      }
      requests = hypre_CTAlloc(hypre_MPI_Request, num_types);
      status = hypre_CTAlloc(hypre_MPI_Status, num_types);

      /* Don't send to myself - the ids are sorted, so my id would be
         first.  used_procs is a zero-length allocation when no process
         owns rows, so it must not be read in that case. */
      start = 0;
      if (num_types > 0 && used_procs[0] == 0)
      {
         start = 1;
      }

      for (i=start; i < num_types; i++)
      {
         hypre_MPI_Isend(send_info, count, HYPRE_MPI_INT, used_procs[i], tag1,
                         comm, &requests[i-start]);
      }
      hypre_MPI_Waitall(num_types-start, requests, status);

      hypre_TFree(status);
      hypre_TFree(requests);
   }

   /* clean up */
   hypre_TFree(send_proc_obj.vec_starts);
   hypre_TFree(send_proc_obj.id);
   hypre_TFree(send_proc_obj.elements);
   hypre_TFree(send_info);
   if(response_recv_buf) hypre_TFree(response_recv_buf);
   if(response_recv_buf_starts) hypre_TFree(response_recv_buf_starts);

   /* now proc 0 can exit if it has no rows */
   if (!local_size)
   {
      hypre_TFree(used_procs);
      hypre_TFree(new_vec_starts);
      return NULL;
   }

   /* everyone left has rows and knows: new_vec_starts, num_types,
      and used_procs */

   /* this vector should be rather small */
   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

   /* one receive and one send per owning process */
   num_requests = 2*num_types;

   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

   /* initialize data exchange among used_procs and generate vector -
      here we send to ourself also */
   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      vec_len = new_vec_starts[i+1] - new_vec_starts[i];
      hypre_MPI_Irecv(&vector_data[new_vec_starts[i]], num_vectors*vec_len,
                      hypre_MPI_DOUBLE, proc_id, tag2, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
      hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE,
                      used_procs[i], tag2, comm, &requests[j++]);
   }

   hypre_MPI_Waitall(num_requests, requests, status);

   if (num_requests)
   {
      hypre_TFree(requests);
      hypre_TFree(status);
      hypre_TFree(used_procs);
   }

   hypre_TFree(new_vec_starts);

#else

   local_size = vec_starts[my_id+1] - vec_starts[my_id];

   /* if my_id contains no data, return NULL */
   if (!local_size)
      return NULL;

   local_data = hypre_VectorData(local_vector);
   vector = hypre_SeqVectorCreate(global_size);
   hypre_VectorNumVectors(vector) = num_vectors;
   hypre_SeqVectorInitialize(vector);
   vector_data = hypre_VectorData(vector);

   /* Determine the procs which hold data of par_v and store their ids in
      used_procs.  The count starts at -1 so that this process, which is
      known to own data at this point, is not counted. */
   num_types = -1;
   for (i=0; i < num_procs; i++)
      if (vec_starts[i+1]-vec_starts[i])
         num_types++;
   num_requests = 2*num_types;

   used_procs = hypre_CTAlloc(HYPRE_Int, num_types);
   j = 0;
   for (i=0; i < num_procs; i++)
      if (vec_starts[i+1]-vec_starts[i] && i-my_id)
         used_procs[j++] = i;

   requests = hypre_CTAlloc(hypre_MPI_Request, num_requests);
   status = hypre_CTAlloc(hypre_MPI_Status, num_requests);

   /* initialize data exchange among used_procs and generate vector */
   j = 0;
   for (i = 0; i < num_types; i++)
   {
      proc_id = used_procs[i];
      vec_len = vec_starts[proc_id+1] - vec_starts[proc_id];
      hypre_MPI_Irecv(&vector_data[vec_starts[proc_id]], num_vectors*vec_len,
                      hypre_MPI_DOUBLE, proc_id, 0, comm, &requests[j++]);
   }
   for (i = 0; i < num_types; i++)
   {
      hypre_MPI_Isend(local_data, num_vectors*local_size, hypre_MPI_DOUBLE,
                      used_procs[i], 0, comm, &requests[j++]);
   }

   /* the local part is copied directly */
   for (i=0; i < num_vectors*local_size; i++)
      vector_data[vec_starts[my_id]+i] = local_data[i];

   hypre_MPI_Waitall(num_requests, requests, status);

   if (num_requests)
   {
      hypre_TFree(used_procs);
      hypre_TFree(requests);
      hypre_TFree(status);
   }

#endif

   return vector;
}
HYPRE_Int HYPRE_SStructGridDestroy( HYPRE_SStructGrid grid )
/*---------------------------------------------------------------------
 * Drops one reference to the grid and, when the reference count reaches
 * zero, releases everything the grid holds: per-part and per-variable
 * neighbor data, box managers, part grids, FEM data, the variable
 * neighbor communication info and finally the grid object itself.
 *
 * A NULL grid is accepted and ignored.
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------*/
{
   HYPRE_Int                 nparts;
   hypre_SStructPGrid      **pgrids;
   HYPRE_Int                *nneighbors;
   hypre_SStructNeighbor   **neighbors;
   hypre_Index             **nbor_offsets;
   HYPRE_Int               **nvneighbors;
   hypre_SStructNeighbor  ***vneighbors;
   hypre_SStructCommInfo   **vnbor_comm_info;
   HYPRE_Int                 vnbor_ncomms;
   HYPRE_Int                *fem_nvars;
   HYPRE_Int               **fem_vars;
   hypre_Index             **fem_offsets;
   hypre_BoxManager       ***managers;
   hypre_BoxManager       ***nbor_managers;
   HYPRE_Int                 nvars;
   HYPRE_Int                 part, var, i;

   if (grid)
   {
      hypre_SStructGridRefCount(grid) --;
      if (hypre_SStructGridRefCount(grid) == 0)
      {
         /* Fetch every member first; the grid itself is freed last. */
         nparts          = hypre_SStructGridNParts(grid);
         pgrids          = hypre_SStructGridPGrids(grid);
         nneighbors      = hypre_SStructGridNNeighbors(grid);
         neighbors       = hypre_SStructGridNeighbors(grid);
         nbor_offsets    = hypre_SStructGridNborOffsets(grid);
         nvneighbors     = hypre_SStructGridNVNeighbors(grid);
         vneighbors      = hypre_SStructGridVNeighbors(grid);
         vnbor_comm_info = hypre_SStructGridVNborCommInfo(grid);
         vnbor_ncomms    = hypre_SStructGridVNborNComms(grid);
         fem_nvars       = hypre_SStructGridFEMNVars(grid);
         fem_vars        = hypre_SStructGridFEMVars(grid);
         fem_offsets     = hypre_SStructGridFEMOffsets(grid);
         managers        = hypre_SStructGridBoxManagers(grid);
         nbor_managers   = hypre_SStructGridNborBoxManagers(grid);

         /* Per-part data; nvars must be read before the part grid is
          * destroyed. */
         for (part = 0; part < nparts; part++)
         {
            nvars = hypre_SStructPGridNVars(pgrids[part]);
            for (var = 0; var < nvars; var++)
            {
               hypre_TFree(vneighbors[part][var]);
               hypre_BoxManDestroy(managers[part][var]);
               hypre_BoxManDestroy(nbor_managers[part][var]);
            }
            hypre_TFree(neighbors[part]);
            hypre_TFree(nbor_offsets[part]);
            hypre_TFree(nvneighbors[part]);
            hypre_TFree(vneighbors[part]);
            hypre_SStructPGridDestroy(pgrids[part]);
            hypre_TFree(fem_vars[part]);
            hypre_TFree(fem_offsets[part]);
            hypre_TFree(managers[part]);
            hypre_TFree(nbor_managers[part]);
         }

         /* Variable-neighbor communication info: entries, then the array.
          * The array is released exactly once, here. */
         for (i = 0; i < vnbor_ncomms; i++)
         {
            hypre_CommInfoDestroy(
               hypre_SStructCommInfoCommInfo(vnbor_comm_info[i]));
            hypre_TFree(vnbor_comm_info[i]);
         }
         hypre_TFree(vnbor_comm_info);

         /* Top-level arrays and the grid object. */
         hypre_TFree(pgrids);
         hypre_TFree(nneighbors);
         hypre_TFree(neighbors);
         hypre_TFree(nbor_offsets);
         hypre_TFree(fem_nvars);
         hypre_TFree(fem_vars);
         hypre_TFree(fem_offsets);
         hypre_TFree(nvneighbors);
         hypre_TFree(vneighbors);
         hypre_TFree(managers);
         hypre_TFree(nbor_managers);
         hypre_TFree(grid);
      }
   }

   return hypre_error_flag;
}
HYPRE_Int HYPRE_ParCSRMLSetup( HYPRE_Solver solver, HYPRE_ParCSRMatrix A,
                               HYPRE_ParVector b, HYPRE_ParVector x )
/* ----------------------------------------------------------------- */
/* Sets up the ML multigrid hierarchy for the matrix A.              */
/*                                                                   */
/* solver : an MH_Link (cast from HYPRE_Solver) holding the ML       */
/*          object, the communicator and the user options            */
/* A      : the ParCSR matrix to build the hierarchy for             */
/* b, x   : not referenced by this routine                           */
/*                                                                   */
/* Returns 0 on success and 1 if one of the context allocations      */
/* fails; in the failure case link->contxt is left untouched and     */
/* nothing is leaked.                                                */
/* ----------------------------------------------------------------- */
{
   HYPRE_Int   i, my_id, nprocs, coarsest_level, level, sweeps, nlevels;
   HYPRE_Int   *row_partition, localEqns, length;
   HYPRE_Int   Nblocks, *blockList;
   HYPRE_Int   *partition;
   double      wght;
   MH_Context  *context;
   MH_Matrix   *mh_mat;

   /* -------------------------------------------------------- */
   /* fetch the ML pointer                                     */
   /* -------------------------------------------------------- */

   MH_Link *link = (MH_Link *) solver;
   ML      *ml   = link->ml_ptr;
   nlevels = link->nlevels;

   /* -------------------------------------------------------- */
   /* set up the parallel environment                          */
   /* -------------------------------------------------------- */

   hypre_MPI_Comm_rank(link->comm, &my_id);
   hypre_MPI_Comm_size(link->comm, &nprocs);

   /* -------------------------------------------------------- */
   /* fetch the matrix row partition information and put it    */
   /* into the matrix data object (for matvec and getrow)      */
   /* -------------------------------------------------------- */

   HYPRE_ParCSRMatrixGetRowPartitioning( A, &row_partition );
   localEqns = row_partition[my_id+1] - row_partition[my_id];

   /* Allocate everything before touching the link so that a failed */
   /* allocation can be reported instead of dereferencing NULL.     */
   context   = (MH_Context *) malloc(sizeof(MH_Context));
   mh_mat    = (MH_Matrix *)  malloc(sizeof(MH_Matrix));
   partition = (HYPRE_Int *)  malloc(sizeof(HYPRE_Int)*(nprocs+1));
   if ( context == NULL || mh_mat == NULL || partition == NULL )
   {
      free(context);
      free(mh_mat);
      free(partition);
      hypre_TFree( row_partition );
      return 1;
   }

   link->contxt = context;
   context->comm = link->comm;
   context->globalEqns = row_partition[nprocs];
   context->partition = partition;
   for (i=0; i<=nprocs; i++) context->partition[i] = row_partition[i];
   hypre_TFree( row_partition );
   context->Amat = mh_mat;
   HYPRE_ParCSRMLConstructMHMatrix(A,mh_mat,link->comm,
                                   context->partition,context);

   /* -------------------------------------------------------- */
   /* set up the ML communicator information                   */
   /* -------------------------------------------------------- */

   ML_Set_Comm_Communicator(ml, link->comm);
   ML_Set_Comm_MyRank(ml, my_id);
   ML_Set_Comm_Nprocs(ml, nprocs);
   ML_Set_Comm_Send(ml, MH_Send);
   ML_Set_Comm_Recv(ml, MH_Irecv);
   ML_Set_Comm_Wait(ml, MH_Wait);

   /* -------------------------------------------------------- */
   /* set up the ML matrix information                         */
   /* (the fine grid is registered at level nlevels-1)         */
   /* -------------------------------------------------------- */

   ML_Init_Amatrix(ml, nlevels-1, localEqns, localEqns, (void *) context);
   ML_Set_Amatrix_Matvec(ml, nlevels-1, MH_MatVec);
   /* local equations plus everything received from neighbors */
   length = localEqns;
   for (i=0; i<mh_mat->recvProcCnt; i++ ) length += mh_mat->recvLeng[i];
   ML_Set_Amatrix_Getrow(ml, nlevels-1, MH_GetRow, MH_ExchBdry, length);

   /* -------------------------------------------------------- */
   /* create an aggregate context                              */
   /* -------------------------------------------------------- */

   ML_Aggregate_Create(&(link->ml_ag));
   link->ml_ag->max_levels = link->nlevels;
   ML_Aggregate_Set_Threshold( link->ml_ag, link->ag_threshold );

   /* -------------------------------------------------------- */
   /* perform aggregation                                      */
   /* -------------------------------------------------------- */

   coarsest_level = ML_Gen_MGHierarchy_UsingAggregation(ml, nlevels-1,
                                        ML_DECREASING, link->ml_ag);
   if ( my_id == 0 )
      hypre_printf("ML : number of levels = %d\n", coarsest_level);

   /* convert the level count into the index of the coarsest level */
   coarsest_level = nlevels - coarsest_level;

   /* -------------------------------------------------------- */
   /* set up smoother and coarse solver                        */
   /* -------------------------------------------------------- */

   for (level = nlevels-1; level > coarsest_level; level--)
   {
      sweeps = link->pre_sweeps;
      wght   = link->jacobi_wt;   /* also used by the post-smoother below */
      switch ( link->pre )
      {
         case 0 :
            ML_Gen_SmootherJacobi(ml, level, ML_PRESMOOTHER, sweeps, wght);
            break;
         case 1 :
            ML_Gen_SmootherGaussSeidel(ml, level, ML_PRESMOOTHER, sweeps);
            break;
         case 2 :
            ML_Gen_SmootherSymGaussSeidel(ml,level,ML_PRESMOOTHER,sweeps,1.0);
            break;
         case 3 :
            Nblocks = ML_Aggregate_Get_AggrCount( link->ml_ag, level );
            ML_Aggregate_Get_AggrMap( link->ml_ag, level, &blockList );
            ML_Gen_SmootherVBlockGaussSeidel(ml,level,ML_PRESMOOTHER,
                                             sweeps, Nblocks, blockList);
            break;
         case 4 :
            Nblocks = ML_Aggregate_Get_AggrCount( link->ml_ag, level );
            ML_Aggregate_Get_AggrMap( link->ml_ag, level, &blockList );
            ML_Gen_SmootherVBlockJacobi(ml,level,ML_PRESMOOTHER,
                                        sweeps, wght, Nblocks, blockList);
            break;
         default :
            /* any other id: no pre-smoother is generated on this level */
            break;
      }

      sweeps = link->post_sweeps;
      switch ( link->post )
      {
         case 0 :
            ML_Gen_SmootherJacobi(ml, level, ML_POSTSMOOTHER, sweeps, wght);
            break;
         case 1 :
            ML_Gen_SmootherGaussSeidel(ml, level, ML_POSTSMOOTHER, sweeps);
            break;
         case 2 :
            ML_Gen_SmootherSymGaussSeidel(ml,level,ML_POSTSMOOTHER,sweeps,1.0);
            break;
         case 3 :
            Nblocks = ML_Aggregate_Get_AggrCount( link->ml_ag, level );
            ML_Aggregate_Get_AggrMap( link->ml_ag, level, &blockList );
            ML_Gen_SmootherVBlockGaussSeidel(ml,level,ML_POSTSMOOTHER,
                                             sweeps, Nblocks, blockList);
            break;
         case 4 :
            Nblocks = ML_Aggregate_Get_AggrCount( link->ml_ag, level );
            ML_Aggregate_Get_AggrMap( link->ml_ag, level, &blockList );
            ML_Gen_SmootherVBlockJacobi(ml,level,ML_POSTSMOOTHER,
                                        sweeps, wght, Nblocks, blockList);
            break;
         default :
            /* any other id: no post-smoother is generated on this level */
            break;
      }
   }

   /* Coarse solve with SuperLU.  Alternative left disabled by the     */
   /* original author:                                                 */
   /*   ML_Gen_SmootherGaussSeidel(ml, coarsest_level,                 */
   /*                              ML_PRESMOOTHER, 100);               */
   ML_Gen_CoarseSolverSuperLU(ml, coarsest_level);

   ML_Gen_Solver(ml, ML_MGV, nlevels-1, coarsest_level);

   return 0;
}
HYPRE_Int AmgCGCGraphAssemble (hypre_ParCSRMatrix *S,HYPRE_Int *vertexrange,HYPRE_Int *CF_marker,HYPRE_Int *CF_marker_offd,HYPRE_Int coarsen_type, HYPRE_IJMatrix *ijG)
/* assemble a graph representing the connections between the grids
 * ================================================================================================
 * S : the strength matrix
 * vertexrange : the parallel layout of the candidate coarse grid vertices
 *               (only entries 0 and 1 are read with HYPRE_NO_GLOBAL_PARTITION,
 *               otherwise it is indexed by processor rank)
 * CF_marker, CF_marker_offd : the coarse/fine markers of the local and the
 *               off-processor points
 * coarsen_type : the coarsening type (not referenced in this function)
 * ijG : the created graph (output); an IJ matrix with one row and one column
 *       per local candidate coarse grid
 *
 * Returns the status of the last HYPRE_IJMatrixAddToValues call, or 0 if no
 * such call was made.
 * ================================================================================================*/
{
  HYPRE_Int ierr=0;
  HYPRE_Int i,/* ii,*/ip,j,jj,m,n,p;
  HYPRE_Int mpisize,mpirank;

  HYPRE_Real weight;

  MPI_Comm comm = hypre_ParCSRMatrixComm(S);
/* hypre_MPI_Status status; */

  HYPRE_IJMatrix ijmatrix;
  hypre_CSRMatrix *S_diag = hypre_ParCSRMatrixDiag (S);
  hypre_CSRMatrix *S_offd = hypre_ParCSRMatrixOffd (S);
/* HYPRE_Int *S_i = hypre_CSRMatrixI(S_diag); */
/* HYPRE_Int *S_j = hypre_CSRMatrixJ(S_diag); */
  HYPRE_Int *S_offd_i = hypre_CSRMatrixI(S_offd);
  HYPRE_Int *S_offd_j = NULL;
  HYPRE_Int num_variables = hypre_CSRMatrixNumRows (S_diag);
  HYPRE_Int num_cols_offd = hypre_CSRMatrixNumCols (S_offd);
  HYPRE_Int *col_map_offd = hypre_ParCSRMatrixColMapOffd (S);
  /* "point" ranges are row ranges of S, "vertex" ranges are ranges of
     candidate coarse grids; the *_nonlocal arrays hold [start,end) pairs for
     every receive processor, the *_strong arrays for the strongly connected
     subset of them */
  HYPRE_Int pointrange_start,pointrange_end;
  HYPRE_Int *pointrange,*pointrange_nonlocal,*pointrange_strong=NULL;
  HYPRE_Int vertexrange_start,vertexrange_end;
  HYPRE_Int *vertexrange_strong= NULL;
  HYPRE_Int *vertexrange_nonlocal;
  HYPRE_Int num_recvs,num_recvs_strong;
  HYPRE_Int *recv_procs,*recv_procs_strong=NULL;
  HYPRE_Int /* *zeros,*rownz,*/*rownz_diag,*rownz_offd;
  HYPRE_Int nz;
  HYPRE_Int nlocal;
  HYPRE_Int one=1;

  hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg (S);

  hypre_MPI_Comm_size (comm,&mpisize);
  hypre_MPI_Comm_rank (comm,&mpirank);

  /* determine neighbor processors */
  num_recvs = hypre_ParCSRCommPkgNumRecvs (comm_pkg);
  recv_procs = hypre_ParCSRCommPkgRecvProcs (comm_pkg);
  pointrange = hypre_ParCSRMatrixRowStarts (S);
  pointrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs);
  vertexrange_nonlocal = hypre_CTAlloc (HYPRE_Int, 2*num_recvs);
#ifdef HYPRE_NO_GLOBAL_PARTITION
  {
    /* Only the local ranges are known, so the ranges of the neighbors are
       obtained by message exchange: each process sends its own point and
       vertex range to every send processor and receives the corresponding
       pairs from every receive processor.  tag_pointrange and
       tag_vertexrange are not declared in this function. */
    HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends (comm_pkg);
    HYPRE_Int *send_procs = hypre_ParCSRCommPkgSendProcs (comm_pkg);
    /* first half: point range copies, second half: vertex range copies */
    HYPRE_Int *int_buf_data = hypre_CTAlloc (HYPRE_Int,4*num_sends);
    HYPRE_Int *int_buf_data2 = int_buf_data + 2*num_sends;
    hypre_MPI_Request *sendrequest,*recvrequest;

    nlocal = vertexrange[1] - vertexrange[0];
    pointrange_start = pointrange[0];
    pointrange_end = pointrange[1];
    vertexrange_start = vertexrange[0];
    vertexrange_end = vertexrange[1];
    /* one allocation for all requests: sends first, receives behind them */
    sendrequest = hypre_CTAlloc (hypre_MPI_Request,2*(num_sends+num_recvs));
    recvrequest = sendrequest+2*num_sends;

    for (i=0;i<num_recvs;i++) {
      hypre_MPI_Irecv (pointrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_pointrange,comm,&recvrequest[2*i]);
      hypre_MPI_Irecv (vertexrange_nonlocal+2*i,2,HYPRE_MPI_INT,recv_procs[i],tag_vertexrange,comm,&recvrequest[2*i+1]);
    }
    for (i=0;i<num_sends;i++) {
      int_buf_data[2*i] = pointrange_start;
      int_buf_data[2*i+1] = pointrange_end;
      int_buf_data2[2*i] = vertexrange_start;
      int_buf_data2[2*i+1] = vertexrange_end;
      hypre_MPI_Isend (int_buf_data+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_pointrange,comm,&sendrequest[2*i]);
      hypre_MPI_Isend (int_buf_data2+2*i,2,HYPRE_MPI_INT,send_procs[i],tag_vertexrange,comm,&sendrequest[2*i+1]);
    }
    /* waits for the sends and the receives in one call */
    hypre_MPI_Waitall (2*(num_sends+num_recvs),sendrequest,hypre_MPI_STATUSES_IGNORE);
    hypre_TFree (int_buf_data);
    hypre_TFree (sendrequest);
  }
#else
  /* the global partitionings are available: read all ranges directly */
  nlocal = vertexrange[mpirank+1] - vertexrange[mpirank];
  pointrange_start = pointrange[mpirank];
  pointrange_end = pointrange[mpirank+1];
  vertexrange_start = vertexrange[mpirank];
  vertexrange_end = vertexrange[mpirank+1];
  for (i=0;i<num_recvs;i++) {
    pointrange_nonlocal[2*i] = pointrange[recv_procs[i]];
    pointrange_nonlocal[2*i+1] = pointrange[recv_procs[i]+1];
    vertexrange_nonlocal[2*i] = vertexrange[recv_procs[i]];
    vertexrange_nonlocal[2*i+1] = vertexrange[recv_procs[i]+1];
  }
#endif
  /* now we have the array recv_procs. However, it may contain too many entries as it is
     inherited from A. We now have to determine the subset which contains only the
     strongly connected neighbors */
  if (num_cols_offd) {
    S_offd_j = hypre_CSRMatrixJ(S_offd);

    /* used as a 0/1 flag per receive processor first, compacted in place
       into the list of strong processors afterwards */
    recv_procs_strong = hypre_CTAlloc (HYPRE_Int,num_recvs);
    memset (recv_procs_strong,0,num_recvs*sizeof(HYPRE_Int));
    /* don't forget to shorten the pointrange and vertexrange arrays accordingly */
    pointrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs);
    memset (pointrange_strong,0,2*num_recvs*sizeof(HYPRE_Int));
    vertexrange_strong = hypre_CTAlloc (HYPRE_Int,2*num_recvs);
    memset (vertexrange_strong,0,2*num_recvs*sizeof(HYPRE_Int));

    for (i=0;i<num_variables;i++)
      for (j=S_offd_i[i];j<S_offd_i[i+1];j++) {
        jj = col_map_offd[S_offd_j[j]];
        /* linear search for the receive processor whose point range holds jj */
        for (p=0;p<num_recvs;p++) /* S_offd_j is NOT sorted! */
          if (jj >= pointrange_nonlocal[2*p] && jj < pointrange_nonlocal[2*p+1]) break;
#if 0
        hypre_printf ("Processor %d, remote point %d on processor %d\n",mpirank,jj,recv_procs[p]);
#endif
        /* NOTE(review): if no range matched, p == num_recvs here and this
           writes one past the end of the array -- presumably every
           off-processor column belongs to some receive processor; confirm */
        recv_procs_strong [p]=1;
      }

    /* compact: keep only the flagged processors and their ranges */
    for (p=0,num_recvs_strong=0;p<num_recvs;p++) {
      if (recv_procs_strong[p]) {
        recv_procs_strong[num_recvs_strong]=recv_procs[p];
        pointrange_strong[2*num_recvs_strong] = pointrange_nonlocal[2*p];
        pointrange_strong[2*num_recvs_strong+1] = pointrange_nonlocal[2*p+1];
        vertexrange_strong[2*num_recvs_strong] = vertexrange_nonlocal[2*p];
        vertexrange_strong[2*num_recvs_strong+1] = vertexrange_nonlocal[2*p+1];
        num_recvs_strong++;
      }
    }
  }
  else num_recvs_strong=0;

  hypre_TFree (pointrange_nonlocal);
  hypre_TFree (vertexrange_nonlocal);

  /* row sizes for the IJ matrix: one allocation holds both arrays */
  rownz_diag = hypre_CTAlloc (HYPRE_Int,2*nlocal);
  rownz_offd = rownz_diag + nlocal;
  /* nz = total number of candidate coarse grids on the strong neighbors */
  for (p=0,nz=0;p<num_recvs_strong;p++) {
    nz += vertexrange_strong[2*p+1]-vertexrange_strong[2*p];
  }
  for (m=0;m<nlocal;m++) {
    rownz_diag[m]=nlocal-1;
    rownz_offd[m]=nz;
  }

  HYPRE_IJMatrixCreate(comm, vertexrange_start, vertexrange_end-1, vertexrange_start, vertexrange_end-1, &ijmatrix);
  HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR);
  HYPRE_IJMatrixSetDiagOffdSizes (ijmatrix, rownz_diag, rownz_offd);
  HYPRE_IJMatrixInitialize(ijmatrix);
  /* also releases rownz_offd, which points into the same allocation */
  hypre_TFree (rownz_diag);

  /* initialize graph: add -1 to every (local grid, strong neighbor grid) entry */
  weight = -1;
  for (m=vertexrange_start;m<vertexrange_end;m++) {
    for (p=0;p<num_recvs_strong;p++) {
      for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) {
        ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight);
#if 0
        if (ierr) hypre_printf ("Processor %d: error %d while initializing graphs at (%d, %d)\n",mpirank,ierr,m,n);
#endif
      }
    }
  }

  /* weight graph: for every strong off-processor connection (i,jj), add a
     weight to each pair (m,n) of a local grid and a grid of the processor
     that owns jj */
  for (i=0;i<num_variables;i++) {

    for (j=S_offd_i[i];j<S_offd_i[i+1];j++) {
      jj = S_offd_j[j]; /* jj is not a global index!!! */
      /* determine processor */
      for (p=0;p<num_recvs_strong;p++)
        if (col_map_offd[jj] >= pointrange_strong[2*p] && col_map_offd[jj] < pointrange_strong[2*p+1]) break;
      /* ip is assigned but not read anywhere else in this function */
      ip=recv_procs_strong[p];
      /* loop over all coarse grids constructed on this processor domain */
      for (m=vertexrange_start;m<vertexrange_end;m++) {
        /* loop over all coarse grids constructed on neighbor processor domain */
        for (n=vertexrange_strong[2*p];n<vertexrange_strong[2*p+1];n++) {
          /* coarse grid counting inside gridpartition->local/gridpartition->nonlocal starts with one
             while counting inside range starts with zero */
          if (CF_marker[i]-1==m && CF_marker_offd[jj]-1==n)
            /* C-C-coupling */
            weight = -1;
          else if ( (CF_marker[i]-1==m && (CF_marker_offd[jj]==0 || CF_marker_offd[jj]-1!=n) ) || ( (CF_marker[i]==0 || CF_marker[i]-1!=m) && CF_marker_offd[jj]-1==n ) )
            /* C-F-coupling */
            weight = 0;
          else weight = -8; /* F-F-coupling */
          ierr = HYPRE_IJMatrixAddToValues (ijmatrix,1,&one,&m,&n,&weight);
#if 0
          if (ierr) hypre_printf ("Processor %d: error %d while adding %lf to entry (%d, %d)\n",mpirank,ierr,weight,m,n);
#endif
        }
      }
    }
  }

  /* assemble */
  HYPRE_IJMatrixAssemble (ijmatrix);
  /* the *_strong arrays are still NULL here when num_cols_offd is 0 */
  /*if (num_recvs_strong) {*/
  hypre_TFree (recv_procs_strong);
  hypre_TFree (pointrange_strong);
  hypre_TFree (vertexrange_strong);
  /*} */

  *ijG = ijmatrix;
  return (ierr);
}
/*--------------------------------------------------------------------------
 * hypre_SStructUMatrixSetBoxValues
 *
 * Transfers the coefficients of a box of grid points between the user
 * array `values` and the IJ matrix attached to `matrix`.
 *
 *   matrix   : semi-structured matrix whose IJ matrix is accessed
 *   part,var : select the stencil and the box map used for the rows
 *   ilower,
 *   iupper   : corners of the index box
 *   nentries : number of stencil entries listed in `entries`
 *   entries  : stencil entry numbers; only entries[0] is tested to decide
 *              whether the stencil path is taken
 *   values   : user data, laid out as nentries consecutive values per
 *              grid point, x fastest, then y, then z
 *   add_to   : > 0  -> HYPRE_IJMatrixAddToValues
 *              == 0 -> HYPRE_IJMatrixSetValues (only for map entries owned
 *                      by the calling process)
 *              < 0  -> HYPRE_IJMatrixGetValues (called with `values`)
 *
 * Returns 0.
 *
 * NOTE(review): only the case entries[0] < stencil size is handled here;
 * nothing is done when entries[0] is at or beyond the stencil size.
 * Confirm that this is the intended behavior for such entries.
 *--------------------------------------------------------------------------*/
int
hypre_SStructUMatrixSetBoxValues( hypre_SStructMatrix *matrix,
                                  int                  part,
                                  hypre_Index          ilower,
                                  hypre_Index          iupper,
                                  int                  var,
                                  int                  nentries,
                                  int                 *entries,
                                  double              *values,
                                  int                  add_to )
{
   HYPRE_IJMatrix        ijmatrix = hypre_SStructMatrixIJMatrix(matrix);
   hypre_SStructGraph   *graph    = hypre_SStructMatrixGraph(matrix);
   hypre_SStructGrid    *grid     = hypre_SStructGraphGrid(graph);
   hypre_SStructStencil *stencil  = hypre_SStructGraphStencil(graph, part, var);
   int                  *vars     = hypre_SStructStencilVars(stencil);
   hypre_Index          *shape    = hypre_SStructStencilShape(stencil);
   int                   size     = hypre_SStructStencilSize(stencil);
   hypre_IndexRef        offset;
   hypre_BoxMap         *map;
   hypre_BoxMapEntry   **map_entries;
   int                   nmap_entries;
   hypre_BoxMapEntry   **map_to_entries;
   int                   nmap_to_entries;
   int                   nrows;
   int                  *ncols;
   HYPRE_BigInt         *rows;
   HYPRE_BigInt         *cols;
   double               *ijvalues;
   hypre_Box            *box;
   hypre_Box            *to_box;
   hypre_Box            *map_box;
   hypre_Box            *int_box;
   hypre_Index           index;
   hypre_Index           rs, cs;
   int                   sy, sz;
   HYPRE_BigInt          row_base, col_base;
   int                   val_base;
   int                   e, entry, ii, jj, i, j, k;
   int                   proc, myproc = 0;
   /* GEC1002 the matrix type */
   int                   matrix_type = hypre_SStructMatrixObjectType(matrix);

   box = hypre_BoxCreate();

   /*------------------------------------------
    * all stencil entries
    *------------------------------------------*/

   if (entries[0] < size)
   {
      to_box  = hypre_BoxCreate();
      map_box = hypre_BoxCreate();
      int_box = hypre_BoxCreate();

      hypre_CopyIndex(ilower, hypre_BoxIMin(box));
      hypre_CopyIndex(iupper, hypre_BoxIMax(box));
      /* ZTODO: check that this change fixes multiple-entry problem */
      /* one IJ "row" per (grid point, stencil entry) pair, each carrying
       * exactly one column */
      nrows = hypre_BoxVolume(box)*nentries;
      ncols = hypre_CTAlloc(int, nrows);
      for (i = 0; i < nrows; i++)
      {
         ncols[i] = 1;
      }
      rows     = hypre_CTAlloc(HYPRE_BigInt, nrows);
      cols     = hypre_CTAlloc(HYPRE_BigInt, nrows);
      ijvalues = hypre_CTAlloc(double, nrows);

      /* strides (in grid points) of the user box in y and z */
      sy = (hypre_IndexX(iupper) - hypre_IndexX(ilower) + 1);
      sz = (hypre_IndexY(iupper) - hypre_IndexY(ilower) + 1) * sy;

      map = hypre_SStructGridMap(grid, part, var);
      hypre_BoxMapIntersect(map, ilower, iupper, &map_entries, &nmap_entries);

      /* the rank is only needed for the ownership test in "set" mode and
       * does not change between map entries, so query it once */
      if (!add_to)
      {
         MPI_Comm_rank(hypre_SStructGridComm(grid), &myproc);
      }

      for (ii = 0; ii < nmap_entries; ii++)
      {
         /* Only Set values if I am the owner process; off-process AddTo and
          * Get values are done by IJ */
         if (!add_to)
         {
            hypre_SStructMapEntryGetProcess(map_entries[ii], &proc);
            if (proc != myproc)
            {
               continue;
            }
         }

         /* GEC1002 introducing the strides based on the type of the matrix */
         hypre_SStructMapEntryGetStrides(map_entries[ii], rs, matrix_type);

         /* box := [ilower, iupper] intersected with this map entry */
         hypre_CopyIndex(ilower, hypre_BoxIMin(box));
         hypre_CopyIndex(iupper, hypre_BoxIMax(box));
         hypre_BoxMapEntryGetExtents(map_entries[ii],
                                     hypre_BoxIMin(map_box),
                                     hypre_BoxIMax(map_box));
         hypre_IntersectBoxes(box, map_box, int_box);
         hypre_CopyBox(int_box, box);

         nrows = 0;
         for (e = 0; e < nentries; e++)
         {
            entry = entries[e];

            /* to_box := box shifted by the stencil offset of this entry */
            hypre_CopyBox(box, to_box);

            offset = shape[entry];
            hypre_BoxIMinX(to_box) += hypre_IndexX(offset);
            hypre_BoxIMinY(to_box) += hypre_IndexY(offset);
            hypre_BoxIMinZ(to_box) += hypre_IndexZ(offset);
            hypre_BoxIMaxX(to_box) += hypre_IndexX(offset);
            hypre_BoxIMaxY(to_box) += hypre_IndexY(offset);
            hypre_BoxIMaxZ(to_box) += hypre_IndexZ(offset);

            map = hypre_SStructGridMap(grid, part, vars[entry]);
            hypre_BoxMapIntersect(map, hypre_BoxIMin(to_box),
                                  hypre_BoxIMax(to_box),
                                  &map_to_entries, &nmap_to_entries );

            for (jj = 0; jj < nmap_to_entries; jj++)
            {
               /* GEC1002 introducing the strides based on the type of the matrix */
               hypre_SStructMapEntryGetStrides(map_to_entries[jj], cs, matrix_type);

               hypre_BoxMapEntryGetExtents(map_to_entries[jj],
                                           hypre_BoxIMin(map_box),
                                           hypre_BoxIMax(map_box));
               hypre_IntersectBoxes(to_box, map_box, int_box);

               hypre_CopyIndex(hypre_BoxIMin(int_box), index);

               /* GEC1002 introducing the rank based on the type of the matrix */
               hypre_SStructMapEntryGetGlobalRank(map_to_entries[jj],
                                                  index, &col_base,matrix_type);

               /* shift back by the stencil offset to obtain the row index */
               hypre_IndexX(index) -= hypre_IndexX(offset);
               hypre_IndexY(index) -= hypre_IndexY(offset);
               hypre_IndexZ(index) -= hypre_IndexZ(offset);

               /* GEC1002 introducing the rank based on the type of the matrix */
               hypre_SStructMapEntryGetGlobalRank(map_entries[ii],
                                                  index, &row_base,matrix_type);

               /* make the index relative to the user box to address `values` */
               hypre_IndexX(index) -= hypre_IndexX(ilower);
               hypre_IndexY(index) -= hypre_IndexY(ilower);
               hypre_IndexZ(index) -= hypre_IndexZ(ilower);
               val_base = e + (hypre_IndexX(index) +
                               hypre_IndexY(index)*sy +
                               hypre_IndexZ(index)*sz) * nentries;

               for (k = 0; k < hypre_BoxSizeZ(int_box); k++)
               {
                  for (j = 0; j < hypre_BoxSizeY(int_box); j++)
                  {
                     for (i = 0; i < hypre_BoxSizeX(int_box); i++)
                     {
                        rows[nrows] = row_base + (HYPRE_BigInt)(i*rs[0] + j*rs[1] + k*rs[2]);
                        cols[nrows] = col_base + (HYPRE_BigInt)(i*cs[0] + j*cs[1] + k*cs[2]);
                        ijvalues[nrows] =
                           values[val_base + (i + j*sy + k*sz)*nentries];
                        nrows++;
                     }
                  }
               }
            }

            hypre_TFree(map_to_entries);
         }

         /*------------------------------------------
          * set IJ values one stencil entry at a time
          *------------------------------------------*/

         if (add_to > 0)
         {
            HYPRE_IJMatrixAddToValues(ijmatrix, nrows, ncols,
                                      (const HYPRE_BigInt *) rows,
                                      (const HYPRE_BigInt *) cols,
                                      (const double *) ijvalues);
         }
         else if (add_to > -1)
         {
            HYPRE_IJMatrixSetValues(ijmatrix, nrows, ncols,
                                    (const HYPRE_BigInt *) rows,
                                    (const HYPRE_BigInt *) cols,
                                    (const double *) ijvalues);
         }
         else
         {
            HYPRE_IJMatrixGetValues(ijmatrix, nrows, ncols, rows, cols, values);
         }
      }

      hypre_TFree(map_entries);

      hypre_TFree(ncols);
      hypre_TFree(rows);
      hypre_TFree(cols);
      hypre_TFree(ijvalues);

      hypre_BoxDestroy(to_box);
      hypre_BoxDestroy(map_box);
      hypre_BoxDestroy(int_box);
   }

   /* `box` is created unconditionally above, so release it unconditionally */
   hypre_BoxDestroy(box);

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRMatrixMatvec_FF
 *
 * Parallel driver around hypre_CSRMatrixMatvec_FF: applies it to the diag
 * block of A with (CF_marker, CF_marker) and, when more than one process is
 * present and A has off-processor columns, to the offd block with
 * (CF_marker, CF_marker_offd), accumulating into y with beta = 1.0.
 * (Presumably this yields y = alpha*A*x + beta*y restricted to the points
 * whose marker equals fpt -- confirm against hypre_CSRMatrixMatvec_FF.)
 *
 * The off-processor entries of x are exchanged with communication job 1
 * (double buffers) while the diag product is computed; the off-processor
 * CF markers are exchanged afterwards with job 11 (int buffers).
 *
 * Returns ierr (see the size-compatibility comment below); the flag is
 * informational and never stops processing.
 *--------------------------------------------------------------------------*/
int
hypre_ParCSRMatrixMatvec_FF( double              alpha,
                             hypre_ParCSRMatrix *A,
                             hypre_ParVector    *x,
                             double              beta,
                             hypre_ParVector    *y,
                             int                *CF_marker,
                             int                 fpt )
{
   MPI_Comm                comm = hypre_ParCSRMatrixComm(A);
   hypre_ParCSRCommHandle *comm_handle = NULL;
   hypre_ParCSRCommPkg    *comm_pkg = hypre_ParCSRMatrixCommPkg(A);
   hypre_CSRMatrix        *diag = hypre_ParCSRMatrixDiag(A);
   hypre_CSRMatrix        *offd = hypre_ParCSRMatrixOffd(A);
   hypre_Vector           *x_local = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_local = hypre_ParVectorLocalVector(y);
   HYPRE_BigInt            num_rows = hypre_ParCSRMatrixGlobalNumRows(A);
   HYPRE_BigInt            num_cols = hypre_ParCSRMatrixGlobalNumCols(A);

   /* Must start out NULL: x_tmp is only created when there are
    * off-processor columns, but it is handed to hypre_SeqVectorDestroy
    * whenever num_procs > 1. */
   hypre_Vector           *x_tmp = NULL;
   HYPRE_BigInt            x_size = hypre_ParVectorGlobalSize(x);
   HYPRE_BigInt            y_size = hypre_ParVectorGlobalSize(y);
   HYPRE_BigInt            num_cols_offd = hypre_CSRMatrixNumCols(offd);
   int                     ierr = 0;
   int                     num_sends = 0, i, j, index, start, num_procs;
   int                    *int_buf_data = NULL;
   int                    *CF_marker_offd = NULL;

   double                 *x_tmp_data = NULL;
   double                 *x_buf_data = NULL;
   double                 *x_local_data = hypre_VectorData(x_local);

   /*---------------------------------------------------------------------
    *  Check for size compatibility.  ParMatvec returns ierr = 11 if
    *  length of X doesn't equal the number of columns of A,
    *  ierr = 12 if the length of Y doesn't equal the number of rows
    *  of A, and ierr = 13 if both are true.
    *
    *  Because temporary vectors are often used in ParMatvec, none of
    *  these conditions terminates processing, and the ierr flag
    *  is informational only.
    *--------------------------------------------------------------------*/

   MPI_Comm_size(comm,&num_procs);

   if (num_cols != x_size)
      ierr = 11;

   if (num_rows != y_size)
      ierr = 12;

   if (num_cols != x_size && num_rows != y_size)
      ierr = 13;

   if (num_procs > 1)
   {
      if (num_cols_offd)
      {
         x_tmp = hypre_SeqVectorCreate( num_cols_offd );
         hypre_SeqVectorInitialize(x_tmp);
         x_tmp_data = hypre_VectorData(x_tmp);
      }

      /*---------------------------------------------------------------------
       * If there exists no CommPkg for A, a CommPkg is generated using
       * equally load balanced partitionings
       *--------------------------------------------------------------------*/
      if (!comm_pkg)
      {
#ifdef HYPRE_NO_GLOBAL_PARTITION
         hypre_NewCommPkgCreate(A);
#else
         hypre_MatvecCommPkgCreate(A);
#endif
         comm_pkg = hypre_ParCSRMatrixCommPkg(A);
      }

      num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg);
      if (num_sends)
         x_buf_data = hypre_CTAlloc(double,
                                    hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));

      /* pack the locally owned entries of x that other processes need */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            x_buf_data[index++]
               = x_local_data[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate ( 1, comm_pkg, x_buf_data, x_tmp_data );
   }

   /* local (diag) part, computed while the exchange of x is in flight */
   hypre_CSRMatrixMatvec_FF( alpha, diag, x_local, beta, y_local, CF_marker,
                             CF_marker, fpt);

   if (num_procs > 1)
   {
      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      if (num_sends)
         int_buf_data = hypre_CTAlloc(int,
                                      hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends));
      if (num_cols_offd)
         CF_marker_offd = hypre_CTAlloc(int, num_cols_offd);

      /* exchange the CF markers belonging to the off-processor columns */
      index = 0;
      for (i = 0; i < num_sends; i++)
      {
         start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i);
         for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i+1); j++)
            int_buf_data[index++]
               = CF_marker[hypre_ParCSRCommPkgSendMapElmt(comm_pkg,j)];
      }
      comm_handle =
         hypre_ParCSRCommHandleCreate(11,comm_pkg,int_buf_data,CF_marker_offd );

      hypre_ParCSRCommHandleDestroy(comm_handle);
      comm_handle = NULL;

      /* off-processor part is accumulated on top of the diag result */
      if (num_cols_offd)
         hypre_CSRMatrixMatvec_FF( alpha, offd, x_tmp, 1.0, y_local,
                                   CF_marker, CF_marker_offd, fpt);

      hypre_SeqVectorDestroy(x_tmp);
      x_tmp = NULL;
      hypre_TFree(x_buf_data);
      hypre_TFree(int_buf_data);
      hypre_TFree(CF_marker_offd);
   }

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_AMGeAgglomerate
 *
 * Groups elements into agglomerated elements (AEs) by repeatedly
 * "eliminating" a face of maximal weight and merging the elements that
 * share it.
 *
 * Inputs (all graphs are given as row-pointer / column-index pairs):
 *   i_face_face, j_face_face, w_face_face : face-face graph and the weight
 *                                           added to a neighbor face when a
 *                                           face is eliminated
 *   i_face_element, j_face_element        : face -> element graph; entries
 *                                           >= num_elements are ignored in
 *                                           the agglomeration step
 *   i_element_face, j_element_face        : element -> face graph
 *   i_face_to_prefer_weight               : faces with value -1 are never
 *                                           eliminated; among equal weights
 *                                           a larger value is preferred
 *   num_faces, num_elements               : graph sizes
 *
 * Outputs:
 *   i_AE_element, j_AE_element : AE -> element graph; i_AE_element is
 *                                written up to index *num_AEs_pointer, so
 *                                it needs at least (number of AEs + 1)
 *                                entries; j_AE_element receives
 *                                num_elements entries
 *   i_face_weight              : work/output array of length num_faces;
 *                                on the regular exit path faces with
 *                                i_face_to_prefer_weight == -1 are set to -1
 *   num_AEs_pointer            : number of AEs created
 *
 * The faces are kept in a doubly linked list (previous/next with sentinels
 * head = -1 and tail = num_faces) together with the per-weight array
 * `first`; the list is maintained by remove_entry, move_entry and
 * update_entry, which are defined elsewhere.
 *
 * Returns the status of the last list operation (0 if none was performed).
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_AMGeAgglomerate(HYPRE_Int *i_AE_element, HYPRE_Int *j_AE_element,
                      HYPRE_Int *i_face_face, HYPRE_Int *j_face_face, HYPRE_Int *w_face_face,
                      HYPRE_Int *i_face_element, HYPRE_Int *j_face_element,
                      HYPRE_Int *i_element_face, HYPRE_Int *j_element_face,
                      HYPRE_Int *i_face_to_prefer_weight,
                      HYPRE_Int *i_face_weight,
                      HYPRE_Int num_faces, HYPRE_Int num_elements,
                      HYPRE_Int *num_AEs_pointer)
{
   HYPRE_Int ierr = 0;
   HYPRE_Int i, j, k, l;

   HYPRE_Int face_to_eliminate;
   HYPRE_Int max_weight_old, max_weight;

   HYPRE_Int AE_counter=0, AE_element_counter=0;

   HYPRE_Int *i_element_to_AE;

   HYPRE_Int *previous, *next, *first;
   HYPRE_Int head, tail, last;

   HYPRE_Int face_max_weight, face_local_max_weight, preferred_weight;

   HYPRE_Int weight, weight_max;

   /* upper bound for the weight a face can ever reach: its initial weight 1
    * plus all increments it can receive from its neighbors */
   max_weight = 1;
   for (i=0; i < num_faces; i++)
   {
      weight = 1;
      for (j=i_face_face[i]; j < i_face_face[i+1]; j++)
         weight+= w_face_face[j];
      if (max_weight < weight) max_weight = weight;
   }

   first = hypre_CTAlloc(HYPRE_Int, max_weight+1);

   next = hypre_CTAlloc(HYPRE_Int, num_faces);

   previous = hypre_CTAlloc(HYPRE_Int, num_faces+1);

   tail = num_faces;
   head = -1;

   /* initial list: 0 <-> 1 <-> ... <-> num_faces-1 between head and tail */
   for (i=0; i < num_faces; i++)
   {
      next[i] = i+1;
      previous[i] = i-1;
   }

   last = num_faces-1;
   previous[tail] = last;

   for (weight=1; weight <= max_weight; weight++)
      first[weight] = tail;

   i_element_to_AE = hypre_CTAlloc(HYPRE_Int, num_elements);

   /*=======================================================================
                     AGGLOMERATION PROCEDURE:
     ======================================================================= */

   for (k=0; k < num_elements; k++)
      i_element_to_AE[k] = -1;

   for (k=0; k < num_faces; k++)
      i_face_weight[k] = 1;

   first[0] = 0;
   first[1] = 0;

   last = previous[tail];
   /* with an empty face list `last` equals head (-1) and must not be used
    * as an index; same guard as everywhere else in this routine */
   if (last == head)
      weight_max = 0;
   else
      weight_max = i_face_weight[last];

   /* look, from the end of the list, for the first acceptable face */
   k = last;
   face_max_weight = -1;
   while (k!= head)
   {
      if (i_face_to_prefer_weight[k] > -1)
         face_max_weight = k;

      if (face_max_weight > -1) break;

      k=previous[k];
   }

   if (face_max_weight == -1)
   {
      hypre_printf("all faces are unacceptable, i.e., no faces to eliminate !\n");

      /* everything goes into a single agglomerate */
      *num_AEs_pointer = 1;

      i_AE_element[0] = 0;
      for (i=0; i < num_elements; i++)
      {
         i_element_to_AE[i] = 0;
         j_AE_element[i] = i;
      }

      i_AE_element[1] = num_elements;

      /* release the workspace on this early exit as well */
      hypre_TFree(i_element_to_AE);
      hypre_TFree(previous);
      hypre_TFree(next);
      hypre_TFree(first);

      return ierr;
   }

   /* all weights are still equal, so pick the most preferred face */
   for (k=0; k < num_faces; k++)
      if (i_face_to_prefer_weight[k] > i_face_to_prefer_weight[face_max_weight])
         face_max_weight = k;

   max_weight = i_face_weight[face_max_weight];

   AE_counter=0;
   AE_element_counter=0;

   i_AE_element[AE_counter] = AE_element_counter;

   max_weight_old = -1;

   face_local_max_weight = face_max_weight;

eliminate_face:

   face_to_eliminate = face_local_max_weight;

   max_weight = i_face_weight[face_to_eliminate];

   last = previous[tail];
   if (last == head)
      weight_max = 0;
   else
      weight_max = i_face_weight[last];

   ierr = remove_entry(max_weight, &weight_max,
                       previous, next, first, &last,
                       head, tail,
                       face_to_eliminate);

   i_face_weight[face_to_eliminate] = 0;

   /*----------------------------------------------------------
    *  agglomeration step:
    *
    *  put on AE_element -- list all elements
    *  that share face "face_to_eliminate";
    *----------------------------------------------------------*/

   for (k = i_face_element[face_to_eliminate];
        k < i_face_element[face_to_eliminate+1]; k++)
   {
      /* check if element j_face_element[k] is already on the list: */

      if (j_face_element[k] < num_elements)
      {
         if (i_element_to_AE[j_face_element[k]] == -1)
         {
            j_AE_element[AE_element_counter] = j_face_element[k];
            i_element_to_AE[j_face_element[k]] = AE_counter;
            AE_element_counter++;
         }
      }
   }

   /* local update & search:==================================== */

   /* raise the weight of every still-active neighbor face and move it to
    * its new position in the list */
   for (j=i_face_face[face_to_eliminate];
        j<i_face_face[face_to_eliminate+1]; j++)
      if (i_face_weight[j_face_face[j]] > 0)
      {
         weight = i_face_weight[j_face_face[j]];

         last = previous[tail];
         if (last == head)
            weight_max = 0;
         else
            weight_max = i_face_weight[last];

         ierr = move_entry(weight, &weight_max,
                           previous, next, first, &last,
                           head, tail,
                           j_face_face[j]);

         i_face_weight[j_face_face[j]]+=w_face_face[j];

         weight = i_face_weight[j_face_face[j]];

         last = previous[tail];
         if (last == head)
            weight_max = 0;
         else
            weight_max = i_face_weight[last];

         ierr = update_entry(weight, &weight_max,
                             previous, next, first, &last,
                             head, tail,
                             j_face_face[j]);

         last = previous[tail];
         if (last == head)
            weight_max = 0;
         else
            weight_max = i_face_weight[last];
      }

   /* find a face of the elements that have already been agglomerated
      with a maximal weight: ====================================== */

   max_weight_old = max_weight;

   face_local_max_weight = -1;
   preferred_weight = -1;

   for (l = i_AE_element[AE_counter]; l < AE_element_counter; l++)
   {
      for (j=i_element_face[j_AE_element[l]];
           j<i_element_face[j_AE_element[l]+1]; j++)
      {
         i = j_element_face[j];

         if (max_weight_old > 1 && i_face_weight[i] > 0 &&
             i_face_to_prefer_weight[i] > -1)
         {
            if ( max_weight < i_face_weight[i])
            {
               face_local_max_weight = i;
               max_weight = i_face_weight[i];
               preferred_weight = i_face_to_prefer_weight[i];
            }

            if ( max_weight == i_face_weight[i]
                 && i_face_to_prefer_weight[i] > preferred_weight)
            {
               face_local_max_weight = i;
               preferred_weight = i_face_to_prefer_weight[i];
            }
         }
      }
   }

   /* keep growing the current agglomerate while a suitable face exists */
   if (face_local_max_weight > -1) goto eliminate_face;

   /* ----------------------------------------------------------------
    * eliminate and label with i_face_weight[ ] = -1
    * "boundary faces of agglomerated elements";
    * those faces will be preferred for the next coarse spaces
    * in case multiple coarse spaces are to be built;
    * ---------------------------------------------------------------*/

   for (k = i_AE_element[AE_counter]; k < AE_element_counter; k++)
   {
      for (j = i_element_face[j_AE_element[k]];
           j < i_element_face[j_AE_element[k]+1]; j++)
      {
         if (i_face_weight[j_element_face[j]] > 0)
         {
            weight = i_face_weight[j_element_face[j]];
            last = previous[tail];
            if (last == head)
               weight_max = 0;
            else
               weight_max = i_face_weight[last];

            ierr = remove_entry(weight, &weight_max,
                                previous, next, first, &last,
                                head, tail,
                                j_element_face[j]);

            i_face_weight[j_element_face[j]] = -1;
         }
      }
   }

   /* close the current agglomerate if it received at least one element */
   if (AE_element_counter > i_AE_element[AE_counter])
   {
      AE_counter++;
   }

   i_AE_element[AE_counter] = AE_element_counter;

   /* find a face with maximal weight: ---------------------------*/

   last = previous[tail];
   if (last == head) goto end_agglomerate;

   weight_max = i_face_weight[last];

   face_max_weight = -1;

   k = last;
   while (k != head)
   {
      if (i_face_to_prefer_weight[k] > -1)
         face_max_weight = k;

      if (face_max_weight > -1)
      {
         /* among the faces of the same (maximal) weight preceding it in
          * the list, take the most preferred one */
         max_weight = i_face_weight[face_max_weight];
         l = face_max_weight;

         while (previous[l] != head)
         {
            if (i_face_weight[previous[l]] < max_weight)
               break;
            else if (i_face_to_prefer_weight[previous[l]] >
                     i_face_to_prefer_weight[face_max_weight])
            {
               l = previous[l];
               face_max_weight = l;
            }
            else
               l = previous[l];
         }

         break;
      }

      l =previous[k];

      /* remove face k: ---------------------------------------*/

      weight = i_face_weight[k];
      last = previous[tail];
      if (last == head)
         weight_max = 0;
      else
         weight_max = i_face_weight[last];

      ierr = remove_entry(weight, &weight_max,
                          previous, next, first, &last,
                          head, tail,
                          k);

      k=l;
   }

   if (face_max_weight == -1) goto end_agglomerate;

   max_weight = i_face_weight[face_max_weight];
   face_local_max_weight = face_max_weight;

   goto eliminate_face;

end_agglomerate:

   /* eliminate isolated elements: ----------------------------------*/

   for (i=0; i<num_elements; i++)
   {
      /* try to attach an unassigned element to the AE of a neighbor that
       * shares an acceptable face with it */
      if (i_element_to_AE[i] == -1)
      {
         for (j=i_element_face[i]; j < i_element_face[i+1]
                 && i_element_to_AE[i] == -1; j++)
            if (i_face_to_prefer_weight[j_element_face[j]] > -1)
               for (k=i_face_element[j_element_face[j]];
                    k<i_face_element[j_element_face[j]+1]
                       && i_element_to_AE[i] == -1; k++)
                  if (i_element_to_AE[j_face_element[k]] != -1)
                     i_element_to_AE[i] = i_element_to_AE[j_face_element[k]];
      }

      /* otherwise the element becomes an agglomerate of its own */
      if (i_element_to_AE[i] == -1)
      {
         i_element_to_AE[i] = AE_counter;
         AE_counter++;
      }
   }

   num_AEs_pointer[0] = AE_counter;

   /* compute adjoint graph: -------------------------------------------*/

   /* count the elements per AE ... */
   for (i=0; i < AE_counter; i++)
      i_AE_element[i] = 0;

   for (i=0; i < num_elements; i++)
      i_AE_element[i_element_to_AE[i]]++;

   /* ... turn the counts into row starts ... */
   i_AE_element[AE_counter] = num_elements;

   for (i=AE_counter-1; i > -1; i--)
      i_AE_element[i] = i_AE_element[i+1] - i_AE_element[i];

   /* ... fill the rows (this advances every row start to its row end) ... */
   for (i=0; i < num_elements; i++)
   {
      j_AE_element[i_AE_element[i_element_to_AE[i]]] = i;
      i_AE_element[i_element_to_AE[i]]++;
   }

   /* ... and shift the pointers back by one row */
   for (i=AE_counter-1; i > -1; i--)
      i_AE_element[i+1] = i_AE_element[i];

   i_AE_element[0] = 0;

   /*--------------------------------------------------------------------*/

   for (i=0; i < num_faces; i++)
      if (i_face_to_prefer_weight[i] == -1) i_face_weight[i] = -1;

   hypre_TFree(i_element_to_AE);

   hypre_TFree(previous);
   hypre_TFree(next);
   hypre_TFree(first);

   return ierr;
}
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixMatvecT
 *
 * Transposed block matrix-vector product driver (presumably
 * y = alpha*A^T*x + beta*y -- confirm against hypre_CSRBlockMatrixMatvecT).
 *
 * Steps:
 *   1. apply hypre_CSRBlockMatrixMatvecT to the offd block with beta = 0.0,
 *      writing into a temporary vector of num_cols_offd*blk_size entries;
 *   2. start communication job 2, sending that temporary into a receive
 *      buffer sized by the send map;
 *   3. apply hypre_CSRBlockMatrixMatvecT to the diag block into y_local;
 *   4. complete the communication and add the received values, block by
 *      block, to the entries of y_local selected by the send map.
 *
 * Returns an informational flag that never stops processing:
 *   1 if the global length of x differs from (global rows of A)*blk_size,
 *   2 if the global length of y differs from (global cols of A)*blk_size,
 *   3 if both differ, 0 otherwise.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRBlockMatrixMatvecT( HYPRE_Complex            alpha,
                                hypre_ParCSRBlockMatrix *A,
                                hypre_ParVector         *x,
                                HYPRE_Complex            beta,
                                hypre_ParVector         *y )
{
   hypre_ParCSRCommPkg    *pkg         = hypre_ParCSRBlockMatrixCommPkg(A);
   hypre_CSRBlockMatrix   *A_diag      = hypre_ParCSRBlockMatrixDiag(A);
   hypre_CSRBlockMatrix   *A_offd      = hypre_ParCSRBlockMatrixOffd(A);
   hypre_Vector           *x_loc       = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_loc       = hypre_ParVectorLocalVector(y);

   HYPRE_Int               bs          = hypre_ParCSRBlockMatrixBlockSize(A);
   HYPRE_Int               x_len       = hypre_ParVectorGlobalSize(x);
   HYPRE_Int               y_len       = hypre_ParVectorGlobalSize(y);
   HYPRE_Int               global_rows = hypre_ParCSRBlockMatrixGlobalNumRows(A);
   HYPRE_Int               global_cols = hypre_ParCSRBlockMatrixGlobalNumCols(A);
   HYPRE_Int               n_offd      = hypre_CSRBlockMatrixNumCols(A_offd);

   HYPRE_Int               x_mismatch  = (global_rows*bs != x_len);
   HYPRE_Int               y_mismatch  = (global_cols*bs != y_len);

   hypre_ParCSRCommHandle *handle;
   hypre_Vector           *offd_result;
   HYPRE_Complex          *offd_result_data;
   HYPRE_Complex          *recv_buf;
   HYPRE_Complex          *incoming;
   HYPRE_Complex          *y_data;
   HYPRE_Int               n_sends, recv_len;
   HYPRE_Int               s, pos, pos_end, c;
   HYPRE_Int               status;

   /* informational size check; processing continues regardless */
   if (x_mismatch)
   {
      status = y_mismatch ? 3 : 1;
   }
   else
   {
      status = y_mismatch ? 2 : 0;
   }

   /* temporary holding the contribution destined for other processes */
   offd_result = hypre_SeqVectorCreate(n_offd*bs);
   hypre_SeqVectorInitialize(offd_result);

   /* build the communication package on demand */
   if (!pkg)
   {
      hypre_BlockMatvecCommPkgCreate(A);
      pkg = hypre_ParCSRBlockMatrixCommPkg(A);
   }

   n_sends  = hypre_ParCSRCommPkgNumSends(pkg);
   recv_len = hypre_ParCSRCommPkgSendMapStart(pkg, n_sends)*bs;
   recv_buf = hypre_CTAlloc(HYPRE_Complex, recv_len);

   offd_result_data = hypre_VectorData(offd_result);
   y_data           = hypre_VectorData(y_loc);

   if (n_offd)
   {
      hypre_CSRBlockMatrixMatvecT(alpha, A_offd, x_loc, 0.0, offd_result);
   }

   /* the diag product runs between starting and completing the exchange */
   handle = hypre_ParCSRBlockCommHandleCreate(2, bs, pkg,
                                              offd_result_data, recv_buf);

   hypre_CSRBlockMatrixMatvecT(alpha, A_diag, x_loc, beta, y_loc);

   hypre_ParCSRCommHandleDestroy(handle);
   handle = NULL;

   /* accumulate what the neighbors sent, one block at a time */
   incoming = recv_buf;
   for (s = 0; s < n_sends; s++)
   {
      pos     = hypre_ParCSRCommPkgSendMapStart(pkg, s);
      pos_end = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);

      while (pos < pos_end)
      {
         HYPRE_Complex *target =
            y_data + hypre_ParCSRCommPkgSendMapElmt(pkg, pos)*bs;

         for (c = 0; c < bs; c++)
         {
            target[c] += *incoming++;
         }
         pos++;
      }
   }

   hypre_TFree(recv_buf);
   hypre_SeqVectorDestroy(offd_result);
   offd_result = NULL;

   return status;
}
/***************************************************************************** * * Routine for constructing graph domain_dof with minimal overlap * and computing the respective matrix inverses to be * used in an overlapping Schwarz procedure (like smoother * in AMG); * *****************************************************************************/ HYPRE_Int hypre_AMGCreateDomainDof(hypre_CSRMatrix *A, HYPRE_Int **i_domain_dof_pointer, HYPRE_Int **j_domain_dof_pointer, HYPRE_Real **domain_matrixinverse_pointer, HYPRE_Int *num_domains_pointer) { HYPRE_Int *i_domain_dof, *j_domain_dof; HYPRE_Real *domain_matrixinverse; HYPRE_Int num_domains; HYPRE_Int *i_dof_dof = hypre_CSRMatrixI(A); HYPRE_Int *j_dof_dof = hypre_CSRMatrixJ(A); HYPRE_Real *a_dof_dof = hypre_CSRMatrixData(A); HYPRE_Int num_dofs = hypre_CSRMatrixNumRows(A); /* HYPRE_Int *i_dof_to_accept_weight; */ HYPRE_Int *i_dof_to_prefer_weight, *w_dof_dof, *i_dof_weight; HYPRE_Int *i_dof_to_aggregate, *i_aggregate_dof, *j_aggregate_dof; HYPRE_Int *i_dof_index; HYPRE_Int ierr = 0; HYPRE_Int i,j,k, l_loc, i_loc, j_loc; HYPRE_Int i_dof; HYPRE_Int *i_local_to_global; HYPRE_Int *i_global_to_local; HYPRE_Int local_dof_counter, max_local_dof_counter=0; HYPRE_Int domain_dof_counter = 0, domain_matrixinverse_counter = 0; HYPRE_Real *AE, *XE; /* PCG arrays: --------------------------------------------------- */ /* HYPRE_Real *x, *rhs, *v, *w, *d, *aux; HYPRE_Int max_iter; */ /* --------------------------------------------------------------------- */ /*=======================================================================*/ /* create artificial domains by agglomeration; */ /*=======================================================================*/ hypre_printf("----------- create artificials domain by agglomeration; ======\n"); i_dof_to_prefer_weight = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); w_dof_dof = (HYPRE_Int *) malloc(i_dof_dof[num_dofs] * sizeof(HYPRE_Int)); for (i=0; i < num_dofs; i++) 
i_dof_to_prefer_weight[i] = 0; for (i=0; i<num_dofs; i++) for (j=i_dof_dof[i]; j< i_dof_dof[i+1]; j++) { if (j_dof_dof[j] == i) w_dof_dof[j]=0; else w_dof_dof[j]=1; } hypre_printf("end computing weights for agglomeration procedure: --------\n"); i_dof_weight = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); i_aggregate_dof = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); j_aggregate_dof= (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); ierr = hypre_AMGeAgglomerate(i_aggregate_dof, j_aggregate_dof, i_dof_dof, j_dof_dof, w_dof_dof, i_dof_dof, j_dof_dof, i_dof_dof, j_dof_dof, i_dof_to_prefer_weight, i_dof_weight, num_dofs, num_dofs, &num_domains); hypre_printf("num_dofs: %d, num_domains: %d\n", num_dofs, num_domains); i_dof_to_aggregate = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); for (i=0; i < num_domains; i++) for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) i_dof_to_aggregate[j_aggregate_dof[j]] = i; /* hypre_printf("========================================================\n"); hypre_printf("== artificial non--overlapping domains (aggregates): ===\n"); hypre_printf("========================================================\n"); for (i=0; i < num_domains; i++) { hypre_printf("\n aggregate %d:\n", i); for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) hypre_printf("%d, ", j_aggregate_dof[j]); hypre_printf("\n"); } */ free(i_dof_to_prefer_weight); free(i_dof_weight); free(w_dof_dof); /* make domains from aggregates: *********************************/ i_domain_dof = (HYPRE_Int *) malloc((num_domains+1) * sizeof(HYPRE_Int)); i_dof_index = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); for (i=0; i < num_dofs; i++) i_dof_index[i] = -1; domain_dof_counter=0; for (i=0; i < num_domains; i++) { i_domain_dof[i] = domain_dof_counter; for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) if (i_dof_to_aggregate[j_dof_dof[k]] >= i && 
i_dof_index[j_dof_dof[k]]==-1) { i_dof_index[j_dof_dof[k]]++; domain_dof_counter++; } for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) i_dof_index[j_dof_dof[k]]=-1; } i_domain_dof[num_domains] = domain_dof_counter; j_domain_dof = (HYPRE_Int *) malloc(domain_dof_counter * sizeof(HYPRE_Int)); domain_dof_counter=0; for (i=0; i < num_domains; i++) { for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) if (i_dof_to_aggregate[j_dof_dof[k]] >= i && i_dof_index[j_dof_dof[k]]==-1) { i_dof_index[j_dof_dof[k]]++; j_domain_dof[domain_dof_counter] = j_dof_dof[k]; domain_dof_counter++; } for (j=i_aggregate_dof[i]; j < i_aggregate_dof[i+1]; j++) for (k=i_dof_dof[j_aggregate_dof[j]]; k<i_dof_dof[j_aggregate_dof[j]+1]; k++) i_dof_index[j_dof_dof[k]]=-1; } free(i_aggregate_dof); free(j_aggregate_dof); free(i_dof_to_aggregate); /* i_domain_dof = i_aggregate_dof; j_domain_dof = j_aggregate_dof; */ hypre_printf("END domain_dof computations: =================================\n"); domain_matrixinverse_counter = 0; local_dof_counter = 0; for (i=0; i < num_domains; i++) { local_dof_counter = i_domain_dof[i+1]-i_domain_dof[i]; domain_matrixinverse_counter+= local_dof_counter * local_dof_counter; if (local_dof_counter > max_local_dof_counter) max_local_dof_counter = local_dof_counter; } domain_matrixinverse = hypre_CTAlloc(HYPRE_Real, domain_matrixinverse_counter); i_local_to_global = hypre_CTAlloc(HYPRE_Int, max_local_dof_counter); AE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); XE = hypre_CTAlloc(HYPRE_Real, max_local_dof_counter * max_local_dof_counter); /* i_dof_index = (HYPRE_Int *) malloc(num_dofs * sizeof(HYPRE_Int)); */ i_global_to_local = i_dof_index; for (i=0; i < num_dofs; i++) i_global_to_local[i] = -1; domain_matrixinverse_counter = 0; for (i=0; i < num_domains; i++) { 
local_dof_counter = 0; for (j=i_domain_dof[i]; j < i_domain_dof[i+1]; j++) { i_global_to_local[j_domain_dof[j]] = local_dof_counter; i_local_to_global[local_dof_counter] = j_domain_dof[j]; local_dof_counter++; } /* get local matrix in AE: ======================================== */ for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) AE[i_loc + j_loc * local_dof_counter] = 0.e0; for (i_loc=0; i_loc < local_dof_counter; i_loc++) { i_dof = i_local_to_global[i_loc]; for (j=i_dof_dof[i_dof]; j < i_dof_dof[i_dof+1]; j++) { j_loc = i_global_to_local[j_dof_dof[j]]; if (j_loc >=0) AE[i_loc + j_loc * local_dof_counter] = a_dof_dof[j]; } } /* get block for Schwarz smoother: ============================= */ ierr = matinv(XE, AE, local_dof_counter); /* hypre_printf("ierr_AE_inv: %d\n", ierr); */ for (i_loc=0; i_loc < local_dof_counter; i_loc++) for (j_loc=0; j_loc < local_dof_counter; j_loc++) domain_matrixinverse[domain_matrixinverse_counter + i_loc + j_loc * local_dof_counter] = XE[i_loc + j_loc * local_dof_counter]; domain_matrixinverse_counter+=local_dof_counter*local_dof_counter; for (l_loc=0; l_loc < local_dof_counter; l_loc++) i_global_to_local[i_local_to_global[l_loc]] = -1; } hypre_TFree(i_local_to_global); hypre_TFree(AE); hypre_TFree(XE); hypre_TFree(i_dof_index); *i_domain_dof_pointer = i_domain_dof; *j_domain_dof_pointer = j_domain_dof; *num_domains_pointer = num_domains; *domain_matrixinverse_pointer = domain_matrixinverse; /* x = hypre_CTAlloc(HYPRE_Real, num_dofs); rhs = hypre_CTAlloc(HYPRE_Real, num_dofs); v = hypre_CTAlloc(HYPRE_Real, num_dofs); w = hypre_CTAlloc(HYPRE_Real, num_dofs); d = hypre_CTAlloc(HYPRE_Real, num_dofs); aux = hypre_CTAlloc(HYPRE_Real, num_dofs); for (i=0; i < num_dofs; i++) x[i] = 0.e0; for (i=0; i < num_dofs; i++) rhs[i] = rand(); max_iter = 1000; hypre_printf("\nenter SchwarzPCG: =======================================\n"); ierr = hypre_Schwarzpcg(x, rhs, a_dof_dof, i_dof_dof, j_dof_dof, 
i_domain_dof, j_domain_dof, domain_matrixinverse, num_domains, v, w, d, aux, max_iter, num_dofs); hypre_printf("\n\n=======================================================\n"); hypre_printf(" END test PCG solve: \n"); hypre_printf("===========================================================\n"); hypre_TFree(x); hypre_TFree(rhs); hypre_TFree(aux); hypre_TFree(v); hypre_TFree(w); hypre_TFree(d); hypre_TFree(i_domain_dof); hypre_TFree(j_domain_dof); hypre_TFree(domain_matrixinverse); */ return ierr; }
/*--------------------------------------------------------------------------
 * hypre_ParCSRBlockMatrixMatvec
 *
 * Block matrix-vector product driver (presumably y = alpha*A*x + beta*y --
 * confirm against hypre_CSRBlockMatrixMatvec).
 *
 * With more than one process, the blocks of x listed in the send map are
 * packed and exchanged with communication job 1 while the diag block is
 * applied; afterwards the offd block is applied to the received values and
 * accumulated into y_local with beta = 1.0.  With a single process only the
 * diag block is applied.
 *
 * Returns an informational flag that never stops processing:
 *   11 if the global length of x differs from (global cols of A)*blk_size,
 *   12 if the global length of y differs from (global rows of A)*blk_size,
 *   13 if both differ, 0 otherwise.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_ParCSRBlockMatrixMatvec(HYPRE_Complex            alpha,
                              hypre_ParCSRBlockMatrix *A,
                              hypre_ParVector         *x,
                              HYPRE_Complex            beta,
                              hypre_ParVector         *y)
{
   hypre_ParCSRCommPkg    *pkg         = hypre_ParCSRBlockMatrixCommPkg(A);
   hypre_CSRBlockMatrix   *A_diag      = hypre_ParCSRBlockMatrixDiag(A);
   hypre_CSRBlockMatrix   *A_offd      = hypre_ParCSRBlockMatrixOffd(A);
   hypre_Vector           *x_loc       = hypre_ParVectorLocalVector(x);
   hypre_Vector           *y_loc       = hypre_ParVectorLocalVector(y);
   HYPRE_Complex          *x_data      = hypre_VectorData(x_loc);

   HYPRE_Int               global_rows = hypre_ParCSRBlockMatrixGlobalNumRows(A);
   HYPRE_Int               global_cols = hypre_ParCSRBlockMatrixGlobalNumCols(A);
   HYPRE_Int               bs          = hypre_ParCSRBlockMatrixBlockSize(A);
   HYPRE_Int               n_offd      = hypre_CSRBlockMatrixNumCols(A_offd);
   HYPRE_Int               x_len       = hypre_ParVectorGlobalSize(x);
   HYPRE_Int               y_len       = hypre_ParVectorGlobalSize(y);

   hypre_ParCSRCommHandle *handle      = NULL;
   hypre_Vector           *x_ghost     = NULL;
   HYPRE_Complex          *x_ghost_data = NULL;
   HYPRE_Complex          *send_buf    = NULL;
   HYPRE_Complex          *out;
   HYPRE_Int               comm_size, my_rank;
   HYPRE_Int               parallel;
   HYPRE_Int               n_sends, send_len;
   HYPRE_Int               s, pos, pos_end, c;
   HYPRE_Int               status;

   hypre_MPI_Comm_size(hypre_ParCSRBlockMatrixComm(A), &comm_size);
   hypre_MPI_Comm_rank(hypre_ParCSRBlockMatrixComm(A), &my_rank);
   parallel = (comm_size > 1);

   /* informational size check; processing continues regardless */
   if (global_cols*bs != x_len)
   {
      status = (global_rows*bs != y_len) ? 13 : 11;
   }
   else
   {
      status = (global_rows*bs != y_len) ? 12 : 0;
   }

   if (parallel)
   {
      /* receive area for the off-processor part of x */
      x_ghost = hypre_SeqVectorCreate(n_offd*bs);
      hypre_SeqVectorInitialize(x_ghost);
      x_ghost_data = hypre_VectorData(x_ghost);

      /* build the communication package on demand */
      if (!pkg)
      {
         hypre_BlockMatvecCommPkgCreate(A);
         pkg = hypre_ParCSRBlockMatrixCommPkg(A);
      }

      n_sends  = hypre_ParCSRCommPkgNumSends(pkg);
      send_len = hypre_ParCSRCommPkgSendMapStart(pkg,n_sends)*bs;
      send_buf = hypre_CTAlloc(HYPRE_Complex, send_len);

      /* pack the blocks of x requested by the neighbors */
      out = send_buf;
      for (s = 0; s < n_sends; s++)
      {
         pos     = hypre_ParCSRCommPkgSendMapStart(pkg, s);
         pos_end = hypre_ParCSRCommPkgSendMapStart(pkg, s+1);

         while (pos < pos_end)
         {
            HYPRE_Complex *block =
               x_data + hypre_ParCSRCommPkgSendMapElmt(pkg,pos)*bs;

            for (c = 0; c < bs; c++)
            {
               *out++ = block[c];
            }
            pos++;
         }
      }

      handle = hypre_ParCSRBlockCommHandleCreate(1, bs, pkg,
                                                 send_buf, x_ghost_data);
   }

   /* local part; in parallel this runs while the exchange is in flight */
   hypre_CSRBlockMatrixMatvec(alpha, A_diag, x_loc, beta, y_loc);

   if (parallel)
   {
      hypre_ParCSRBlockCommHandleDestroy(handle);
      handle = NULL;

      /* off-processor part is accumulated on top of the diag result */
      if (n_offd)
      {
         hypre_CSRBlockMatrixMatvec(alpha,A_offd,x_ghost,1.0,y_loc);
      }

      hypre_SeqVectorDestroy(x_ghost);
      x_ghost = NULL;
      hypre_TFree(send_buf);
   }

   return status;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Solves the level p_level problem through the gathered coarse system
 * stored in amg_data (coarse solver, A_coarse, F_coarse, U_coarse).
 *
 * Only processes for which hypre_ParAMGDataParticipate(amg_data) is nonzero
 * do any work.  On the communicator hypre_ParAMGDataNewComm(amg_data):
 *   - redundant mode: every process all-gathers the right-hand side and the
 *     current iterate, runs hypre_BoomerAMGSolve itself, and copies its own
 *     slice (starting at the first index of the level vector) back into the
 *     level solution;
 *   - otherwise: rank 0 gathers both vectors, runs the solve, and scatters
 *     the solution back to the owners.
 *
 * The per-process counts are taken from the local length of the right-hand
 * side and reused for the solution vector.
 *
 * Always returns 0; the return value of the coarse solve is not inspected.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData *amg_data,
                   HYPRE_Int p_level,
                   hypre_ParVector **Par_F_array,
                   hypre_ParVector **Par_U_array )
{
   /* Sequential-solver data held by the AMG object */
   MPI_Comm            sub_comm      = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver        coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix *A_coarse      = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector    *F_coarse      = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector    *U_coarse      = hypre_ParAMGDataUCoarse(amg_data);
   HYPRE_Int           redundant     = hypre_ParAMGDataRedundant(amg_data);

   /* Level vectors */
   hypre_ParVector    *level_u = Par_U_array[p_level];
   hypre_ParVector    *level_f = Par_F_array[p_level];
   hypre_Vector       *u_vec   = hypre_ParVectorLocalVector(level_u);
   HYPRE_Real         *u_vals  = hypre_VectorData(u_vec);
   HYPRE_Int           u_len   = hypre_VectorSize(u_vec);
   HYPRE_Int           u_first = hypre_ParVectorFirstIndex(level_u);

   HYPRE_Int           status  = 0;

   if (hypre_ParAMGDataParticipate(amg_data))
   {
      hypre_Vector *f_vec    = hypre_ParVectorLocalVector(level_f);
      HYPRE_Real   *f_vals   = hypre_VectorData(f_vec);
      HYPRE_Int     f_len    = hypre_VectorSize(f_vec);
      HYPRE_Real   *gathered = NULL;   /* receive target of the gathers   */
      HYPRE_Int    *counts   = NULL;   /* entries contributed by each rank */
      HYPRE_Int    *offsets  = NULL;   /* running sum of counts            */
      HYPRE_Int     n_ranks, rank, p;

      hypre_MPI_Comm_size(sub_comm, &n_ranks);
      hypre_MPI_Comm_rank(sub_comm, &rank);

      /* Collect the local lengths. */
      counts = hypre_CTAlloc(HYPRE_Int, n_ranks);
      if (redundant)
      {
         hypre_MPI_Allgather(&f_len, 1, HYPRE_MPI_INT,
                             counts, 1, HYPRE_MPI_INT, sub_comm);
      }
      else
      {
         hypre_MPI_Gather(&f_len, 1, HYPRE_MPI_INT,
                          counts, 1, HYPRE_MPI_INT, 0, sub_comm);
      }

      /* Processes that hold the gathered system need displacements and the
         coarse right-hand side as the first receive target. */
      if (redundant || rank == 0)
      {
         offsets = hypre_CTAlloc(HYPRE_Int, n_ranks + 1);
         offsets[0] = 0;
         for (p = 0; p < n_ranks; p++)
         {
            offsets[p + 1] = offsets[p] + counts[p];
         }
         if (F_coarse)
         {
            gathered = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
         }
      }

      if (redundant)
      {
         HYPRE_Real *coarse_u;

         /* right-hand side, then current iterate */
         hypre_MPI_Allgatherv ( f_vals, f_len, HYPRE_MPI_REAL,
                                gathered, counts, offsets,
                                HYPRE_MPI_REAL, sub_comm );

         gathered = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         hypre_MPI_Allgatherv ( u_vals, u_len, HYPRE_MPI_REAL,
                                gathered, counts, offsets,
                                HYPRE_MPI_REAL, sub_comm );

         hypre_TFree(offsets);
         hypre_TFree(counts);

         hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);

         /* Every process holds the full solution: copy out the local slice. */
         coarse_u = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         for (p = 0; p < u_len; p++)
         {
            u_vals[p] = coarse_u[u_first + p];
         }
      }
      else
      {
         HYPRE_Real *scatter_src = NULL;

         /* right-hand side, then current iterate, both onto rank 0 */
         hypre_MPI_Gatherv ( f_vals, f_len, HYPRE_MPI_REAL,
                             gathered, counts, offsets,
                             HYPRE_MPI_REAL, 0, sub_comm );

         if (rank == 0)
         {
            gathered = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         }
         hypre_MPI_Gatherv ( u_vals, u_len, HYPRE_MPI_REAL,
                             gathered, counts, offsets,
                             HYPRE_MPI_REAL, 0, sub_comm );

         if (rank == 0)
         {
            hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);
            scatter_src =
               hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
         }

         /* Hand each process its part of the solution. */
         hypre_MPI_Scatterv ( scatter_src, counts, offsets, HYPRE_MPI_REAL,
                              u_vals, u_len, HYPRE_MPI_REAL, 0, sub_comm );

         if (rank == 0)
         {
            hypre_TFree(offsets);
         }
         hypre_TFree(counts);
      }
   }

   return(status);
}
/*--------------------------------------------------------------------------
 * hypre_FacZeroCFSten: Zeroes the coarse stencil coefficients that reach
 * into an underlying coarsened refinement box.
 *
 * Parameters:
 *   Af        - fine-level part matrix (not referenced in the portion of
 *               the routine visible here)
 *   Ac        - coarse-level part matrix whose coefficients are zeroed
 *   grid      - semi-structured grid providing the box manager of the fine
 *               part
 *   fine_part - part number used to look up the fine box manager
 *   rfactors  - refinement factors between the coarse and the fine part
 *
 * Algo: For each cbox
 *       {
 *          1) refine cbox and expand by one in each direction
 *          2) boxman_intersect with the fboxman
 *          3) loop over intersection boxes to see if stencil
 *             reaches over.
 *       }
 *
 * NOTE(review): in this file the definition is cut off after the loop over
 * the coarse boxes; the end of the var1 loop, the return statement and the
 * closing brace of the function are not present.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_FacZeroCFSten( hypre_SStructPMatrix *Af,
                     hypre_SStructPMatrix *Ac,
                     hypre_SStructGrid    *grid,
                     HYPRE_Int             fine_part,
                     hypre_Index           rfactors )
{
   hypre_BoxManager      *fboxman;
   hypre_BoxManEntry    **boxman_entries;
   HYPRE_Int              nboxman_entries;

   hypre_SStructPGrid    *p_cgrid;

   hypre_Box              fgrid_box;
   hypre_StructGrid      *cgrid;
   hypre_BoxArray        *cgrid_boxes;
   hypre_Box             *cgrid_box;
   hypre_Box              scaled_box;

   hypre_Box             *shift_ibox;

   hypre_StructMatrix    *smatrix;

   hypre_StructStencil   *stencils;
   HYPRE_Int              stencil_size;

   hypre_Index            refine_factors, upper_shift;
   hypre_Index            stride;
   hypre_Index            stencil_shape;
   hypre_Index            zero_index, ilower, iupper;

   HYPRE_Int              nvars, var1, var2;
   HYPRE_Int              ndim;

   hypre_Box             *ac_dbox;
   HYPRE_Real            *ac_ptr;
   hypre_Index            loop_size;

   HYPRE_Int              iac;
   HYPRE_Int              ci, i, j;

   HYPRE_Int              abs_shape;

   HYPRE_Int              ierr = 0;

   p_cgrid  = hypre_SStructPMatrixPGrid(Ac);
   nvars    = hypre_SStructPMatrixNVars(Ac);
   ndim     = hypre_SStructPGridNDim(p_cgrid);

   hypre_BoxInit(&fgrid_box, ndim);
   hypre_BoxInit(&scaled_box, ndim);

   /* stride is 1 and upper_shift is rfactors-1 in the first ndim
      components; the remaining components stay cleared */
   hypre_ClearIndex(zero_index);
   hypre_ClearIndex(stride);
   hypre_ClearIndex(upper_shift);
   for (i= 0; i< ndim; i++)
   {
      stride[i]= 1;
      upper_shift[i]= rfactors[i]-1;
   }

   /* local copy of the refinement factors, with the components beyond
      ndim forced to 1 */
   hypre_CopyIndex(rfactors, refine_factors);
   if (ndim < 3)
   {
      for (i= ndim; i< 3; i++)
      {
         refine_factors[i]= 1;
      }
   }

   for (var1= 0; var1< nvars; var1++)
   {
      cgrid= hypre_SStructPGridSGrid(hypre_SStructPMatrixPGrid(Ac), var1);
      cgrid_boxes= hypre_StructGridBoxes(cgrid);

      fboxman= hypre_SStructGridBoxManager(grid, fine_part, var1);

      /*------------------------------------------------------------------
       * For each parent coarse box find all fboxes that may be connected
       * through a stencil entry- refine this box, expand it by one
       * in each direction, and boxman_intersect with fboxman
       *------------------------------------------------------------------*/
      hypre_ForBoxI(ci, cgrid_boxes)
      {
         cgrid_box= hypre_BoxArrayBox(cgrid_boxes, ci);

         /* scaled_box: coarse box mapped to fine indices (lower corner
            with a zero offset, upper corner with upper_shift) ... */
         hypre_StructMapCoarseToFine(hypre_BoxIMin(cgrid_box), zero_index,
                                     refine_factors, hypre_BoxIMin(&scaled_box));
         hypre_StructMapCoarseToFine(hypre_BoxIMax(cgrid_box), upper_shift,
                                     refine_factors, hypre_BoxIMax(&scaled_box));

         /* ... then grown by stride (1 in each active dimension) on both
            sides */
         hypre_SubtractIndexes(hypre_BoxIMin(&scaled_box), stride, 3,
                               hypre_BoxIMin(&scaled_box));
         hypre_AddIndexes(hypre_BoxIMax(&scaled_box), stride, 3,
                          hypre_BoxIMax(&scaled_box));

         /* boxman_entries is filled here and released with hypre_TFree at
            the end of this iteration */
         hypre_BoxManIntersect(fboxman, hypre_BoxIMin(&scaled_box),
                               hypre_BoxIMax(&scaled_box), &boxman_entries,
                               &nboxman_entries);

         for (var2= 0; var2< nvars; var2++)
         {
            stencils=  hypre_SStructPMatrixSStencil(Ac, var1, var2);

            if (stencils != NULL)
            {
               stencil_size= hypre_StructStencilSize(stencils);
               smatrix     = hypre_SStructPMatrixSMatrix(Ac, var1, var2);
               ac_dbox     = hypre_BoxArrayBox(hypre_StructMatrixDataSpace(smatrix),
                                               ci);

               /*---------------------------------------------------------
                * Find the stencil coefficients that must be zeroed off.
                * Loop over all possible boxes.
                *---------------------------------------------------------*/
               for (i= 0; i< stencil_size; i++)
               {
                  hypre_CopyIndex(hypre_StructStencilElement(stencils, i),
                                  stencil_shape);
                  /* AbsStencilShape is defined elsewhere; going by the
                     comment below, abs_shape is nonzero for every
                     non-centre stencil entry */
                  AbsStencilShape(stencil_shape, abs_shape);

                  if (abs_shape)   /* non-centre stencils are zeroed */
                  {
                     /* look for connecting fboxes that must be zeroed. */
                     for (j= 0; j< nboxman_entries; j++)
                     {
                        hypre_BoxManEntryGetExtents(boxman_entries[j], ilower, iupper);
                        hypre_BoxSetExtents(&fgrid_box, ilower, iupper);

                        /* shift_ibox is a box returned by hypre_CF_StenBox
                           and destroyed below; the coefficient of this
                           stencil entry is zeroed over it when its volume
                           is nonzero */
                        shift_ibox= hypre_CF_StenBox(&fgrid_box, cgrid_box, stencil_shape,
                                                     refine_factors, ndim);

                        if ( hypre_BoxVolume(shift_ibox) )
                        {
                           ac_ptr= hypre_StructMatrixExtractPointerByIndex(smatrix,
                                                                           ci,
                                                                           stencil_shape);
                           hypre_BoxGetSize(shift_ibox, loop_size);

                           hypre_BoxLoop1Begin(ndim, loop_size,
                                               ac_dbox, hypre_BoxIMin(shift_ibox),
                                               stride, iac);
#ifdef HYPRE_USING_OPENMP
#pragma omp parallel for private(HYPRE_BOX_PRIVATE,iac) HYPRE_SMP_SCHEDULE
#endif
                           hypre_BoxLoop1For(iac)
                           {
                              ac_ptr[iac] = 0.0;
                           }
                           hypre_BoxLoop1End(iac);
                        }   /* if ( hypre_BoxVolume(shift_ibox) ) */

                        hypre_BoxDestroy(shift_ibox);

                     }  /* for (j= 0; j< nboxman_entries; j++) */
                  }     /* if (abs_shape)  */
               }        /* for (i= 0; i< stencil_size; i++) */
            }           /* if (stencils != NULL) */
         }              /* for (var2= 0; var2< nvars; var2++) */

         hypre_TFree(boxman_entries);
      }   /* hypre_ForBoxI  ci */
/*--------------------------------------------------------------------------
 * hypre_seqAMGSetup
 *
 * Gathers the matrix of level p_level onto a single process (rank 0 of the
 * sub-communicator) or, in redundant mode, onto every process of the
 * sub-communicator, and sets up a BoomerAMG solver for the gathered matrix.
 *
 * Steps carried out by processes that own at least one row:
 *   1) collect the number of rows of every process,
 *   2) merge the local diag and offd parts into one CSR structure with
 *      global column indices,
 *   3) gather row lengths (and dof_func when num_functions > 1), turn the
 *      row lengths into row pointers, then gather column indices and values,
 *   4) wrap the gathered arrays in a ParCSR matrix on hypre_MPI_COMM_SELF,
 *      create matching right-hand-side/solution vectors and call
 *      hypre_BoomerAMGSetup,
 *   5) store solver, matrix, vectors and the sub-communicator in amg_data.
 *
 * Parameters:
 *   amg_data         - AMG data object; read for solver options, written
 *                      with the coarse solver data
 *   p_level          - level whose matrix is gathered
 *   coarse_threshold - not referenced in this routine
 *
 * Always returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGSetup( hypre_ParAMGData *amg_data,
                   HYPRE_Int         p_level,
                   HYPRE_Int         coarse_threshold)
{
   /* Par Data Structure variables */
   hypre_ParCSRMatrix **Par_A_array = hypre_ParAMGDataAArray(amg_data);

   MPI_Comm             comm = hypre_ParCSRMatrixComm(Par_A_array[0]);
   MPI_Comm             new_comm, seq_comm;

   hypre_ParCSRMatrix  *A_seq = NULL;
   hypre_CSRMatrix     *A_seq_diag;
   hypre_CSRMatrix     *A_seq_offd;
   hypre_ParVector     *F_seq = NULL;
   hypre_ParVector     *U_seq = NULL;

   hypre_ParCSRMatrix  *A;

   HYPRE_Int          **dof_func_array;
   HYPRE_Int            num_procs, my_id;

   HYPRE_Int            level;
   HYPRE_Int            redundant;
   HYPRE_Int            num_functions;

   HYPRE_Solver         coarse_solver;

   /* misc */
   dof_func_array = hypre_ParAMGDataDofFuncArray(amg_data);
   num_functions = hypre_ParAMGDataNumFunctions(amg_data);
   redundant = hypre_ParAMGDataRedundant(amg_data);

   /*MPI Stuff */
   /* num_procs is queried here but not used afterwards */
   hypre_MPI_Comm_size(comm, &num_procs);

   /*initial */
   level = p_level;

   /* convert A at this level to sequential */
   A = Par_A_array[level];

   {
      HYPRE_Real *A_seq_data = NULL;
      HYPRE_Int  *A_seq_i = NULL;
      HYPRE_Int  *A_seq_offd_i = NULL;
      HYPRE_Int  *A_seq_j = NULL;
      HYPRE_Int  *seq_dof_func = NULL;

      HYPRE_Real *A_tmp_data = NULL;
      HYPRE_Int  *A_tmp_i = NULL;
      HYPRE_Int  *A_tmp_j = NULL;

      /* info: per-process counts (rows first, nonzeros later);
         displs/displs2: matching displacements for rows / nonzeros */
      HYPRE_Int  *info = NULL;
      HYPRE_Int  *displs = NULL;
      HYPRE_Int  *displs2 = NULL;
      HYPRE_Int   i, j, size, num_nonzeros, total_nnz, cnt;

      hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A);
      hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A);
      HYPRE_Int  *col_map_offd = hypre_ParCSRMatrixColMapOffd(A);
      HYPRE_Int  *A_diag_i = hypre_CSRMatrixI(A_diag);
      HYPRE_Int  *A_offd_i = hypre_CSRMatrixI(A_offd);
      HYPRE_Int  *A_diag_j = hypre_CSRMatrixJ(A_diag);
      HYPRE_Int  *A_offd_j = hypre_CSRMatrixJ(A_offd);
      HYPRE_Real *A_diag_data = hypre_CSRMatrixData(A_diag);
      HYPRE_Real *A_offd_data = hypre_CSRMatrixData(A_offd);
      HYPRE_Int   num_rows = hypre_CSRMatrixNumRows(A_diag);
      HYPRE_Int   first_row_index = hypre_ParCSRMatrixFirstRowIndex(A);
      HYPRE_Int   new_num_procs, *row_starts;

      /* new_comm is produced from comm and the local row count;
         presumably it contains the processes with num_rows > 0, as in the
         superseded code kept below -- confirm in hypre_GenerateSubComm */
      hypre_GenerateSubComm(comm, num_rows, &new_comm);

      /*hypre_MPI_Group orig_group, new_group;
      HYPRE_Int *ranks, new_num_procs, *row_starts;

      info = hypre_CTAlloc(HYPRE_Int, num_procs);

      hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, comm);

      ranks = hypre_CTAlloc(HYPRE_Int, num_procs);

      new_num_procs = 0;
      for (i=0; i < num_procs; i++)
         if (info[i])
         {
            ranks[new_num_procs] = i;
            info[new_num_procs++] = info[i];
         }

      hypre_MPI_Comm_group(comm, &orig_group);
      hypre_MPI_Group_incl(orig_group, new_num_procs, ranks, &new_group);
      hypre_MPI_Comm_create(comm, new_group, &new_comm);
      hypre_MPI_Group_free(&new_group);
      hypre_MPI_Group_free(&orig_group); */

      /* processes without rows skip everything below, including the
         assignment of new_comm into amg_data */
      if (num_rows)
      {
         hypre_ParAMGDataParticipate(amg_data) = 1;
         hypre_MPI_Comm_size(new_comm, &new_num_procs);
         hypre_MPI_Comm_rank(new_comm, &my_id);
         info = hypre_CTAlloc(HYPRE_Int, new_num_procs);

         /* row counts of all participating processes */
         if (redundant)
            hypre_MPI_Allgather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, new_comm);
         else
            hypre_MPI_Gather(&num_rows, 1, HYPRE_MPI_INT, info, 1, HYPRE_MPI_INT, 0, new_comm);

         /* alloc space in seq data structure only for participating procs*/
         if (redundant || my_id == 0)
         {
            /* the coarse solver copies these settings from amg_data */
            HYPRE_BoomerAMGCreate(&coarse_solver);
            HYPRE_BoomerAMGSetMaxRowSum(coarse_solver,
                                        hypre_ParAMGDataMaxRowSum(amg_data));
            HYPRE_BoomerAMGSetStrongThreshold(coarse_solver,
                                              hypre_ParAMGDataStrongThreshold(amg_data));
            HYPRE_BoomerAMGSetCoarsenType(coarse_solver,
                                          hypre_ParAMGDataCoarsenType(amg_data));
            HYPRE_BoomerAMGSetInterpType(coarse_solver,
                                         hypre_ParAMGDataInterpType(amg_data));
            HYPRE_BoomerAMGSetTruncFactor(coarse_solver,
                                          hypre_ParAMGDataTruncFactor(amg_data));
            HYPRE_BoomerAMGSetPMaxElmts(coarse_solver,
                                        hypre_ParAMGDataPMaxElmts(amg_data));
            /* relax type and sweep count are forwarded only when the
               stored user value is greater than -1 */
            if (hypre_ParAMGDataUserRelaxType(amg_data) > -1)
               HYPRE_BoomerAMGSetRelaxType(coarse_solver,
                                           hypre_ParAMGDataUserRelaxType(amg_data));
            HYPRE_BoomerAMGSetRelaxOrder(coarse_solver,
                                         hypre_ParAMGDataRelaxOrder(amg_data));
            HYPRE_BoomerAMGSetRelaxWt(coarse_solver,
                                      hypre_ParAMGDataUserRelaxWeight(amg_data));
            if (hypre_ParAMGDataUserNumSweeps(amg_data) > -1)
               HYPRE_BoomerAMGSetNumSweeps(coarse_solver,
                                           hypre_ParAMGDataUserNumSweeps(amg_data));
            HYPRE_BoomerAMGSetNumFunctions(coarse_solver,
                                           num_functions);
            /* max iterations 1 and tolerance 0 are set on the coarse
               solver */
            HYPRE_BoomerAMGSetMaxIter(coarse_solver, 1);
            HYPRE_BoomerAMGSetTol(coarse_solver, 0);
         }

         /* Create CSR Matrix, will be Diag part of new matrix */
         /* A_tmp_i[1..num_rows] holds row LENGTHS (diag + offd), not row
            pointers; the prefix sum is formed after the gather */
         A_tmp_i = hypre_CTAlloc(HYPRE_Int, num_rows+1);

         A_tmp_i[0] = 0;
         for (i=1; i < num_rows+1; i++)
            A_tmp_i[i] = A_diag_i[i]-A_diag_i[i-1]+A_offd_i[i]-A_offd_i[i-1];

         num_nonzeros = A_offd_i[num_rows]+A_diag_i[num_rows];

         A_tmp_j = hypre_CTAlloc(HYPRE_Int, num_nonzeros);
         A_tmp_data = hypre_CTAlloc(HYPRE_Real, num_nonzeros);

         /* per row: diag entries first (local column + first_row_index),
            then offd entries (column looked up in col_map_offd) */
         cnt = 0;
         for (i=0; i < num_rows; i++)
         {
            for (j=A_diag_i[i]; j < A_diag_i[i+1]; j++)
            {
               A_tmp_j[cnt] = A_diag_j[j]+first_row_index;
               A_tmp_data[cnt++] = A_diag_data[j];
            }
            for (j=A_offd_i[i]; j < A_offd_i[i+1]; j++)
            {
               A_tmp_j[cnt] = col_map_offd[A_offd_j[j]];
               A_tmp_data[cnt++] = A_offd_data[j];
            }
         }

         /* row displacements; size becomes the total number of rows
            according to info */
         displs = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);
         displs[0] = 0;
         for (i=1; i < new_num_procs+1; i++)
            displs[i] = displs[i-1]+info[i-1];
         size = displs[new_num_procs];

         if (redundant || my_id == 0)
         {
            A_seq_i = hypre_CTAlloc(HYPRE_Int, size+1);
            A_seq_offd_i = hypre_CTAlloc(HYPRE_Int, size+1);
            if (num_functions > 1) seq_dof_func = hypre_CTAlloc(HYPRE_Int, size);
         }

         /* gather the row lengths into A_seq_i[1..size] (and dof_func) */
         if (redundant)
         {
            hypre_MPI_Allgatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info,
                                   displs, HYPRE_MPI_INT, new_comm );
            if (num_functions > 1)
            {
               hypre_MPI_Allgatherv ( dof_func_array[level], num_rows, HYPRE_MPI_INT,
                                      seq_dof_func, info, displs, HYPRE_MPI_INT, new_comm );
               HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func);
            }
         }
         else
         {
            /* A_seq_i is NULL on ranks other than 0 here, so the receive
               argument is passed as is instead of forming &A_seq_i[1] */
            if (A_seq_i)
               hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, &A_seq_i[1], info,
                                   displs, HYPRE_MPI_INT, 0, new_comm );
            else
               hypre_MPI_Gatherv ( &A_tmp_i[1], num_rows, HYPRE_MPI_INT, A_seq_i, info,
                                   displs, HYPRE_MPI_INT, 0, new_comm );
            if (num_functions > 1)
            {
               hypre_MPI_Gatherv ( dof_func_array[level], num_rows, HYPRE_MPI_INT,
                                   seq_dof_func, info, displs, HYPRE_MPI_INT, 0, new_comm );
               if (my_id == 0) HYPRE_BoomerAMGSetDofFunc(coarse_solver, seq_dof_func);
            }
         }

         if (redundant || my_id == 0)
         {
            displs2 = hypre_CTAlloc(HYPRE_Int, new_num_procs+1);

            /* running sum over A_seq_i[1..size]: row lengths become row
               pointers */
            A_seq_i[0] = 0;
            displs2[0] = 0;
            for (j=1; j < displs[1]; j++)
               A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
            for (i=1; i < new_num_procs; i++)
            {
               for (j=displs[i]; j < displs[i+1]; j++)
               {
                  A_seq_i[j] = A_seq_i[j]+A_seq_i[j-1];
               }
            }
            A_seq_i[size] = A_seq_i[size]+A_seq_i[size-1];
            displs2[new_num_procs] = A_seq_i[size];
            /* displs2[i]: nonzero offset of process i's first row;
               info is overwritten with per-process nonzero counts */
            for (i=1; i < new_num_procs+1; i++)
            {
               displs2[i] = A_seq_i[displs[i]];
               info[i-1] = displs2[i] - displs2[i-1];
            }

            total_nnz = displs2[new_num_procs];
            A_seq_j = hypre_CTAlloc(HYPRE_Int, total_nnz);
            A_seq_data = hypre_CTAlloc(HYPRE_Real, total_nnz);
         }

         /* gather column indices and values */
         if (redundant)
         {
            hypre_MPI_Allgatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                                   A_seq_j, info, displs2,
                                   HYPRE_MPI_INT, new_comm );

            hypre_MPI_Allgatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL,
                                   A_seq_data, info, displs2,
                                   HYPRE_MPI_REAL, new_comm );
         }
         else
         {
            hypre_MPI_Gatherv ( A_tmp_j, num_nonzeros, HYPRE_MPI_INT,
                                A_seq_j, info, displs2,
                                HYPRE_MPI_INT, 0, new_comm );

            hypre_MPI_Gatherv ( A_tmp_data, num_nonzeros, HYPRE_MPI_REAL,
                                A_seq_data, info, displs2,
                                HYPRE_MPI_REAL, 0, new_comm );
         }

         hypre_TFree(info);
         hypre_TFree(displs);
         hypre_TFree(A_tmp_i);
         hypre_TFree(A_tmp_j);
         hypre_TFree(A_tmp_data);

         if (redundant || my_id == 0)
         {
            hypre_TFree(displs2);

            row_starts = hypre_CTAlloc(HYPRE_Int,2);
            row_starts[0] = 0;
            row_starts[1] = size;

            /* Create 1 proc communicator */
            seq_comm = hypre_MPI_COMM_SELF;

            /* the gathered arrays are attached to the diag part; the offd
               part only receives the zero-filled A_seq_offd_i */
            A_seq = hypre_ParCSRMatrixCreate(seq_comm,size,size,
                                             row_starts, row_starts,
                                             0,total_nnz,0);

            A_seq_diag = hypre_ParCSRMatrixDiag(A_seq);
            A_seq_offd = hypre_ParCSRMatrixOffd(A_seq);

            hypre_CSRMatrixData(A_seq_diag) = A_seq_data;
            hypre_CSRMatrixI(A_seq_diag) = A_seq_i;
            hypre_CSRMatrixJ(A_seq_diag) = A_seq_j;
            hypre_CSRMatrixI(A_seq_offd) = A_seq_offd_i;

            /* the vectors share row_starts with A_seq and are flagged as
               not owning it (presumably so that it is released only once
               -- confirm against the vector destroy routine) */
            F_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
            U_seq = hypre_ParVectorCreate(seq_comm, size, row_starts);
            hypre_ParVectorOwnsPartitioning(F_seq) = 0;
            hypre_ParVectorOwnsPartitioning(U_seq) = 0;
            hypre_ParVectorInitialize(F_seq);
            hypre_ParVectorInitialize(U_seq);

            hypre_BoomerAMGSetup(coarse_solver,A_seq,F_seq,U_seq);

            hypre_ParAMGDataCoarseSolver(amg_data) = coarse_solver;
            hypre_ParAMGDataACoarse(amg_data) = A_seq;
            hypre_ParAMGDataFCoarse(amg_data) = F_seq;
            hypre_ParAMGDataUCoarse(amg_data) = U_seq;
         }
         hypre_ParAMGDataNewComm(amg_data) = new_comm;
      }
   }

   return 0;
}
/*--------------------------------------------------------------------------
 * hypre_SStructPMatrixDestroy
 *
 * Drops one reference to pmatrix.  A NULL pointer is accepted and ignored.
 * When the reference count reaches exactly zero, the per-variable stencils
 * and split maps, the stencil and struct matrix of every variable pair,
 * the symmetric-flag rows, the sentries array and finally the pmatrix
 * object itself are released.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
int
hypre_SStructPMatrixDestroy( hypre_SStructPMatrix *pmatrix )
{
   hypre_SStructStencil  **var_stencils;
   hypre_StructStencil  ***pair_stencils;
   hypre_StructMatrix   ***pair_matrices;
   int                   **split_maps;
   int                   **sym_flags;
   int                     n_vars;
   int                     row, col;

   if (!pmatrix)
   {
      return hypre_error_flag;
   }

   /* Other holders remain unless the count has just dropped to zero. */
   hypre_SStructPMatrixRefCount(pmatrix) -= 1;
   if (hypre_SStructPMatrixRefCount(pmatrix) != 0)
   {
      return hypre_error_flag;
   }

   n_vars        = hypre_SStructPMatrixNVars(pmatrix);
   var_stencils  = hypre_SStructPMatrixStencils(pmatrix);
   split_maps    = hypre_SStructPMatrixSMaps(pmatrix);
   pair_stencils = hypre_SStructPMatrixSStencils(pmatrix);
   pair_matrices = hypre_SStructPMatrixSMatrices(pmatrix);
   sym_flags     = hypre_SStructPMatrixSymmetric(pmatrix);

   /* Release everything stored per variable / per variable pair. */
   for (row = 0; row < n_vars; row++)
   {
      HYPRE_SStructStencilDestroy(var_stencils[row]);
      hypre_TFree(split_maps[row]);

      for (col = 0; col < n_vars; col++)
      {
         hypre_StructStencilDestroy(pair_stencils[row][col]);
         hypre_StructMatrixDestroy(pair_matrices[row][col]);
      }

      hypre_TFree(pair_stencils[row]);
      hypre_TFree(pair_matrices[row]);
      hypre_TFree(sym_flags[row]);
   }

   /* Release the top-level arrays and the object itself. */
   hypre_TFree(var_stencils);
   hypre_TFree(split_maps);
   hypre_TFree(pair_stencils);
   hypre_TFree(pair_matrices);
   hypre_TFree(sym_flags);
   hypre_TFree(hypre_SStructPMatrixSEntries(pmatrix));
   hypre_TFree(pmatrix);

   return hypre_error_flag;
}
int hypre_BoomerAMGSetupStats( void *amg_vdata, hypre_ParCSRMatrix *A ) { MPI_Comm comm = hypre_ParCSRMatrixComm(A); hypre_ParAMGData *amg_data = amg_vdata; /*hypre_SeqAMGData *seq_data = hypre_ParAMGDataSeqData(amg_data);*/ /* Data Structure variables */ hypre_ParCSRMatrix **A_array; hypre_ParCSRMatrix **P_array; hypre_CSRMatrix *A_diag; double *A_diag_data; int *A_diag_i; hypre_CSRMatrix *A_offd; double *A_offd_data; int *A_offd_i; hypre_CSRMatrix *P_diag; double *P_diag_data; int *P_diag_i; hypre_CSRMatrix *P_offd; double *P_offd_data; int *P_offd_i; int numrows; HYPRE_BigInt *row_starts; int num_levels; int coarsen_type; int interp_type; int measure_type; double global_nonzeros; double *send_buff; double *gather_buff; /* Local variables */ int level; int j; HYPRE_BigInt fine_size; int min_entries; int max_entries; int num_procs,my_id, num_threads; double min_rowsum; double max_rowsum; double sparse; int i; HYPRE_BigInt coarse_size; int entries; double avg_entries; double rowsum; double min_weight; double max_weight; int global_min_e; int global_max_e; double global_min_rsum; double global_max_rsum; double global_min_wt; double global_max_wt; double *num_coeffs; double *num_variables; double total_variables; double operat_cmplxty; double grid_cmplxty; /* amg solve params */ int max_iter; int cycle_type; int *num_grid_sweeps; int *grid_relax_type; int relax_order; int **grid_relax_points; double *relax_weight; double *omega; double tol; int one = 1; int minus_one = -1; int zero = 0; int smooth_type; int smooth_num_levels; int agg_num_levels; /*int seq_cg = 0;*/ /*if (seq_data) seq_cg = 1;*/ MPI_Comm_size(comm, &num_procs); MPI_Comm_rank(comm,&my_id); num_threads = hypre_NumThreads(); if (my_id == 0) printf("\nNumber of MPI processes: %d , Number of OpenMP threads: %d\n", num_procs, num_threads); A_array = hypre_ParAMGDataAArray(amg_data); P_array = hypre_ParAMGDataPArray(amg_data); num_levels = hypre_ParAMGDataNumLevels(amg_data); coarsen_type = 
hypre_ParAMGDataCoarsenType(amg_data); interp_type = hypre_ParAMGDataInterpType(amg_data); measure_type = hypre_ParAMGDataMeasureType(amg_data); smooth_type = hypre_ParAMGDataSmoothType(amg_data); smooth_num_levels = hypre_ParAMGDataSmoothNumLevels(amg_data); agg_num_levels = hypre_ParAMGDataAggNumLevels(amg_data); /*---------------------------------------------------------- * Get the amg_data data *----------------------------------------------------------*/ num_levels = hypre_ParAMGDataNumLevels(amg_data); max_iter = hypre_ParAMGDataMaxIter(amg_data); cycle_type = hypre_ParAMGDataCycleType(amg_data); num_grid_sweeps = hypre_ParAMGDataNumGridSweeps(amg_data); grid_relax_type = hypre_ParAMGDataGridRelaxType(amg_data); grid_relax_points = hypre_ParAMGDataGridRelaxPoints(amg_data); relax_weight = hypre_ParAMGDataRelaxWeight(amg_data); relax_order = hypre_ParAMGDataRelaxOrder(amg_data); omega = hypre_ParAMGDataOmega(amg_data); tol = hypre_ParAMGDataTol(amg_data); /*block_mode = hypre_ParAMGDataBlockMode(amg_data);*/ send_buff = hypre_CTAlloc(double, 6); #ifdef HYPRE_NO_GLOBAL_PARTITION gather_buff = hypre_CTAlloc(double,6); #else gather_buff = hypre_CTAlloc(double,6*num_procs); #endif if (my_id==0) { printf("\nBoomerAMG SETUP PARAMETERS:\n\n"); printf(" Max levels = %d\n",hypre_ParAMGDataMaxLevels(amg_data)); printf(" Num levels = %d\n\n",num_levels); printf(" Strength Threshold = %f\n", hypre_ParAMGDataStrongThreshold(amg_data)); printf(" Interpolation Truncation Factor = %f\n", hypre_ParAMGDataTruncFactor(amg_data)); printf(" Maximum Row Sum Threshold for Dependency Weakening = %f\n\n", hypre_ParAMGDataMaxRowSum(amg_data)); if (coarsen_type == 0) { printf(" Coarsening Type = Cleary-Luby-Jones-Plassman\n"); } else if (abs(coarsen_type) == 1) { printf(" Coarsening Type = Ruge\n"); } else if (abs(coarsen_type) == 2) { printf(" Coarsening Type = Ruge2B\n"); } else if (abs(coarsen_type) == 3) { printf(" Coarsening Type = Ruge3\n"); } else if (abs(coarsen_type) == 4) { 
printf(" Coarsening Type = Ruge 3c \n"); } else if (abs(coarsen_type) == 5) { printf(" Coarsening Type = Ruge relax special points \n"); } else if (abs(coarsen_type) == 6) { printf(" Coarsening Type = Falgout-CLJP \n"); } else if (abs(coarsen_type) == 8) { printf(" Coarsening Type = PMIS \n"); } else if (abs(coarsen_type) == 10) { printf(" Coarsening Type = HMIS \n"); } else if (abs(coarsen_type) == 11) { printf(" Coarsening Type = Ruge 1st pass only \n"); } else if (abs(coarsen_type) == 9) { printf(" Coarsening Type = PMIS fixed random \n"); } else if (abs(coarsen_type) == 7) { printf(" Coarsening Type = CLJP, fixed random \n"); } if (coarsen_type > 0) { printf(" Hybrid Coarsening (switch to CLJP when coarsening slows)\n"); } if (coarsen_type) printf(" measures are determined %s\n\n", (measure_type ? "globally" : "locally")); if (agg_num_levels) printf(" no. of levels of aggressive coarsening: %d\n\n", agg_num_levels); #ifdef HYPRE_NO_GLOBAL_PARTITION printf( "\n No global partition option chosen.\n\n"); #endif if (interp_type == 0) { printf(" Interpolation = modified classical interpolation\n"); } else if (interp_type == 1) { printf(" Interpolation = LS interpolation \n"); } else if (interp_type == 2) { printf(" Interpolation = modified classical interpolation for hyperbolic PDEs\n"); } else if (interp_type == 3) { printf(" Interpolation = direct interpolation with separation of weights\n"); } else if (interp_type == 4) { printf(" Interpolation = multipass interpolation\n"); } else if (interp_type == 5) { printf(" Interpolation = multipass interpolation with separation of weights\n"); } else if (interp_type == 6) { printf(" Interpolation = extended+i interpolation\n"); } else if (interp_type == 7) { printf(" Interpolation = extended+i interpolation (only when needed)\n"); } else if (interp_type == 8) { printf(" Interpolation = standard interpolation\n"); } else if (interp_type == 9) { printf(" Interpolation = standard interpolation with separation of weights\n"); 
} else if (interp_type == 12) { printf(" FF interpolation \n"); } else if (interp_type == 13) { printf(" FF1 interpolation \n"); } { printf( "\nOperator Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("==================================\n"); #else printf(" nonzero entries p"); printf("er row row sums\n"); printf("lev rows entries sparse min max "); printf("avg min max\n"); printf("======================================="); printf("============================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ num_coeffs = hypre_CTAlloc(double,num_levels); num_variables = hypre_CTAlloc(double,num_levels); for (level = 0; level < num_levels; level++) { { A_diag = hypre_ParCSRMatrixDiag(A_array[level]); A_diag_data = hypre_CSRMatrixData(A_diag); A_diag_i = hypre_CSRMatrixI(A_diag); A_offd = hypre_ParCSRMatrixOffd(A_array[level]); A_offd_data = hypre_CSRMatrixData(A_offd); A_offd_i = hypre_CSRMatrixI(A_offd); row_starts = hypre_ParCSRMatrixRowStarts(A_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(A_array[level]); global_nonzeros = hypre_ParCSRMatrixDNumNonzeros(A_array[level]); num_coeffs[level] = global_nonzeros; num_variables[level] = (double) fine_size; sparse = global_nonzeros /((double) fine_size * (double) fine_size); min_entries = 0; max_entries = 0; min_rowsum = 0.0; max_rowsum = 0.0; if (hypre_CSRMatrixNumRows(A_diag)) { min_entries = (A_diag_i[1]-A_diag_i[0])+(A_offd_i[1]-A_offd_i[0]); for (j = A_diag_i[0]; j < A_diag_i[1]; j++) min_rowsum += A_diag_data[j]; for (j = A_offd_i[0]; j < A_offd_i[1]; j++) min_rowsum += A_offd_data[j]; max_rowsum = min_rowsum; for (j = 0; j < hypre_CSRMatrixNumRows(A_diag); j++) { entries = 
(A_diag_i[j+1]-A_diag_i[j])+(A_offd_i[j+1]-A_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = A_diag_i[j]; i < A_diag_i[j+1]; i++) rowsum += A_diag_data[i]; for (i = A_offd_i[j]; i < A_offd_i[j+1]; i++) rowsum += A_offd_data[i]; min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = global_nonzeros / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; MPI_Reduce(send_buff, gather_buff, 4, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id ==0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = - gather_buff[2]; global_max_rsum = gather_buff[3]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; MPI_Gather(send_buff,4,MPI_DOUBLE,gather_buff,4,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1]-row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*4]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*4 +2]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*4 +1]); 
global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*4 +3]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #else printf( "%2d %7d %8.0f %0.3f %4d %4d", level, fine_size, global_nonzeros, sparse, global_min_e, global_max_e); #endif printf(" %4.1f %10.3e %10.3e\n", avg_entries, global_min_rsum, global_max_rsum); } #endif } if (my_id == 0) { { printf( "\n\nInterpolation Matrix Information:\n\n"); } #if HYPRE_LONG_LONG printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows x cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("======================================\n"); #else printf(" entries/row min max"); printf(" row sums\n"); printf("lev rows cols min max "); printf(" weight weight min max \n"); printf("======================================="); printf("==========================\n"); #endif } /*----------------------------------------------------- * Enter Statistics Loop *-----------------------------------------------------*/ for (level = 0; level < num_levels-1; level++) { { P_diag = hypre_ParCSRMatrixDiag(P_array[level]); P_diag_data = hypre_CSRMatrixData(P_diag); P_diag_i = hypre_CSRMatrixI(P_diag); P_offd = hypre_ParCSRMatrixOffd(P_array[level]); P_offd_data = hypre_CSRMatrixData(P_offd); P_offd_i = hypre_CSRMatrixI(P_offd); row_starts = hypre_ParCSRMatrixRowStarts(P_array[level]); fine_size = hypre_ParCSRMatrixGlobalNumRows(P_array[level]); coarse_size = hypre_ParCSRMatrixGlobalNumCols(P_array[level]); global_nonzeros = hypre_ParCSRMatrixNumNonzeros(P_array[level]); min_weight = 1.0; max_weight = 0.0; max_rowsum = 0.0; min_rowsum = 0.0; min_entries = 0; max_entries = 0; if (hypre_CSRMatrixNumRows(P_diag)) { if (hypre_CSRMatrixNumCols(P_diag)) min_weight = P_diag_data[0]; for (j = P_diag_i[0]; j < P_diag_i[1]; j++) { min_weight = hypre_min(min_weight, P_diag_data[j]); if (P_diag_data[j] 
!= 1.0) max_weight = hypre_max(max_weight, P_diag_data[j]); min_rowsum += P_diag_data[j]; } for (j = P_offd_i[0]; j < P_offd_i[1]; j++) { min_weight = hypre_min(min_weight, P_offd_data[j]); if (P_offd_data[j] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[j]); min_rowsum += P_offd_data[j]; } max_rowsum = min_rowsum; min_entries = (P_diag_i[1]-P_diag_i[0])+(P_offd_i[1]-P_offd_i[0]); max_entries = 0; for (j = 0; j < hypre_CSRMatrixNumRows(P_diag); j++) { entries = (P_diag_i[j+1]-P_diag_i[j])+(P_offd_i[j+1]-P_offd_i[j]); min_entries = hypre_min(entries, min_entries); max_entries = hypre_max(entries, max_entries); rowsum = 0.0; for (i = P_diag_i[j]; i < P_diag_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_diag_data[i]); if (P_diag_data[i] != 1.0) max_weight = hypre_max(max_weight, P_diag_data[i]); rowsum += P_diag_data[i]; } for (i = P_offd_i[j]; i < P_offd_i[j+1]; i++) { min_weight = hypre_min(min_weight, P_offd_data[i]); if (P_offd_data[i] != 1.0) max_weight = hypre_max(max_weight, P_offd_data[i]); rowsum += P_offd_data[i]; } min_rowsum = hypre_min(rowsum, min_rowsum); max_rowsum = hypre_max(rowsum, max_rowsum); } } avg_entries = ((double) global_nonzeros) / ((double) fine_size); } #ifdef HYPRE_NO_GLOBAL_PARTITION numrows = (int)(row_starts[1]-row_starts[0]); if (!numrows) /* if we don't have any rows, then don't have this count toward min row sum or min num entries */ { min_entries = 1000000; min_rowsum = 1.0e7; min_weight = 1.0e7; } send_buff[0] = - (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = - min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = - min_weight; send_buff[5] = max_weight; MPI_Reduce(send_buff, gather_buff, 6, MPI_DOUBLE, MPI_MAX, 0, comm); if (my_id == 0) { global_min_e = - gather_buff[0]; global_max_e = gather_buff[1]; global_min_rsum = -gather_buff[2]; global_max_rsum = gather_buff[3]; global_min_wt = -gather_buff[4]; global_max_wt = gather_buff[5]; #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld 
%3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #else send_buff[0] = (double) min_entries; send_buff[1] = (double) max_entries; send_buff[2] = min_rowsum; send_buff[3] = max_rowsum; send_buff[4] = min_weight; send_buff[5] = max_weight; MPI_Gather(send_buff,6,MPI_DOUBLE,gather_buff,6,MPI_DOUBLE,0,comm); if (my_id == 0) { global_min_e = 1000000; global_max_e = 0; global_min_rsum = 1.0e7; global_max_rsum = 0.0; global_min_wt = 1.0e7; global_max_wt = 0.0; for (j = 0; j < num_procs; j++) { numrows = row_starts[j+1] - row_starts[j]; if (numrows) { global_min_e = hypre_min(global_min_e, (int) gather_buff[j*6]); global_min_rsum = hypre_min(global_min_rsum, gather_buff[j*6+2]); global_min_wt = hypre_min(global_min_wt, gather_buff[j*6+4]); } global_max_e = hypre_max(global_max_e, (int) gather_buff[j*6+1]); global_max_rsum = hypre_max(global_max_rsum, gather_buff[j*6+3]); global_max_wt = hypre_max(global_max_wt, gather_buff[j*6+5]); } #ifdef HYPRE_LONG_LONG printf( "%2d %12lld x %-12lld %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #else printf( "%2d %5d x %-5d %3d %3d", level, fine_size, coarse_size, global_min_e, global_max_e); #endif printf(" %10.3e %9.3e %9.3e %9.3e\n", global_min_wt, global_max_wt, global_min_rsum, global_max_rsum); } #endif } total_variables = 0; operat_cmplxty = 0; for (j=0;j<hypre_ParAMGDataNumLevels(amg_data);j++) { operat_cmplxty += num_coeffs[j] / num_coeffs[0]; total_variables += num_variables[j]; } if (num_variables[0] != 0) grid_cmplxty = total_variables / num_variables[0]; if (my_id == 0 ) { printf("\n\n Complexity: grid = %f\n",grid_cmplxty); printf(" operator = %f\n",operat_cmplxty); } if (my_id == 0) printf("\n\n"); if (my_id == 0) { printf("\n\nBoomerAMG SOLVER PARAMETERS:\n\n"); printf( 
" Maximum number of cycles: %d \n",max_iter); printf( " Stopping Tolerance: %e \n",tol); printf( " Cycle type (1 = V, 2 = W, etc.): %d\n\n", cycle_type); printf( " Relaxation Parameters:\n"); printf( " Visiting Grid: down up coarse\n"); printf( " Number of partial sweeps: %4d %2d %4d \n", num_grid_sweeps[1], num_grid_sweeps[2],num_grid_sweeps[3]); printf( " Type 0=Jac, 3=hGS, 6=hSGS, 9=GE: %4d %2d %4d \n", grid_relax_type[1], grid_relax_type[2],grid_relax_type[3]); #if 1 /* TO DO: may not want this to print if CG in the coarse grid */ printf( " Point types, partial sweeps (1=C, -1=F):\n"); if (grid_relax_points) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", grid_relax_points[1][j]); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", grid_relax_points[2][j]); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", grid_relax_points[3][j]); printf( "\n\n"); } else if (relax_order == 1) { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d %2d", one, minus_one); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d %2d", minus_one, one); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } else { printf( " Pre-CG relaxation (down):"); for (j = 0; j < num_grid_sweeps[1]; j++) printf(" %2d", zero); printf( "\n"); printf( " Post-CG relaxation (up):"); for (j = 0; j < num_grid_sweeps[2]; j++) printf(" %2d", zero); printf( "\n"); printf( " Coarsest grid:"); for (j = 0; j < num_grid_sweeps[3]; j++) printf(" %2d", zero); printf( "\n\n"); } #endif if (smooth_type == 6) for (j=0; j < smooth_num_levels; j++) printf( " Schwarz Relaxation Weight %f level %d\n", hypre_ParAMGDataSchwarzRlxWeight(amg_data),j); for (j=0; j < num_levels; j++) if (relax_weight[j] != 1) 
printf( " Relaxation Weight %f level %d\n",relax_weight[j],j); for (j=0; j < num_levels; j++) if (omega[j] != 1) printf( " Outer relaxation weight %f level %d\n",omega[j],j); } /*if (seq_cg) { hypre_seqAMGSetupStats(amg_data,num_coeffs[0],num_variables[0], operat_cmplxty, grid_cmplxty ); }*/ hypre_TFree(num_coeffs); hypre_TFree(num_variables); hypre_TFree(send_buff); hypre_TFree(gather_buff); return(0); }
/*--------------------------------------------------------------------------
 * hypre_SStructPMatrixCreate
 *
 * Allocates a part matrix for `pgrid` and returns it through `pmatrix_ptr`.
 *
 * For every variable vi, the semi-structured stencil stencils[vi] is split
 * according to the variable each entry couples to (its `vars` value):
 *   sstencils[vi][vj] : struct stencil made of the entries of stencils[vi]
 *                       that couple to variable vj (NULL if there are none)
 *   smaps[vi][i]      : position of entry i inside the split stencil it
 *                       was copied into
 *   smatrices[vi][vj] : struct matrix on the grid of variable vi, created
 *                       only where sstencils[vi][vj] exists
 *   symmetric[vi][vj] : initialized to 0
 * The sentries work array is sized to the largest split stencil, and the
 * reference count starts at 1.
 *
 * The `stencils` array is stored in the matrix by pointer, not copied.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
int
hypre_SStructPMatrixCreate( MPI_Comm               comm,
                            hypre_SStructPGrid    *pgrid,
                            hypre_SStructStencil **stencils,
                            hypre_SStructPMatrix **pmatrix_ptr )
{
   hypre_SStructPMatrix   *pmat;
   hypre_StructStencil  ***split_stencils;
   hypre_StructMatrix   ***struct_mats;
   hypre_StructStencil    *full_stencil;
   hypre_StructGrid       *var_grid;
   hypre_Index            *full_shape;
   hypre_Index           **shapes;
   int                   **entry_maps;
   int                   **sym_flags;
   int                    *entry_vars;
   int                    *counts;
   int                     nvars;
   int                     full_size;
   int                     dim;
   int                     max_split_size;
   int                     row_var, col_var;
   int                     e, target, slot;

   nvars = hypre_SStructPGridNVars(pgrid);

   pmat = hypre_TAlloc(hypre_SStructPMatrix, 1);
   hypre_SStructPMatrixComm(pmat)     = comm;
   hypre_SStructPMatrixPGrid(pmat)    = pgrid;
   hypre_SStructPMatrixStencils(pmat) = stencils;
   hypre_SStructPMatrixNVars(pmat)    = nvars;

   /*-----------------------------------------------------------------------
    * Split each variable's stencil by target variable
    *-----------------------------------------------------------------------*/

   entry_maps     = hypre_TAlloc(int *, nvars);
   split_stencils = hypre_TAlloc(hypre_StructStencil **, nvars);
   counts         = hypre_TAlloc(int, nvars);
   shapes         = hypre_TAlloc(hypre_Index *, nvars);
   max_split_size = 0;

   for (row_var = 0; row_var < nvars; row_var++)
   {
      full_stencil = hypre_SStructStencilSStencil(stencils[row_var]);
      entry_vars   = hypre_SStructStencilVars(stencils[row_var]);
      full_shape   = hypre_StructStencilShape(full_stencil);
      full_size    = hypre_StructStencilSize(full_stencil);
      dim          = hypre_StructStencilDim(full_stencil);

      split_stencils[row_var] = hypre_TAlloc(hypre_StructStencil *, nvars);
      entry_maps[row_var]     = hypre_TAlloc(int, full_size);
      for (col_var = 0; col_var < nvars; col_var++)
      {
         split_stencils[row_var][col_var] = NULL;
         counts[col_var] = 0;
      }

      /* first pass: how many entries couple to each variable */
      for (e = 0; e < full_size; e++)
      {
         counts[entry_vars[e]]++;
      }

      /* allocate a shape array per non-empty target and rewind its counter
       * so the second pass can reuse it as an insertion cursor */
      for (col_var = 0; col_var < nvars; col_var++)
      {
         if (counts[col_var] != 0)
         {
            shapes[col_var] = hypre_TAlloc(hypre_Index, counts[col_var]);
            counts[col_var] = 0;
         }
      }

      /* second pass: copy each offset to its target and remember where */
      for (e = 0; e < full_size; e++)
      {
         target = entry_vars[e];
         slot   = counts[target];
         hypre_CopyIndex(full_shape[e], shapes[target][slot]);
         entry_maps[row_var][e] = slot;
         counts[target] = slot + 1;
      }

      /* the shape arrays are passed on to the new stencils and are not
       * freed in this routine */
      for (col_var = 0; col_var < nvars; col_var++)
      {
         if (counts[col_var] != 0)
         {
            split_stencils[row_var][col_var] =
               hypre_StructStencilCreate(dim, counts[col_var], shapes[col_var]);
         }
         max_split_size = hypre_max(max_split_size, counts[col_var]);
      }
   }

   hypre_SStructPMatrixSMaps(pmat)     = entry_maps;
   hypre_SStructPMatrixSStencils(pmat) = split_stencils;
   hypre_TFree(counts);
   hypre_TFree(shapes);

   /*-----------------------------------------------------------------------
    * One struct matrix per non-empty (vi, vj) coupling
    *-----------------------------------------------------------------------*/

   struct_mats = hypre_TAlloc(hypre_StructMatrix **, nvars);
   for (row_var = 0; row_var < nvars; row_var++)
   {
      struct_mats[row_var] = hypre_TAlloc(hypre_StructMatrix *, nvars);
      for (col_var = 0; col_var < nvars; col_var++)
      {
         struct_mats[row_var][col_var] = NULL;
         if (split_stencils[row_var][col_var] == NULL)
         {
            continue;
         }
         var_grid = hypre_SStructPGridSGrid(pgrid, row_var);
         struct_mats[row_var][col_var] =
            hypre_StructMatrixCreate(comm, var_grid,
                                     split_stencils[row_var][col_var]);
      }
   }
   hypre_SStructPMatrixSMatrices(pmat) = struct_mats;

   /*-----------------------------------------------------------------------
    * Symmetry flags, all cleared
    *-----------------------------------------------------------------------*/

   sym_flags = hypre_TAlloc(int *, nvars);
   for (row_var = 0; row_var < nvars; row_var++)
   {
      sym_flags[row_var] = hypre_TAlloc(int, nvars);
      for (col_var = 0; col_var < nvars; col_var++)
      {
         sym_flags[row_var][col_var] = 0;
      }
   }
   hypre_SStructPMatrixSymmetric(pmat) = sym_flags;

   /* scratch array large enough for any single split stencil */
   hypre_SStructPMatrixSEntriesSize(pmat) = max_split_size;
   hypre_SStructPMatrixSEntries(pmat)     = hypre_TAlloc(int, max_split_size);

   hypre_SStructPMatrixRefCount(pmat) = 1;

   *pmatrix_ptr = pmat;

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_BlockTridiagSetup
 *
 * Prepares the block preconditioner described by `data` for the matrix A:
 *   1. builds index_set2 as the complement of index_set1 within [0, nrows),
 *   2. extracts the four submatrices of A induced by index_set1,
 *   3. creates the work vectors U1/F1 (block 1) and U2/F2 (block 2),
 *   4. creates and sets up one BoomerAMG solver per diagonal block.
 *
 * Both index sets use the layout: element 0 holds the number of indices,
 * elements 1..count hold the indices themselves.  The complement
 * construction assumes index_set1 is non-empty and sorted in ascending
 * order -- TODO confirm against the code that fills index_set1.
 *
 * data : hypre_BlockTridiagData; supplies index_set1, print_level,
 *        threshold, num_sweeps and relax_type, and receives index_set2,
 *        U1, F1, U2, F2, precon1, precon2, A11, A21 and A22
 * A    : matrix to be split into blocks
 * b, x : not referenced by this routine
 *
 * Returns 0.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BlockTridiagSetup(void               *data,
                        hypre_ParCSRMatrix *A,
                        hypre_ParVector    *b,
                        hypre_ParVector    *x)
{
   HYPRE_Int                i, j, *index_set1, print_level, nsweeps, relax_type;
   HYPRE_Int                nrows, nrows1, nrows2, start1, start2, *index_set2;
   HYPRE_Int                count, ierr;
   double                   threshold;
   hypre_ParCSRMatrix     **submatrices;
   HYPRE_Solver             precon1;
   HYPRE_Solver             precon2;
   HYPRE_IJVector           ij_u1, ij_u2, ij_f1, ij_f2;
   hypre_ParVector         *vector;
   MPI_Comm                 comm;
   hypre_BlockTridiagData  *b_data = (hypre_BlockTridiagData *) data;

   HYPRE_ParCSRMatrixGetComm((HYPRE_ParCSRMatrix) A, &comm);

   /*-----------------------------------------------------------------------
    * index_set2 = every row index of A that is not listed in index_set1
    *-----------------------------------------------------------------------*/

   index_set1 = b_data->index_set1;
   nrows1 = index_set1[0];
   nrows  = hypre_ParCSRMatrixNumRows(A);
   nrows2 = nrows - nrows1;
   b_data->index_set2 = hypre_CTAlloc(HYPRE_Int, nrows2+1);
   index_set2 = b_data->index_set2;
   index_set2[0] = nrows2;
   count = 1;
   /* indices below the first entry of index_set1 */
   for (i = 0; i < index_set1[1]; i++)
   {
      index_set2[count++] = i;
   }
   /* indices strictly between consecutive entries of index_set1 */
   for (i = 1; i < nrows1; i++)
   {
      for (j = index_set1[i]+1; j < index_set1[i+1]; j++)
      {
         index_set2[count++] = j;
      }
   }
   /* indices above the last entry of index_set1 */
   for (i = index_set1[nrows1]+1; i < nrows; i++)
   {
      index_set2[count++] = i;
   }

   /*-----------------------------------------------------------------------
    * Split A; slot 0 becomes A11, slot 2 becomes A21, slot 3 becomes A22
    * (slot 1 is destroyed at the end of this routine)
    *-----------------------------------------------------------------------*/

   submatrices = hypre_CTAlloc(hypre_ParCSRMatrix *, 4);
   hypre_ParCSRMatrixExtractSubmatrices(A, index_set1, &submatrices);

   /* from here on the block sizes are taken from the extracted matrices */
   nrows1 = hypre_ParCSRMatrixNumRows(submatrices[0]);
   nrows2 = hypre_ParCSRMatrixNumRows(submatrices[3]);
   start1 = hypre_ParCSRMatrixFirstRowIndex(submatrices[0]);
   start2 = hypre_ParCSRMatrixFirstRowIndex(submatrices[3]);

   /*-----------------------------------------------------------------------
    * Work vectors: U1/F1 span block 1, U2/F2 span block 2
    *-----------------------------------------------------------------------*/

   HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_u1);
   HYPRE_IJVectorSetObjectType(ij_u1, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_u1);
   ierr += HYPRE_IJVectorAssemble(ij_u1);
   hypre_assert(!ierr);

   HYPRE_IJVectorCreate(comm, start1, start1+nrows1-1, &ij_f1);
   HYPRE_IJVectorSetObjectType(ij_f1, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_f1);
   ierr += HYPRE_IJVectorAssemble(ij_f1);
   hypre_assert(!ierr);

   HYPRE_IJVectorCreate(comm, start2, start2+nrows2-1, &ij_u2);
   HYPRE_IJVectorSetObjectType(ij_u2, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_u2);
   ierr += HYPRE_IJVectorAssemble(ij_u2);
   hypre_assert(!ierr);

   /* F2 is the right-hand side that goes with U2 and A22, so its range
    * must be sized by nrows2 (the block-2 row count), not nrows1 */
   HYPRE_IJVectorCreate(comm, start2, start2+nrows2-1, &ij_f2);
   HYPRE_IJVectorSetObjectType(ij_f2, HYPRE_PARCSR);
   ierr  = HYPRE_IJVectorInitialize(ij_f2);
   ierr += HYPRE_IJVectorAssemble(ij_f2);
   hypre_assert(!ierr);

   /* Only the underlying ParVector objects are kept; the IJ handles are
    * not stored anywhere.
    * NOTE(review): confirm how the IJ wrappers are meant to be released. */
   HYPRE_IJVectorGetObject(ij_f1, (void **) &vector);
   b_data->F1 = vector;
   HYPRE_IJVectorGetObject(ij_u1, (void **) &vector);
   b_data->U1 = vector;
   HYPRE_IJVectorGetObject(ij_f2, (void **) &vector);
   b_data->F2 = vector;
   HYPRE_IJVectorGetObject(ij_u2, (void **) &vector);
   b_data->U2 = vector;

   /*-----------------------------------------------------------------------
    * One single-cycle BoomerAMG solver per diagonal block
    *-----------------------------------------------------------------------*/

   print_level = b_data->print_level;
   threshold   = b_data->threshold;
   nsweeps     = b_data->num_sweeps;
   relax_type  = b_data->relax_type;

   HYPRE_BoomerAMGCreate(&precon1);
   HYPRE_BoomerAMGSetMaxIter(precon1, 1);
   HYPRE_BoomerAMGSetCycleType(precon1, 1);
   HYPRE_BoomerAMGSetPrintLevel(precon1, print_level);
   HYPRE_BoomerAMGSetMaxLevels(precon1, 25);
   HYPRE_BoomerAMGSetMeasureType(precon1, 0);
   HYPRE_BoomerAMGSetCoarsenType(precon1, 0);
   HYPRE_BoomerAMGSetStrongThreshold(precon1, threshold);
   HYPRE_BoomerAMGSetNumFunctions(precon1, 1);
   HYPRE_BoomerAMGSetNumSweeps(precon1, nsweeps);
   HYPRE_BoomerAMGSetRelaxType(precon1, relax_type);
   hypre_BoomerAMGSetup(precon1, submatrices[0], b_data->U1, b_data->F1);

   HYPRE_BoomerAMGCreate(&precon2);
   HYPRE_BoomerAMGSetMaxIter(precon2, 1);
   HYPRE_BoomerAMGSetCycleType(precon2, 1);
   HYPRE_BoomerAMGSetPrintLevel(precon2, print_level);
   HYPRE_BoomerAMGSetMaxLevels(precon2, 25);
   /* NOTE(review): the measure type is set to 0 and then to 1 for precon2,
    * whereas precon1 uses 0 only -- confirm which value is intended. */
   HYPRE_BoomerAMGSetMeasureType(precon2, 0);
   HYPRE_BoomerAMGSetCoarsenType(precon2, 0);
   HYPRE_BoomerAMGSetMeasureType(precon2, 1);
   HYPRE_BoomerAMGSetStrongThreshold(precon2, threshold);
   HYPRE_BoomerAMGSetNumFunctions(precon2, 1);
   HYPRE_BoomerAMGSetNumSweeps(precon2, nsweeps);
   HYPRE_BoomerAMGSetRelaxType(precon2, relax_type);
   /* NOTE(review): precon1 is set up with U1/F1 but this call passes NULL
    * vectors instead of U2/F2 -- confirm that this is deliberate. */
   hypre_BoomerAMGSetup(precon2, submatrices[3], NULL, NULL);

   /*-----------------------------------------------------------------------
    * Hand the results over to the data object
    *-----------------------------------------------------------------------*/

   b_data->precon1 = precon1;
   b_data->precon2 = precon2;

   b_data->A11 = submatrices[0];
   hypre_ParCSRMatrixDestroy(submatrices[1]);   /* this block is not kept */
   b_data->A21 = submatrices[2];
   b_data->A22 = submatrices[3];

   hypre_TFree(submatrices);

   return (0);
}
/*--------------------------------------------------------------------------
 * hypre_SStructUMatrixInitialize
 *
 * Sizes and initializes the IJ matrix that backs `matrix`.
 *
 * The number of rows is the grid's local size for HYPRE_PARCSR object
 * types and its ghost-local size for HYPRE_SSTRUCT / HYPRE_STRUCT.  Rows
 * are numbered in the order part -> variable -> box -> z -> y -> x, walking
 * either the plain grid box (PARCSR) or the ghost-expanded box
 * (SSTRUCT/STRUCT).  A row lying inside the plain grid box is given one
 * slot per stencil entry whose split value is -1; other rows keep the
 * zero size they were allocated with.  Each unstructured graph entry then
 * adds its number of U-entries to its row.
 *
 * The temporary column-coordinate and coefficient arrays of `matrix` are
 * allocated with the largest row size encountered.
 *
 * Returns hypre_error_flag.
 *--------------------------------------------------------------------------*/
int
hypre_SStructUMatrixInitialize( hypre_SStructMatrix *matrix )
{
   HYPRE_IJMatrix          ijmatrix    = hypre_SStructMatrixIJMatrix(matrix);
   hypre_SStructGraph     *graph       = hypre_SStructMatrixGraph(matrix);
   hypre_SStructGrid      *grid        = hypre_SStructGraphGrid(graph);
   int                     nparts      = hypre_SStructGraphNParts(graph);
   hypre_SStructPGrid    **pgrids      = hypre_SStructGraphPGrids(graph);
   hypre_SStructStencil ***stencils    = hypre_SStructGraphStencils(graph);
   int                     nUventries  = hypre_SStructGraphNUVEntries(graph);
   int                    *iUventries  = hypre_SStructGraphIUVEntries(graph);
   hypre_SStructUVEntry  **Uventries   = hypre_SStructGraphUVEntries(graph);
   int                   **nvneighbors = hypre_SStructGridNVNeighbors(grid);
   int                     matrix_type = hypre_SStructMatrixObjectType(matrix);

   hypre_StructGrid       *sgrid;
   hypre_SStructStencil   *stencil;
   hypre_BoxArray         *boxes;
   hypre_Box              *gridbox;
   hypre_Box              *ghostbox;
   hypre_Box              *loopbox;
   int                    *split;
   int                    *num_ghost;
   int                    *row_sizes;
   int                     nrows;
   int                     nvars;
   int                     stencil_nnz;
   int                     max_row_size;
   int                     row;
   int                     part, var, entry, box_idx, uv_row;
   int                     ix, iy, iz;
   int                     lx_lo, lx_hi, ly_lo, ly_hi, lz_lo, lz_hi;
   int                     gx_lo, gx_hi, gy_lo, gy_hi, gz_lo, gz_hi;
   int                     inside;

   HYPRE_IJMatrixSetObjectType(ijmatrix, HYPRE_PARCSR);

   /* the ghost-local size provides the row count for the struct-like
    * object types */
   if (matrix_type == HYPRE_PARCSR)
   {
      nrows = hypre_SStructGridLocalSize(grid);
   }
   if (matrix_type == HYPRE_SSTRUCT || matrix_type == HYPRE_STRUCT)
   {
      nrows = hypre_SStructGridGhlocalSize(grid);
   }

   /*-----------------------------------------------------------------------
    * Row sizes contributed by the structured stencils
    *-----------------------------------------------------------------------*/

   row_sizes    = hypre_CTAlloc(int, nrows);
   max_row_size = 0;
   row          = 0;

   for (part = 0; part < nparts; part++)
   {
      nvars = hypre_SStructPGridNVars(pgrids[part]);
      for (var = 0; var < nvars; var++)
      {
         sgrid   = hypre_SStructPGridSGrid(pgrids[part], var);
         stencil = stencils[part][var];
         split   = hypre_SStructMatrixSplit(matrix, part, var);

         /* count the stencil entries whose split value is -1 */
         stencil_nnz = 0;
         for (entry = 0; entry < hypre_SStructStencilSize(stencil); entry++)
         {
            if (split[entry] == -1)
            {
               stencil_nnz++;
            }
         }
         /* TODO: the stencil is assumed to be full/complete for now; no
          * adjustment is made for symmetric matrices. */

         boxes     = hypre_StructGridBoxes(sgrid);
         num_ghost = hypre_StructGridNumGhost(sgrid);

         for (box_idx = 0; box_idx < hypre_BoxArraySize(boxes); box_idx++)
         {
            gridbox  = hypre_BoxArrayBox(boxes, box_idx);
            ghostbox = hypre_BoxCreate();
            loopbox  = hypre_BoxCreate();

            hypre_CopyBox(gridbox, ghostbox);
            hypre_BoxExpand(ghostbox, num_ghost);

            /* choose the index space the row numbering runs over */
            if (matrix_type == HYPRE_SSTRUCT || matrix_type == HYPRE_STRUCT)
            {
               hypre_CopyBox(ghostbox, loopbox);
            }
            if (matrix_type == HYPRE_PARCSR)
            {
               hypre_CopyBox(gridbox, loopbox);
            }

            lx_lo = hypre_BoxIMinX(loopbox);  lx_hi = hypre_BoxIMaxX(loopbox);
            ly_lo = hypre_BoxIMinY(loopbox);  ly_hi = hypre_BoxIMaxY(loopbox);
            lz_lo = hypre_BoxIMinZ(loopbox);  lz_hi = hypre_BoxIMaxZ(loopbox);

            gx_lo = hypre_BoxIMinX(gridbox);  gx_hi = hypre_BoxIMaxX(gridbox);
            gy_lo = hypre_BoxIMinY(gridbox);  gy_hi = hypre_BoxIMaxY(gridbox);
            gz_lo = hypre_BoxIMinZ(gridbox);  gz_hi = hypre_BoxIMaxZ(gridbox);

            for (iz = lz_lo; iz <= lz_hi; iz++)
            {
               for (iy = ly_lo; iy <= ly_hi; iy++)
               {
                  for (ix = lx_lo; ix <= lx_hi; ix++)
                  {
                     /* only points of the plain grid box receive a size;
                      * every visited point still consumes a row number */
                     inside = (ix >= gx_lo) && (iy >= gy_lo) && (iz >= gz_lo) &&
                              (ix <= gx_hi) && (iy <= gy_hi) && (iz <= gz_hi);
                     if (inside)
                     {
                        row_sizes[row] = stencil_nnz;
                        max_row_size = hypre_max(max_row_size, row_sizes[row]);
                     }
                     row++;
                  }
               }
            }

            hypre_BoxDestroy(ghostbox);
            hypre_BoxDestroy(loopbox);
         }

         /* with neighbor data present, leave room for the whole stencil */
         if (nvneighbors[part][var])
         {
            max_row_size =
               hypre_max(max_row_size, hypre_SStructStencilSize(stencil));
         }
      }
   }

   /*-----------------------------------------------------------------------
    * Extra columns required by the unstructured graph entries
    *-----------------------------------------------------------------------*/

   for (entry = 0; entry < nUventries; entry++)
   {
      uv_row = iUventries[entry];
      row_sizes[uv_row] += hypre_SStructUVEntryNUEntries(Uventries[uv_row]);
      max_row_size = hypre_max(max_row_size, row_sizes[uv_row]);
   }

   /* ZTODO: Update row_sizes based on neighbor off-part couplings */
   HYPRE_IJMatrixSetRowSizes (ijmatrix, (const int *) row_sizes);

   hypre_TFree(row_sizes);

   hypre_SStructMatrixTmpColCoords(matrix) =
      hypre_CTAlloc(HYPRE_BigInt, max_row_size);
   hypre_SStructMatrixTmpCoeffs(matrix) =
      hypre_CTAlloc(double, max_row_size);

   /* at this point the processor has the partitioning (creation of ij) */
   HYPRE_IJMatrixInitialize(ijmatrix);

   return hypre_error_flag;
}
/*--------------------------------------------------------------------------
 * hypre_seqAMGCycle
 *
 * Coarse-level solve on gathered data.
 *
 * When the AMG data object holds a coarse matrix, the level-`p_level`
 * right-hand side and current iterate are gathered from every process of
 * the stored communicator into F_coarse and U_coarse,
 * hypre_BoomerAMGSolve is run with the stored coarse solver, and this
 * process's slice of the gathered solution (starting at the first index
 * of its iterate vector) is copied back into Par_U_array[p_level].
 * Processes without a coarse matrix do nothing.
 *
 * The receive counts and displacements are built from the local lengths
 * of the right-hand side and reused for the iterate, so both vectors are
 * presumably expected to have the same local length -- verify with caller.
 *
 * Returns 0; the return value of hypre_BoomerAMGSolve is not propagated.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_seqAMGCycle( hypre_ParAMGData  *amg_data,
                   HYPRE_Int          p_level,
                   hypre_ParVector  **Par_F_array,
                   hypre_ParVector  **Par_U_array )
{
   /* sequential-solve data kept in the AMG object */
   MPI_Comm             new_comm      = hypre_ParAMGDataNewComm(amg_data);
   HYPRE_Solver         coarse_solver = hypre_ParAMGDataCoarseSolver(amg_data);
   hypre_ParCSRMatrix  *A_coarse      = hypre_ParAMGDataACoarse(amg_data);
   hypre_ParVector     *F_coarse      = hypre_ParAMGDataFCoarse(amg_data);
   hypre_ParVector     *U_coarse      = hypre_ParAMGDataUCoarse(amg_data);

   /* this process's pieces of the level vectors */
   hypre_ParVector     *level_u  = Par_U_array[p_level];
   hypre_ParVector     *level_f  = Par_F_array[p_level];
   HYPRE_Int            u_offset = hypre_ParVectorFirstIndex(level_u);
   hypre_Vector        *u_local  = hypre_ParVectorLocalVector(level_u);
   double              *u_data   = hypre_VectorData(u_local);
   HYPRE_Int            u_len    = hypre_VectorSize(u_local);

   hypre_Vector        *f_local;
   double              *f_data;
   double              *gathered;
   double              *solved;
   HYPRE_Int            f_len;
   HYPRE_Int            nprocs;
   HYPRE_Int           *counts;
   HYPRE_Int           *offsets;
   HYPRE_Int            p, k;
   HYPRE_Int            status = 0;

   if (!A_coarse)
   {
      return (status);
   }

   hypre_MPI_Comm_size(new_comm, &nprocs);

   f_local = hypre_ParVectorLocalVector(level_f);
   f_data  = hypre_VectorData(f_local);
   f_len   = hypre_VectorSize(f_local);

   /* per-process lengths and their running sum (the displacements) */
   counts = hypre_CTAlloc(HYPRE_Int, nprocs);
   hypre_MPI_Allgather(&f_len, 1, HYPRE_MPI_INT,
                       counts, 1, HYPRE_MPI_INT, new_comm);

   offsets = hypre_CTAlloc(HYPRE_Int, nprocs+1);
   offsets[0] = 0;
   for (p = 0; p < nprocs; p++)
   {
      offsets[p+1] = offsets[p] + counts[p];
   }

   /* gather the right-hand side ... */
   gathered = hypre_VectorData(hypre_ParVectorLocalVector(F_coarse));
   hypre_MPI_Allgatherv ( f_data, f_len, hypre_MPI_DOUBLE,
                          gathered, counts, offsets, hypre_MPI_DOUBLE,
                          new_comm );

   /* ... and the current iterate, with the same layout */
   gathered = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
   hypre_MPI_Allgatherv ( u_data, u_len, hypre_MPI_DOUBLE,
                          gathered, counts, offsets, hypre_MPI_DOUBLE,
                          new_comm );

   hypre_TFree(offsets);
   hypre_TFree(counts);

   hypre_BoomerAMGSolve(coarse_solver, A_coarse, F_coarse, U_coarse);

   /* copy this process's part of the solution back into the level vector */
   solved = hypre_VectorData(hypre_ParVectorLocalVector(U_coarse));
   for (k = 0; k < u_len; k++)
   {
      u_data[k] = solved[u_offset + k];
   }

   return (status);
}
/*--------------------------------------------------------------------------
 * hypre_BoomerAMGCoarsenCGC
 *
 * CGC algorithm: picks one coarse grid per process and converts CF_marker
 * into a C/F splitting.
 *
 * S             : the strength matrix
 * numberofgrids : the number of locally created coarse grids
 * coarsen_type  : the coarsening type, forwarded to the preparation and
 *                 graph-assembly steps
 * CF_marker     : on entry, positive entries identify the local candidate
 *                 grid of a point; on exit every entry is C_PT if it
 *                 matched the grid chosen for this process and F_PT
 *                 otherwise
 *
 * Steps: AmgCGCPrepare renumbers the markers globally and exchanges the
 * off-processor ones, AmgCGCGraphAssemble builds the grid-connection
 * graph, the graph is replicated with hypre_ParCSRMatrixToCSRMatrixAll,
 * and AmgCGCChoose selects one grid per process.  If the replicated graph
 * is NULL (G has no local rows), every local point becomes F_PT.
 *
 * Returns 0; the status of the helper routines is not inspected.
 *--------------------------------------------------------------------------*/
HYPRE_Int
hypre_BoomerAMGCoarsenCGC (hypre_ParCSRMatrix *S,
                           HYPRE_Int           numberofgrids,
                           HYPRE_Int           coarsen_type,
                           HYPRE_Int          *CF_marker)
{
   MPI_Comm             comm        = hypre_ParCSRMatrixComm(S);
   HYPRE_Int            num_local   =
      hypre_CSRMatrixNumRows (hypre_ParCSRMatrixDiag(S));
   HYPRE_Int           *local_range  = NULL;
   HYPRE_Int           *global_range = NULL;
   HYPRE_Int           *offd_marker  = NULL;
   HYPRE_Int           *elected;
   HYPRE_Int            nprocs, rank;
   HYPRE_Int            my_grid;
   HYPRE_Int            idx;
   HYPRE_Int            status = 0;
   HYPRE_IJMatrix       graph_ij;
   hypre_ParCSRMatrix  *graph_par;
   hypre_CSRMatrix     *graph_seq;

   hypre_MPI_Comm_size (comm, &nprocs);
   hypre_MPI_Comm_rank (comm, &rank);

   /* global grid numbering and exchange of the off-processor markers */
   AmgCGCPrepare (S, numberofgrids, CF_marker, &offd_marker,
                  coarsen_type, &local_range);

   /* graph describing the connections between the candidate grids */
   AmgCGCGraphAssemble (S, local_range, CF_marker, offd_marker,
                        coarsen_type, &graph_ij);
   HYPRE_IJMatrixGetObject (graph_ij, (void**)&graph_par);

#ifdef HYPRE_NO_GLOBAL_PARTITION
   {
      /* classical CGC does not really make sense in combination with
         HYPRE_NO_GLOBAL_PARTITION, but the full range table is rebuilt
         here from the per-process grid counts anyway */
      HYPRE_Int nlocal = local_range[1] - local_range[0];

      global_range = hypre_CTAlloc (HYPRE_Int, nprocs+1);
      hypre_MPI_Allgather (&nlocal, 1, HYPRE_MPI_INT,
                           global_range+1, 1, HYPRE_MPI_INT, comm);
      global_range[0] = 0;
      for (idx = 2; idx <= nprocs; idx++)
      {
         global_range[idx] += global_range[idx-1];
      }
   }
#else
   global_range = local_range;
#endif

   graph_seq = hypre_ParCSRMatrixToCSRMatrixAll (graph_par);

   if (graph_seq == NULL)
   {
      /* no replicated graph is available when G has no local rows */
      for (idx = 0; idx < num_local; idx++)
      {
         CF_marker[idx] = F_PT;
      }
   }
   else
   {
      AmgCGCChoose (graph_seq, global_range, nprocs, &elected);

      /* points on the grid elected for this process become coarse points */
      my_grid = elected[rank];
      for (idx = 0; idx < num_local; idx++)
      {
         if (CF_marker[idx] == my_grid)
         {
            CF_marker[idx] = C_PT;
         }
         else
         {
            CF_marker[idx] = F_PT;
         }
      }

      hypre_CSRMatrixDestroy (graph_seq);
      hypre_TFree (elected);
   }

   HYPRE_IJMatrixDestroy (graph_ij);
   hypre_TFree (local_range);
#ifdef HYPRE_NO_GLOBAL_PARTITION
   /* separate allocation only in this configuration; otherwise it is an
      alias of local_range, which has just been released */
   hypre_TFree (global_range);
#endif
   hypre_TFree (offd_marker);

   return (status);
}