void coarsen ( /* Coarsen until nvtxs <= vmax, compute and uncoarsen. */ struct vtx_data **graph, /* array of vtx data for graph */ int nvtxs, /* number of vertices in graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertices weights being used? */ int using_ewgts, /* are edge weights being used? */ float *term_wgts[], /* terminal weights */ int igeom, /* dimension for geometric information */ float **coords, /* coordinates for vertices */ double **yvecs, /* eigenvectors returned */ int ndims, /* number of eigenvectors to calculate */ int solver_flag, /* which eigensolver to use */ int vmax, /* largest subgraph to stop coarsening */ double eigtol, /* tolerence in eigen calculation */ int nstep, /* number of coarsenings between RQI steps */ int step, /* current step number */ int give_up /* has coarsening bogged down? */ ) { extern FILE *Output_File; /* output file or null */ extern int DEBUG_COARSEN; /* debug flag for coarsening */ extern int PERTURB; /* was matrix perturbed in Lanczos? */ extern double COARSEN_RATIO_MIN; /* min vtx reduction for coarsening */ extern int COARSEN_VWGTS; /* use vertex weights while coarsening? */ extern int COARSEN_EWGTS; /* use edge weights while coarsening? */ extern double refine_time; /* time for RQI/Symmlq iterative refinement */ struct vtx_data **cgraph; /* array of vtx data for coarsened graph */ struct orthlink *orthlist; /* list of lower evecs to suppress */ struct orthlink *newlink; /* lower evec to suppress */ double *cyvecs[MAXDIMS + 1]; /* eigenvectors for subgraph */ double evals[MAXDIMS + 1]; /* eigenvalues returned */ double goal[MAXSETS]; /* needed for convergence mode = 1 */ double *r1, *r2, *work; /* space needed by symmlq/RQI */ double *v, *w, *x, *y; /* space needed by symmlq/RQI */ double *gvec; /* rhs vector in extended eigenproblem */ double evalest; /* eigenvalue estimate returned by RQI */ double maxdeg; /* maximum weighted degree of a vertex */ float **ccoords; /* coordinates for coarsened graph */ float *cterm_wgts[MAXSETS]; /* coarse graph terminal weights */ float *new_term_wgts[MAXSETS]; /* terminal weights for Bui's method*/ float **real_term_wgts; /* one of the above */ float *twptr; /* loops through term_wgts */ float *twptr_save; /* copy of twptr */ float *ctwptr; /* loops through cterm_wgts */ double *vwsqrt = NULL; /* square root of vertex weights */ double norm, alpha; /* values used for orthogonalization */ double initshift; /* initial shift for RQI */ double total_vwgt; /* sum of all the vertex weights */ double w1, w2; /* weights of two sets */ double sigma; /* norm of rhs in extended eigenproblem */ double term_tot; /* sum of all terminal weights */ int *space; /* room for assignment in Lanczos */ int *morespace; /* room for assignment in Lanczos */ int *v2cv; /* mapping from vertices to coarse vtxs */ int vwgt_max; /* largest vertex weight */ int oldperturb; /* saves PERTURB value */ int cnvtxs; /* number of vertices in coarsened graph */ int cnedges; /* number of edges in coarsened graph */ int nextstep; /* next step in RQI test */ int nsets; /* number of sets being created */ int i, j; /* loop counters */ double time; /* time marker */ double dot(), ch_normalize(), find_maxdeg(), seconds(); struct orthlink *makeorthlnk(); void makevwsqrt(), eigensolve(), coarsen1(), orthogvec(), rqi_ext(); void ch_interpolate(), orthog1(), rqi(), scadd(), free_graph(); if (DEBUG_COARSEN > 0) { printf("<Entering coarsen, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n", step, nvtxs, nedges, vmax); } nsets = 1 << ndims; /* Is problem small enough to solve? */ if (nvtxs <= vmax || give_up) { if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); } else vwsqrt = NULL; maxdeg = find_maxdeg(graph, nvtxs, using_ewgts, (float *) NULL); if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } eigensolve(graph, nvtxs, nedges, maxdeg, vwgt_max, vwsqrt, using_vwgts, using_ewgts, real_term_wgts, igeom, coords, yvecs, evals, 0, space, goal, solver_flag, FALSE, 0, ndims, 3, eigtol); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(real_term_wgts[1]); } sfree(space); if (vwsqrt != NULL) sfree(vwsqrt); return; } /* Otherwise I have to coarsen. */ if (coords != NULL) { ccoords = smalloc(igeom * sizeof(float *)); } else { ccoords = NULL; } coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv, igeom, coords, ccoords, using_ewgts); /* If coarsening isn't working very well, give up and partition. */ give_up = FALSE; if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax ) { printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); printf(" Recursive coarsening being stopped prematurely.\n"); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); fprintf(Output_File, " Recursive coarsening being stopped prematurely.\n"); } give_up = TRUE; } /* Create space for subgraph yvecs. */ for (i = 1; i <= ndims; i++) { cyvecs[i] = smalloc((cnvtxs + 1) * sizeof(double)); } /* Make coarse version of terminal weights. */ if (term_wgts[1] != NULL) { twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (i = (cnvtxs + 1) * (nsets - 1); i ; i--) { *twptr++ = 0; } twptr = twptr_save; for (j = 1; j < nsets; j++) { cterm_wgts[j] = twptr; twptr += cnvtxs + 1; } for (j = 1; j < nsets; j++) { ctwptr = cterm_wgts[j]; twptr = term_wgts[j]; for (i = 1; i < nvtxs; i++){ ctwptr[v2cv[i]] += twptr[i]; } } } else { cterm_wgts[1] = NULL; } /* Now recurse on coarse subgraph. */ nextstep = step + 1; coarsen(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts, igeom, ccoords, cyvecs, ndims, solver_flag, vmax, eigtol, nstep, nextstep, give_up); ch_interpolate(yvecs, cyvecs, ndims, graph, nvtxs, v2cv, using_ewgts); sfree(cterm_wgts[1]); sfree(v2cv); /* I need to do Rayleigh Quotient Iteration each nstep stages. */ time = seconds(); if (!(step % nstep)) { oldperturb = PERTURB; PERTURB = FALSE; /* Should I do some orthogonalization here against vwsqrt? */ if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); for (i = 1; i <= ndims; i++) orthogvec(yvecs[i], 1, nvtxs, vwsqrt); } else for (i = 1; i <= ndims; i++) orthog1(yvecs[i], 1, nvtxs); /* Allocate space that will be needed in RQI. */ r1 = smalloc(7 * (nvtxs + 1) * sizeof(double)); r2 = &r1[nvtxs + 1]; v = &r1[2 * (nvtxs + 1)]; w = &r1[3 * (nvtxs + 1)]; x = &r1[4 * (nvtxs + 1)]; y = &r1[5 * (nvtxs + 1)]; work = &r1[6 * (nvtxs + 1)]; if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); morespace = smalloc((nvtxs) * sizeof(int)); initshift = 0; orthlist = NULL; for (i = 1; i < ndims; i++) { ch_normalize(yvecs[i], 1, nvtxs); rqi(graph, yvecs, i, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); /* Now orthogonalize higher yvecs against this one. */ norm = dot(yvecs[i], 1, nvtxs, yvecs[i]); for (j = i + 1; j <= ndims; j++) { alpha = -dot(yvecs[j], 1, nvtxs, yvecs[i]) / norm; scadd(yvecs[j], 1, nvtxs, alpha, yvecs[i]); } /* Now prepare for next pass through loop. */ initshift = evalest; newlink = makeorthlnk(); newlink->vec = yvecs[i]; newlink->pntr = orthlist; orthlist = newlink; } ch_normalize(yvecs[ndims], 1, nvtxs); if (term_wgts[1] != NULL && ndims == 1) { /* Solve extended eigen problem */ /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } /* Following only works for bisection. */ w1 = goal[0]; w2 = goal[1]; sigma = sqrt(4*w1*w2/(w1+w2)); gvec = smalloc((nvtxs+1)*sizeof(double)); term_tot = sigma; /* Avoids lint warning for now. */ term_tot = 0; for (j=1; j<=nvtxs; j++) term_tot += (real_term_wgts[1])[j]; term_tot /= (w1+w2); if (using_vwgts) { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j]/graph[j]->vwgt - term_tot; } } else { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j] - term_tot; } } rqi_ext(); sfree(gvec); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(new_term_wgts[1]); } } else { rqi(graph, yvecs, ndims, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); } refine_time += seconds() - time; /* Free the space allocated for RQI. */ sfree(morespace); sfree(space); while (orthlist != NULL) { newlink = orthlist->pntr; sfree(orthlist); orthlist = newlink; } sfree(r1); if (vwsqrt != NULL) sfree(vwsqrt); PERTURB = oldperturb; } if (DEBUG_COARSEN > 0) { printf(" Leaving coarsen, step=%d\n", step); } /* Free the space that was allocated. */ if (ccoords != NULL) { for (i = 0; i < igeom; i++) sfree(ccoords[i]); sfree(ccoords); } for (i = ndims; i > 0; i--) sfree(cyvecs[i]); free_graph(cgraph); }
int submain ( struct vtx_data **graph, /* data structure for graph */ int nvtxs, /* number of vertices in full graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertex weights being used? */ int using_ewgts, /* are edge weights being used? */ int igeom, /* geometry dimension if using inertial method */ float **coords, /* coordinates of vertices if used */ char *outassignname, /* name of assignment output file */ char *outfilename, /* in which to print output metrics */ int *assignment, /* set number of each vtx (length n) */ double *goal, /* desired sizes for each set */ int architecture, /* 0=> hypercube, d=> d-dimensional mesh */ int ndims_tot, /* total number hypercube dimensions */ int mesh_dims[3], /* extent of mesh in 3 directions */ int global_method, /* global partitioning algorithm */ int local_method, /* local partitioning algorithm */ int rqi_flag, /* use RQI/Symmlq eigensolver? */ int vmax, /* if so, how many vtxs to coarsen down to */ int ndims, /* number of eigenvectors (2^d sets) */ double eigtol, /* tolerance on eigenvectors */ long seed /* for random graph mutations */ ) { extern int ECHO; /* controls output to file or screen */ extern int CHECK_INPUT; /* should I check input for correctness? */ extern int SEQUENCE; /* just generate spectal ordering? */ extern int OUTPUT_ASSIGN; /* print assignment to a file? */ extern int OUTPUT_METRICS; /* controls formatting of output */ extern int PERTURB; /* perturb matrix if quad/octasection? */ extern int NSQRTS; /* number of square roots to precompute */ extern int KL_METRIC; /* KL interset cost: 1=>cuts, 2=>hops */ extern int LANCZOS_TYPE; /* type of Lanczos to use */ extern int REFINE_MAP; /* use greedy strategy to improve mapping? */ extern int REFINE_PARTITION;/* number of calls to pairwise_refine to make */ extern int VERTEX_COVER; /* use matching to reduce vertex separator? */ extern int CONNECTED_DOMAINS; /* force subdomain connectivity at end? */ extern int INTERNAL_VERTICES; /* greedily increase internal vtxs? */ extern int DEBUG_INTERNAL; /* debug code about force_internal? */ extern int DEBUG_REFINE_PART; /* debug code about refine_part? */ extern int DEBUG_REFINE_MAP; /* debug code about refine_map? */ extern int DEBUG_MACH_PARAMS; /* print out computed machine params? */ extern int DEBUG_TRACE; /* trace main execution path */ extern int PRINT_HEADERS; /* print section headings for output? */ extern int TIME_KERNELS; /* benchmark some numerical kernels? */ extern double start_time; /* time code was entered */ extern double total_time; /* (almost) total time spent in code */ extern double check_input_time; /* time spent checking input */ extern double partition_time; /* time spent partitioning graph */ extern double kernel_time; /* time spent benchmarking kernels */ extern double count_time; /* time spent evaluating the answer */ extern double print_assign_time; /* time spent writing output file */ FILE *outfile; /* output file */ struct vtx_data **graph2; /* data structure for graph */ int hop_mtx[MAXSETS][MAXSETS]; /* between-set hop cost for KL */ double *vwsqrt; /* sqrt of vertex weights (length nvtxs+1) */ double time, time1; /* timing variables */ char *graphname, *geomname; /* names of input files */ char *inassignname; /* name of assignment input file */ int old_nsqrts; /* old value of NSQRTS */ int append; /* append output to existing file? */ int nsets; /* number of sets created by each divide */ int nsets_tot; /* total number of sets */ int bits; /* used in computing hops */ int flag; /* return code from check_input */ int old_perturb=0; /* saves original pertubation flag */ int i, j, k; /* loop counters */ double seconds(); void setrandom(long int seed); int check_input(), refine_part(); void connect_enforce(); void setrandom(), makevwsqrt(), balance(), countup(); void force_internal(), sequence(), reflect_input(); void machine_params(), assign_out(), refine_map(); void time_out(), time_kernels(), strout(); if (DEBUG_TRACE > 0) { printf("<Entering submain>\n"); } /* First check all the input for consistency. */ if (architecture == 1) mesh_dims[1] = mesh_dims[2] = 1; else if (architecture == 2) mesh_dims[2] = 1; /* Check for simple special case of 1 processor. */ k = 0; if (architecture == 0) k = 1 << ndims_tot; else if (architecture > 0) k = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; if (k == 1) { for (i = 1; i <= nvtxs; i++) assignment[i] = 0; if (OUTPUT_ASSIGN > 0 && outassignname != NULL) { time1 = seconds(); assign_out(nvtxs, assignment, k, outassignname); print_assign_time += seconds() - time1; } return(0); } graphname = Graph_File_Name; geomname = Geometry_File_Name; inassignname = Assign_In_File_Name; /* Turn of perturbation if using bisection */ if (ndims == 1) { old_perturb = PERTURB; PERTURB = FALSE; } if (ECHO < 0 && outfilename != NULL) { /* Open output file */ outfile = fopen(outfilename, "r"); if (outfile != NULL) { append = TRUE; fclose(outfile); } else append = FALSE; outfile = fopen(outfilename, "a"); if (append) { fprintf(outfile, "\n------------------------------------------------\n\n"); } } else { outfile = NULL; } Output_File = outfile; if (outfile != NULL && PRINT_HEADERS) { fprintf(outfile, "\n Chaco 2.0\n"); fprintf(outfile, " Sandia National Laboratories\n\n"); } if (CHECK_INPUT) { /* Check the input for inconsistencies. */ time1 = seconds(); flag = check_input(graph, nvtxs, nedges, igeom, coords, graphname, assignment, goal, architecture, ndims_tot, mesh_dims, global_method, local_method, rqi_flag, &vmax, ndims, eigtol); check_input_time += seconds() - time1; if (flag) { strout("ERROR IN INPUT.\n"); return (1); } } if (ECHO != 0) { reflect_input(nvtxs, nedges, igeom, graphname, geomname, inassignname, outassignname, outfilename, architecture, ndims_tot, mesh_dims, global_method, local_method, rqi_flag, vmax, ndims, eigtol, seed, outfile); } if (PRINT_HEADERS) { printf("\n\nStarting to partition ...\n\n"); if (Output_File != NULL ) { fprintf(Output_File, "\n\nStarting to partition ... (residual, warning and error messages only)\n\n"); } } time = seconds(); /* Perform some one-time initializations. */ setrandom(seed); machine_params(&DOUBLE_EPSILON, &DOUBLE_MAX); if (DEBUG_MACH_PARAMS > 0) { printf("Machine parameters:\n"); printf(" DOUBLE_EPSILON = %e\n", DOUBLE_EPSILON); printf(" DOUBLE_MAX = %e\n", DOUBLE_MAX); } nsets = (1 << ndims); old_nsqrts = NSQRTS; if (nvtxs < NSQRTS && !using_vwgts) { NSQRTS = nvtxs; } SQRTS = smalloc_ret((NSQRTS + 1) * sizeof(double)); if (SQRTS == NULL) { strout("ERROR: No space to allocate sqrts\n"); return(1); } for (i = 1; i <= NSQRTS; i++) SQRTS[i] = sqrt((double) i); if (using_vwgts && (global_method == 1 || global_method == 2)) { vwsqrt = smalloc_ret((nvtxs + 1) * sizeof(double)); if (vwsqrt == NULL) { strout("ERROR: No space to allocate vwsqrt\n"); sfree(SQRTS); NSQRTS = old_nsqrts; return(1); } makevwsqrt(vwsqrt, graph, nvtxs); } else vwsqrt = NULL; if (TIME_KERNELS) { time1 = seconds(); time_kernels(graph, nvtxs, vwsqrt); kernel_time += seconds() - time1; } if (SEQUENCE) { sequence(graph, nvtxs, nedges, using_ewgts, vwsqrt, LANCZOS_TYPE, rqi_flag, vmax, eigtol); goto End_Label; } /* Initialize cost function for KL-spiff */ if (global_method == 1 || local_method == 1) { for (i = 0; i < nsets; i++) { hop_mtx[i][i] = 0; for (j = 0; j < i; j++) { if (KL_METRIC == 2) { /* Count hypercube hops */ hop_mtx[i][j] = 0; bits = i ^ j; while (bits) { if (bits & 1) { ++hop_mtx[i][j]; } bits >>= 1; } } else if (KL_METRIC == 1) { /* Count cut edges */ hop_mtx[i][j] = 1; } hop_mtx[j][i] = hop_mtx[i][j]; } }