void coarsen ( /* Coarsen until nvtxs <= vmax, compute and uncoarsen. */ struct vtx_data **graph, /* array of vtx data for graph */ int nvtxs, /* number of vertices in graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertices weights being used? */ int using_ewgts, /* are edge weights being used? */ float *term_wgts[], /* terminal weights */ int igeom, /* dimension for geometric information */ float **coords, /* coordinates for vertices */ double **yvecs, /* eigenvectors returned */ int ndims, /* number of eigenvectors to calculate */ int solver_flag, /* which eigensolver to use */ int vmax, /* largest subgraph to stop coarsening */ double eigtol, /* tolerence in eigen calculation */ int nstep, /* number of coarsenings between RQI steps */ int step, /* current step number */ int give_up /* has coarsening bogged down? */ ) { extern FILE *Output_File; /* output file or null */ extern int DEBUG_COARSEN; /* debug flag for coarsening */ extern int PERTURB; /* was matrix perturbed in Lanczos? */ extern double COARSEN_RATIO_MIN; /* min vtx reduction for coarsening */ extern int COARSEN_VWGTS; /* use vertex weights while coarsening? */ extern int COARSEN_EWGTS; /* use edge weights while coarsening? 
*/ extern double refine_time; /* time for RQI/Symmlq iterative refinement */ struct vtx_data **cgraph; /* array of vtx data for coarsened graph */ struct orthlink *orthlist; /* list of lower evecs to suppress */ struct orthlink *newlink; /* lower evec to suppress */ double *cyvecs[MAXDIMS + 1]; /* eigenvectors for subgraph */ double evals[MAXDIMS + 1]; /* eigenvalues returned */ double goal[MAXSETS]; /* needed for convergence mode = 1 */ double *r1, *r2, *work; /* space needed by symmlq/RQI */ double *v, *w, *x, *y; /* space needed by symmlq/RQI */ double *gvec; /* rhs vector in extended eigenproblem */ double evalest; /* eigenvalue estimate returned by RQI */ double maxdeg; /* maximum weighted degree of a vertex */ float **ccoords; /* coordinates for coarsened graph */ float *cterm_wgts[MAXSETS]; /* coarse graph terminal weights */ float *new_term_wgts[MAXSETS]; /* terminal weights for Bui's method*/ float **real_term_wgts; /* one of the above */ float *twptr; /* loops through term_wgts */ float *twptr_save; /* copy of twptr */ float *ctwptr; /* loops through cterm_wgts */ double *vwsqrt = NULL; /* square root of vertex weights */ double norm, alpha; /* values used for orthogonalization */ double initshift; /* initial shift for RQI */ double total_vwgt; /* sum of all the vertex weights */ double w1, w2; /* weights of two sets */ double sigma; /* norm of rhs in extended eigenproblem */ double term_tot; /* sum of all terminal weights */ int *space; /* room for assignment in Lanczos */ int *morespace; /* room for assignment in Lanczos */ int *v2cv; /* mapping from vertices to coarse vtxs */ int vwgt_max; /* largest vertex weight */ int oldperturb; /* saves PERTURB value */ int cnvtxs; /* number of vertices in coarsened graph */ int cnedges; /* number of edges in coarsened graph */ int nextstep; /* next step in RQI test */ int nsets; /* number of sets being created */ int i, j; /* loop counters */ double time; /* time marker */ double dot(), ch_normalize(), 
find_maxdeg(), seconds(); struct orthlink *makeorthlnk(); void makevwsqrt(), eigensolve(), coarsen1(), orthogvec(), rqi_ext(); void ch_interpolate(), orthog1(), rqi(), scadd(), free_graph(); if (DEBUG_COARSEN > 0) { printf("<Entering coarsen, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n", step, nvtxs, nedges, vmax); } nsets = 1 << ndims; /* Is problem small enough to solve? */ if (nvtxs <= vmax || give_up) { if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); } else vwsqrt = NULL; maxdeg = find_maxdeg(graph, nvtxs, using_ewgts, (float *) NULL); if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } eigensolve(graph, nvtxs, nedges, maxdeg, vwgt_max, vwsqrt, using_vwgts, using_ewgts, real_term_wgts, igeom, coords, yvecs, evals, 0, space, goal, solver_flag, FALSE, 0, ndims, 3, eigtol); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(real_term_wgts[1]); } sfree(space); if (vwsqrt != NULL) sfree(vwsqrt); return; } /* Otherwise I have to coarsen. 
*/ if (coords != NULL) { ccoords = smalloc(igeom * sizeof(float *)); } else { ccoords = NULL; } coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv, igeom, coords, ccoords, using_ewgts); /* If coarsening isn't working very well, give up and partition. */ give_up = FALSE; if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax ) { printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); printf(" Recursive coarsening being stopped prematurely.\n"); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); fprintf(Output_File, " Recursive coarsening being stopped prematurely.\n"); } give_up = TRUE; } /* Create space for subgraph yvecs. */ for (i = 1; i <= ndims; i++) { cyvecs[i] = smalloc((cnvtxs + 1) * sizeof(double)); } /* Make coarse version of terminal weights. */ if (term_wgts[1] != NULL) { twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (i = (cnvtxs + 1) * (nsets - 1); i ; i--) { *twptr++ = 0; } twptr = twptr_save; for (j = 1; j < nsets; j++) { cterm_wgts[j] = twptr; twptr += cnvtxs + 1; } for (j = 1; j < nsets; j++) { ctwptr = cterm_wgts[j]; twptr = term_wgts[j]; for (i = 1; i < nvtxs; i++){ ctwptr[v2cv[i]] += twptr[i]; } } } else { cterm_wgts[1] = NULL; } /* Now recurse on coarse subgraph. */ nextstep = step + 1; coarsen(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts, igeom, ccoords, cyvecs, ndims, solver_flag, vmax, eigtol, nstep, nextstep, give_up); ch_interpolate(yvecs, cyvecs, ndims, graph, nvtxs, v2cv, using_ewgts); sfree(cterm_wgts[1]); sfree(v2cv); /* I need to do Rayleigh Quotient Iteration each nstep stages. */ time = seconds(); if (!(step % nstep)) { oldperturb = PERTURB; PERTURB = FALSE; /* Should I do some orthogonalization here against vwsqrt? 
*/ if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); for (i = 1; i <= ndims; i++) orthogvec(yvecs[i], 1, nvtxs, vwsqrt); } else for (i = 1; i <= ndims; i++) orthog1(yvecs[i], 1, nvtxs); /* Allocate space that will be needed in RQI. */ r1 = smalloc(7 * (nvtxs + 1) * sizeof(double)); r2 = &r1[nvtxs + 1]; v = &r1[2 * (nvtxs + 1)]; w = &r1[3 * (nvtxs + 1)]; x = &r1[4 * (nvtxs + 1)]; y = &r1[5 * (nvtxs + 1)]; work = &r1[6 * (nvtxs + 1)]; if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); morespace = smalloc((nvtxs) * sizeof(int)); initshift = 0; orthlist = NULL; for (i = 1; i < ndims; i++) { ch_normalize(yvecs[i], 1, nvtxs); rqi(graph, yvecs, i, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); /* Now orthogonalize higher yvecs against this one. */ norm = dot(yvecs[i], 1, nvtxs, yvecs[i]); for (j = i + 1; j <= ndims; j++) { alpha = -dot(yvecs[j], 1, nvtxs, yvecs[i]) / norm; scadd(yvecs[j], 1, nvtxs, alpha, yvecs[i]); } /* Now prepare for next pass through loop. */ initshift = evalest; newlink = makeorthlnk(); newlink->vec = yvecs[i]; newlink->pntr = orthlist; orthlist = newlink; } ch_normalize(yvecs[ndims], 1, nvtxs); if (term_wgts[1] != NULL && ndims == 1) { /* Solve extended eigen problem */ /* If not coarsening ewgts, then need care with term_wgts. 
*/ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } /* Following only works for bisection. */ w1 = goal[0]; w2 = goal[1]; sigma = sqrt(4*w1*w2/(w1+w2)); gvec = smalloc((nvtxs+1)*sizeof(double)); term_tot = sigma; /* Avoids lint warning for now. */ term_tot = 0; for (j=1; j<=nvtxs; j++) term_tot += (real_term_wgts[1])[j]; term_tot /= (w1+w2); if (using_vwgts) { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j]/graph[j]->vwgt - term_tot; } } else { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j] - term_tot; } } rqi_ext(); sfree(gvec); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(new_term_wgts[1]); } } else { rqi(graph, yvecs, ndims, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); } refine_time += seconds() - time; /* Free the space allocated for RQI. */ sfree(morespace); sfree(space); while (orthlist != NULL) { newlink = orthlist->pntr; sfree(orthlist); orthlist = newlink; } sfree(r1); if (vwsqrt != NULL) sfree(vwsqrt); PERTURB = oldperturb; } if (DEBUG_COARSEN > 0) { printf(" Leaving coarsen, step=%d\n", step); } /* Free the space that was allocated. */ if (ccoords != NULL) { for (i = 0; i < igeom; i++) sfree(ccoords[i]); sfree(ccoords); } for (i = ndims; i > 0; i--) sfree(cyvecs[i]); free_graph(cgraph); }
/*
 * Perform KL between two sets.
 *
 * Builds the subgraph induced by set1 and set2, runs klspiff() on it as a
 * two-way problem, then writes the result back into new_assign, sizes and the
 * per-set doubly linked lists in set_list.
 *
 * Returns nonzero if at least one subgraph vertex ended up with a different
 * local assignment than it started with, zero otherwise.
 */
int kl_refine(
    struct vtx_data **graph,          /* graph data structure */
    struct vtx_data **subgraph,       /* space for subgraph to refine */
    struct bilist    *set_list,       /* lists of vtxs in each set */
    struct bilist    *vtx_elems,      /* start of storage for lists */
    int              *new_assign,     /* set assignments for all vertices */
    int               set1,
    int               set2,           /* two sets being refined */
    int              *glob2loc,       /* maps vertices to subgraph vertices */
    int              *loc2glob,       /* maps subgraph vertices to vertices */
    int              *sub_assign,     /* new assignment for subgraphs */
    int              *old_sub_assign, /* current assignment for subgraphs */
    int              *degrees,        /* space for forming subgraphs */
    int               using_ewgts,    /* are edge weights being used? */
    int             (*hops)[MAXSETS], /* KL set preferences */
    double           *goal,           /* desired set sizes */
    int              *sizes,          /* number of vertices in different sets */
    float            *term_wgts[],    /* space for terminal propagation weights */
    int               architecture,   /* 0 => hypercube, d => d-dimensional mesh */
    int               mesh_dims[3]    /* if mesh, how big is it? */
)
{
    extern int    TERM_PROP;     /* perform terminal propagation? */
    extern double KL_IMBALANCE;  /* fractional imbalance allowed in KL */
    struct bilist *elem;         /* list node of the vertex being moved */
    double pair_goal[2];         /* rescaled goals for the two sets */
    double set_wgts[2];          /* weight currently in each local set */
    double max_degree;           /* largest degree in the subgraph */
    double scale;                /* actual pair weight / requested pair weight */
    double slack;                /* imbalance allowance derived from the goals */
    int   *no_list;              /* NULL list pointer handed to klspiff */
    int    max_vwgt;             /* largest vertex weight */
    int    tolerance;            /* largest set deviation allowed in KL */
    int    nlocal;               /* number of vertices in the subgraph */
    int    wgt_first;            /* vertex weight sum of the first set */
    int    wgt_second;           /* vertex weight sum of the second set */
    int    nlocal_edges;         /* number of edges in the subgraph */
    int    label0, label1;       /* global sets that local 0 / non-0 map to */
    int    unchanged;            /* vertices whose local assignment is as before */
    int    gvtx;                 /* global vertex number */
    int    dest;                 /* global set receiving the current vertex */
    int    loc;                  /* loops over subgraph vertices */
    double find_maxdeg();
    void   make_maps_ref(), make_subgraph(), remake_graph();
    void   klspiff(), make_terms_ref(), count_weights();

    /* Gather the vertex maps, local assignment and weight sums. */
    no_list = NULL;
    make_maps_ref(graph, set_list, vtx_elems, new_assign, sub_assign, set1, set2, glob2loc,
                  loc2glob, &nlocal, &max_vwgt, &wgt_first, &wgt_second);

    /* Remember the starting assignment so changes can be counted later. */
    for (loc = 1; loc <= nlocal; loc++) {
        old_sub_assign[loc] = sub_assign[loc];
    }

    /* Rescale the two goals so that together they equal the weight on hand. */
    scale = (wgt_first + wgt_second) / (goal[set1] + goal[set2]);
    pair_goal[0] = scale * goal[set1];
    pair_goal[1] = scale * goal[set2];

    if (TERM_PROP) {
        make_terms_ref(graph, using_ewgts, nlocal, loc2glob, set1, set2, new_assign, architecture,
                       mesh_dims, term_wgts);
    }

    /* Per the original note: new_assign has overwritten set2 with set1 by
       now, which is why only set1 is passed to make_subgraph. */
    make_subgraph(graph, subgraph, nlocal, &nlocal_edges, new_assign, set1, glob2loc, loc2glob,
                  degrees, using_ewgts);

    max_degree = find_maxdeg(subgraph, nlocal, using_ewgts, (float *) NULL);

    count_weights(subgraph, nlocal, sub_assign, 2, set_wgts, (max_vwgt != 1));

    /* Allowed deviation: the larger of one max-weight vertex and the
       KL_IMBALANCE share of the average goal. */
    tolerance = max_vwgt;
    slack = (pair_goal[0] + pair_goal[1]) * KL_IMBALANCE / 2;
    if (slack > tolerance) {
        tolerance = slack;
    }

    klspiff(subgraph, nlocal, sub_assign, 2, hops, pair_goal, term_wgts, tolerance, max_degree,
            using_ewgts, &no_list, set_wgts);

    /* Figure out which labelling leaves most vertices intact. */
    unchanged = 0;
    for (loc = 1; loc <= nlocal; loc++) {
        if (old_sub_assign[loc] == sub_assign[loc]) {
            unchanged++;
        }
    }
    if (2 * unchanged > nlocal) {
        label0 = set1;
        label1 = set2;
    }
    else {
        label0 = set2;
        label1 = set1;
    }

    /* Now update the assignments. */
    sizes[label1] = 0;
    sizes[label0] = 0;
    for (loc = 1; loc <= nlocal; loc++) {
        gvtx = loc2glob[loc];
        elem = &(vtx_elems[gvtx]);

        /* Unlink the vertex from whatever list currently holds it. */
        if (elem->next != NULL) {
            elem->next->prev = elem->prev;
        }
        if (elem->prev != NULL) {
            elem->prev->next = elem->next;
        }

        /* Push it on the front of its destination set's list. */
        dest = (sub_assign[loc] == 0) ? label0 : label1;
        new_assign[gvtx] = (int) dest;
        ++sizes[dest];
        elem->next = set_list[dest].next;
        if (elem->next != NULL) {
            elem->next->prev = elem;
        }
        elem->prev = &(set_list[dest]);
        set_list[dest].next = elem;
    }

    remake_graph(subgraph, nlocal, loc2glob, degrees, using_ewgts);

    return (unchanged != nlocal);
}
/* begin at 1 instead of at 0. */ int refine_mesh(struct vtx_data **comm_graph, /* graph for communication requirements */ int cube_or_mesh, /* number of dimensions in mesh */ int mesh_dims[3], /* dimensions of mesh */ double maxdesire, /* largest possible desire to flip an edge */ int * vtx2node, /* mapping from comm_graph vtxs to mesh nodes */ int * node2vtx /* mapping from mesh nodes to comm_graph vtxs */ ) { struct refine_vdata * vdata = NULL; /* desire data for all vertices */ struct refine_vdata * vptr; /* loops through vdata */ struct refine_edata * edata = NULL; /* desire data for all edges */ struct refine_edata * eguy; /* one element in edata array */ struct refine_edata **desire_ptr = NULL; /* array of desire buckets */ double * desires = NULL; /* each edge's inclination to flip */ int * indices = NULL; /* sorted list of desire values */ int * space = NULL; /* used for sorting disire values */ double best_desire; /* highest desire of edge to flip */ int imax; /* maxdesire rounded up */ int nsets_tot; /* total number of sets/processors */ int neighbor; /* neighboring vertex */ int dim; /* loops over mesh dimensions */ int nwires; /* number of wires in processor mesh */ int wire; /* loops through all wires */ int node1, node2; /* processors joined by a wire */ int vtx1, vtx2; /* corresponding vertices in comm_graph */ int loc1, loc2; /* location of vtxs in flipping dimension */ int error; /* out of space? */ int i, j, k; /* loop counter */ double find_maxdeg(); double compute_mesh_edata(); void compute_mesh_vdata(), init_mesh_edata(), mergesort(); void update_mesh_vdata(), update_mesh_edata(); error = 1; nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; imax = maxdesire; if (imax != maxdesire) imax++; vdata = (struct refine_vdata *)smalloc_ret((cube_or_mesh * nsets_tot + 1) * sizeof(struct refine_vdata)); if (vdata == NULL) goto skip; /* Compute each node's desires to move or stay put. 
*/ vptr = vdata; for (dim = 0; dim < cube_or_mesh; dim++) { for (i = 1; i <= nsets_tot; i++) { compute_mesh_vdata(++vptr, comm_graph, i, vtx2node, mesh_dims, dim); } } nwires = (mesh_dims[0] - 1) * mesh_dims[1] * mesh_dims[2] + mesh_dims[0] * (mesh_dims[1] - 1) * mesh_dims[2] + mesh_dims[0] * mesh_dims[1] * (mesh_dims[2] - 1); edata = smalloc_ret((nwires + 1) * sizeof(struct refine_edata)); desires = smalloc_ret(nwires * sizeof(double)); if (vdata == NULL || desires == NULL) goto skip; /* Initialize all the edge values. */ init_mesh_edata(edata, mesh_dims); for (wire = 0; wire < nwires; wire++) { desires[wire] = edata[wire].swap_desire = compute_mesh_edata(&(edata[wire]), vdata, mesh_dims, comm_graph, node2vtx); } /* Set special value for end pointer. */ edata[nwires].swap_desire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL); /* I now need to sort all the wire preference values */ indices = smalloc_ret(nwires * sizeof(int)); space = smalloc_ret(nwires * sizeof(int)); if (indices == NULL || space == NULL) goto skip; mergesort(desires, nwires, indices, space); sfree(space); sfree(desires); space = NULL; desires = NULL; best_desire = (edata[indices[nwires - 1]]).swap_desire; /* Now construct a buckets of linked lists with desire values. */ if (best_desire > 0) { desire_ptr = (struct refine_edata **)smalloc_ret((2 * imax + 1) * sizeof(struct refine_edata *)); if (desire_ptr == NULL) goto skip; for (i = 2 * imax; i >= 0; i--) desire_ptr[i] = NULL; for (i = nwires - 1; i >= 0; i--) { eguy = &(edata[indices[i]]); /* Round the swap desire up. */ if (eguy->swap_desire >= 0) { k = eguy->swap_desire; if (k != eguy->swap_desire) k++; } else { k = -eguy->swap_desire; if (k != -eguy->swap_desire) k++; k = -k; } k += imax; eguy->prev = NULL; eguy->next = desire_ptr[k]; if (desire_ptr[k] != NULL) desire_ptr[k]->prev = eguy; desire_ptr[k] = eguy; } } else { desire_ptr = NULL; } sfree(indices); indices = NULL; loc1 = 0; loc2 = 0; /* Everything is now set up. 
Swap sets across wires until no more improvement. */ while (best_desire > 0) { k = best_desire + 1 + imax; if (k > 2 * imax) k = 2 * imax; while (k > imax && desire_ptr[k] == NULL) k--; eguy = desire_ptr[k]; dim = eguy->dim; node1 = eguy->node1; node2 = eguy->node2; vtx1 = node2vtx[node1]; vtx2 = node2vtx[node2]; if (dim == 0) { loc1 = node1 % mesh_dims[0]; loc2 = node2 % mesh_dims[0]; } else if (dim == 1) { loc1 = (node1 / mesh_dims[0]) % mesh_dims[1]; loc2 = (node2 / mesh_dims[0]) % mesh_dims[1]; } else if (dim == 2) { loc1 = node1 / (mesh_dims[0] * mesh_dims[1]); loc2 = node2 / (mesh_dims[0] * mesh_dims[1]); } /* Now swap the vertices. */ node2vtx[node1] = (int)vtx2; node2vtx[node2] = (int)vtx1; vtx2node[vtx1] = (int)node2; vtx2node[vtx2] = (int)node1; /* First update all the vdata fields for vertices effected by this flip. */ for (j = 1; j < comm_graph[vtx1]->nedges; j++) { neighbor = comm_graph[vtx1]->edges[j]; if (neighbor != vtx2) update_mesh_vdata(loc1, loc2, dim, comm_graph[vtx1]->ewgts[j], vdata, mesh_dims, neighbor, vtx2node); } for (j = 1; j < comm_graph[vtx2]->nedges; j++) { neighbor = comm_graph[vtx2]->edges[j]; if (neighbor != vtx1) update_mesh_vdata(loc2, loc1, dim, comm_graph[vtx2]->ewgts[j], vdata, mesh_dims, neighbor, vtx2node); } /* Now recompute all preferences for vertices that were moved. */ for (j = 0; j < cube_or_mesh; j++) { compute_mesh_vdata(&(vdata[j * nsets_tot + vtx1]), comm_graph, vtx1, vtx2node, mesh_dims, j); compute_mesh_vdata(&(vdata[j * nsets_tot + vtx2]), comm_graph, vtx2, vtx2node, mesh_dims, j); } /* Now I can update the values of all the edges associated with all the effected vertices. Note that these include mesh neighbors of node1 and node2 in addition to the dim-edges of graph neighbors of vtx1 and vtx2. */ /* For each neighbor vtx, look at -1 and +1 edge. If desire hasn't changed, return. Otherwise, pick him up and move him. Similarly for all directional neighbors of node1 and node2. 
*/ for (j = 1; j < comm_graph[vtx1]->nedges; j++) { neighbor = comm_graph[vtx1]->edges[j]; if (neighbor != vtx2) update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } for (j = 1; j < comm_graph[vtx2]->nedges; j++) { neighbor = comm_graph[vtx2]->edges[j]; if (neighbor != vtx1) update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } for (j = 0; j < cube_or_mesh; j++) { update_mesh_edata(vtx1, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); update_mesh_edata(vtx2, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } k = best_desire + 1 + imax; if (k > 2 * imax) k = 2 * imax; while (k > imax && desire_ptr[k] == NULL) k--; best_desire = k - imax; } error = 0; skip: sfree(indices); sfree(space); sfree(desires); sfree(desire_ptr); sfree(vdata); sfree(edata); return (error); }
/*
 * Refine the mapping of sets to processors.
 *
 * Builds the communication (quotient) graph with make_comm_graph(), starts
 * from the identity placement (comm_graph vertex i on node i - 1), lets
 * refine_mesh() or refine_cube() permute that placement, and on success
 * rewrites assign[1..nvtxs] through the resulting vtx2node table.  If any
 * step reports an error, a warning is printed via strout() and assign is
 * left untouched.
 */
void refine_map(struct vtx_data **graph,        /* graph data structure */
                int               nvtxs,        /* number of vertices in graph */
                int               using_ewgts,  /* are edge weights being used? */
                int              *assign,       /* current assignment */
                int               cube_or_mesh, /* 0 => hypercube, d => d-dimensional mesh */
                int               ndims_tot,    /* if hypercube, number of dimensions */
                int               mesh_dims[3]  /* if mesh, dimensions of mesh */
)
{
    struct vtx_data **comm_graph;   /* graph for communication requirements */
    int    nsets_tot = 0;           /* total number of sets */
    int   *vtx2node = NULL;         /* mapping of comm_graph vtxs to processors */
    int   *node2vtx = NULL;         /* mapping of sets to comm_graph vtxs */
    double maxdesire = 0.0;         /* largest possible desire to flip an edge */
    int    error = 0;               /* out of space? */
    int    node;                    /* loops over processors */
    int    vtx;                     /* loops over graph vertices */
    double find_maxdeg();
    void   free_graph(), strout();
    int    make_comm_graph(), refine_mesh(), refine_cube();

    /* Number of sets implied by the architecture; stays 0 for any other value. */
    switch (cube_or_mesh) {
    case 0:
        nsets_tot = 1 << ndims_tot;
        break;
    case 1:
        nsets_tot = mesh_dims[0];
        break;
    case 2:
        nsets_tot = mesh_dims[0] * mesh_dims[1];
        break;
    case 3:
        nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2];
        break;
    default:
        break;
    }

    /* Construct the weighted quotient graph representing communication. */
    error = make_comm_graph(&comm_graph, graph, nvtxs, using_ewgts, assign, nsets_tot);

    if (!error) {
        maxdesire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL);

        vtx2node = smalloc_ret((nsets_tot + 1) * sizeof(int));
        node2vtx = smalloc_ret(nsets_tot * sizeof(int));

        if (node2vtx != NULL && vtx2node != NULL) {
            /* Identity placement: vertices are 1-based, nodes 0-based. */
            for (node = 0; node < nsets_tot; node++) {
                vtx2node[node + 1] = (int)node;
                node2vtx[node] = (int)(node + 1);
            }

            if (cube_or_mesh > 0) {
                error = refine_mesh(comm_graph, cube_or_mesh, mesh_dims, maxdesire, vtx2node,
                                    node2vtx);
            }
            else if (cube_or_mesh == 0) {
                error = refine_cube(comm_graph, ndims_tot, maxdesire, vtx2node, node2vtx);
            }

            if (!error) {
                /* assign[] holds 0-based sets; comm_graph vertices are 1-based. */
                for (vtx = 1; vtx <= nvtxs; vtx++) {
                    assign[vtx] = vtx2node[assign[vtx] + 1];
                }
            }
        }
        else {
            error = 1;
        }
    }

    if (error) {
        strout("\nWARNING: No space to refine mapping to processors.");
        strout(" NO MAPPING REFINEMENT PERFORMED.\n");
    }

    sfree(node2vtx);
    sfree(vtx2node);
    free_graph(comm_graph);
}
void coarsen_klv ( /* Coarsen until nvtxs < vmax, compute and uncoarsen. */ struct vtx_data **graph, /* array of vtx data for graph */ int nvtxs, /* number of vertices in graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertices weights being used? */ int using_ewgts, /* are edge weights being used? */ float *term_wgts[], /* weights for terminal propogation */ int igeom, /* dimension for geometric information */ float **coords, /* coordinates for vertices */ int vwgt_max, /* largest vertex weight */ int *assignment, /* processor each vertex gets assigned to */ double *goal, /* desired set sizes */ int architecture, /* 0 => hypercube, d => d-dimensional mesh */ int (*hops)[MAXSETS], /* cost of edge between sets */ int solver_flag, /* which eigensolver to use */ int ndims, /* number of eigenvectors to calculate */ int nsets, /* number of sets being divided into */ int vmax, /* largest subgraph to stop coarsening */ int mediantype, /* flag for different assignment strategies */ int mkconnected, /* enforce connectivity before eigensolve? */ double eigtol, /* tolerence in eigen calculation */ int nstep, /* number of coarsenings between RQI steps */ int step, /* current step number */ int **pbndy_list, /* pointer to returned boundary list */ double *weights, /* weights of vertices in each set */ int give_up /* has coarsening bogged down? */ ) { extern FILE *Output_File; /* output file or null */ extern int DEBUG_TRACE; /* trace the execution of the code */ extern int DEBUG_COARSEN; /* debug flag for coarsening */ extern double COARSEN_RATIO_MIN; /* vtx reduction demanded */ extern int COARSEN_VWGTS; /* use vertex weights while coarsening? */ extern int COARSEN_EWGTS; /* use edge weights while coarsening? */ extern int LIMIT_KL_EWGTS; /* limit edges weights in KL? */ extern int COARSE_KLV; /* apply klv as a smoother? */ extern int COARSE_BPM; /* apply bipartite matching/flow as a smoother? 
*/ extern double KL_IMBALANCE; /* fractional imbalance allowed in KL */ struct vtx_data **cgraph; /* array of vtx data for coarsened graph */ double new_goal[MAXSETS];/* new goal if not using vertex weights */ double *real_goal; /* chooses between goal and new_goal */ double total_weight; /* total weight of vertices */ double goal_weight; /* total weight of vertices in goal */ float *cterm_wgts[MAXSETS]; /* terminal weights for coarse graph */ float *new_term_wgts[MAXSETS]; /* modified for Bui's method */ float **real_term_wgts; /* which of previous two to use */ float ewgt_max; /* largest edge weight in graph */ float *twptr = NULL; /* loops through term_wgts */ float *twptr_save = NULL;/* copy of twptr */ float *ctwptr; /* loops through cterm_wgts */ float **ccoords; /* coarse graph coordinates */ int *v2cv; /* mapping from vtxs to coarse vtxs */ int *flag; /* scatter array for coarse bndy vtxs */ int *bndy_list; /* list of vertices on boundary */ int *cbndy_list; /* list of vertices of coarse graph on boundary */ int *cassignment; /* set assignments for coarsened vertices */ int flattened; /* was this graph flattened? */ int list_length; /* length of boundary vtx list */ int cnvtxs; /* number of vertices in coarsened graph */ int cnedges; /* number of edges in coarsened graph */ int cvwgt_max; /* largest vertex weight in coarsened graph */ int nextstep; /* next step in RQI test */ int max_dev; /* largest allowed deviation from balance */ int i, j; /* loop counters */ int find_bndy(), flatten(); double find_maxdeg(); void makevwsqrt(), make_connected(), print_connected(), eigensolve(); void make_unconnected(), assign(), klvspiff(), coarsen1(), free_graph(); void compress_ewgts(), restore_ewgts(), count_weights(), bpm_improve(); void simple_part(); if (DEBUG_COARSEN > 0 || DEBUG_TRACE > 0) { printf("<Entering coarsen_kl, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n", step, nvtxs, nedges, vmax); } /* Is problem small enough to solve? 
*/ if (nvtxs <= vmax || give_up) { real_goal = goal; simple_part(graph, nvtxs, assignment, nsets, 1, real_goal); list_length = find_bndy(graph, nvtxs, assignment, 2, &bndy_list); count_weights(graph, nvtxs, assignment, nsets + 1, weights, 1); max_dev = (step == 0) ? vwgt_max : 5 * vwgt_max; total_weight = 0; for (i = 0; i < nsets; i++) total_weight += real_goal[i]; if (max_dev > total_weight) max_dev = vwgt_max; goal_weight = total_weight * KL_IMBALANCE / nsets; if (goal_weight > max_dev) max_dev = goal_weight; if (COARSE_KLV) { klvspiff(graph, nvtxs, assignment, real_goal, max_dev, &bndy_list, weights); } if (COARSE_BPM) { bpm_improve(graph, assignment, real_goal, max_dev, &bndy_list, weights, using_vwgts); } *pbndy_list = bndy_list; return; } /* Otherwise I have to coarsen. */ flattened = FALSE; if (coords != NULL) { ccoords = smalloc(igeom * sizeof(float *)); } else { ccoords = NULL; } if (FLATTEN && step == 0) { flattened = flatten(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv, using_ewgts && COARSEN_EWGTS, igeom, coords, ccoords); } if (!flattened) { coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv, igeom, coords, ccoords, using_ewgts && COARSEN_EWGTS); } if (term_wgts[1] != NULL) { twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (i = (cnvtxs + 1) * (nsets - 1); i; i--) { *twptr++ = 0; } twptr = twptr_save; for (j = 1; j < nsets; j++) { cterm_wgts[j] = twptr; twptr += cnvtxs + 1; } for (j = 1; j < nsets; j++) { ctwptr = cterm_wgts[j]; twptr = term_wgts[j]; for (i = 1; i < nvtxs; i++) { ctwptr[v2cv[i]] += twptr[i]; } } } else { cterm_wgts[1] = NULL; } /* If coarsening isn't working very well, give up and partition. 
*/ give_up = FALSE; if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax && !flattened) { printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); printf(" Recursive coarsening being stopped prematurely.\n"); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); fprintf(Output_File, " Recursive coarsening being stopped prematurely.\n"); } give_up = TRUE; } /* Now recurse on coarse subgraph. */ if (COARSEN_VWGTS) { cvwgt_max = 0; for (i = 1; i <= cnvtxs; i++) { if (cgraph[i]->vwgt > cvwgt_max) cvwgt_max = cgraph[i]->vwgt; } } else cvwgt_max = 1; cassignment = smalloc((cnvtxs + 1) * sizeof(int)); if (flattened) nextstep = step; else nextstep = step + 1; coarsen_klv(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts, igeom, ccoords, cvwgt_max, cassignment, goal, architecture, hops, solver_flag, ndims, nsets, vmax, mediantype, mkconnected, eigtol, nstep, nextstep, &cbndy_list, weights, give_up); /* Interpolate assignment back to fine graph. */ for (i = 1; i <= nvtxs; i++) { assignment[i] = cassignment[v2cv[i]]; } /* Construct boundary list from coarse boundary list. */ flag = smalloc((cnvtxs + 1) * sizeof(int)); for (i = 1; i <= cnvtxs; i++) { flag[i] = FALSE; } for (i = 0; cbndy_list[i]; i++) { flag[cbndy_list[i]] = TRUE; } list_length = 0; for (i = 1; i <= nvtxs; i++) { if (flag[v2cv[i]]) ++list_length; } bndy_list = smalloc((list_length + 1) * sizeof(int)); list_length = 0; for (i = 1; i <= nvtxs; i++) { if (flag[v2cv[i]]) bndy_list[list_length++] = i; } bndy_list[list_length] = 0; sfree(flag); sfree(cbndy_list); /* Free the space that was allocated. */ sfree(cassignment); if (twptr_save != NULL) { sfree(twptr_save); twptr_save = NULL; } free_graph(cgraph); sfree(v2cv); /* Smooth using KL or BPM every nstep steps. 
*/ if (!(step % nstep) && !flattened) { if (!COARSEN_VWGTS && step != 0) { /* Construct new goal */ goal_weight = 0; for (i = 0; i < nsets; i++) goal_weight += goal[i]; for (i = 0; i < nsets; i++) new_goal[i] = goal[i] * (nvtxs / goal_weight); real_goal = new_goal; } else real_goal = goal; if (LIMIT_KL_EWGTS) { find_maxdeg(graph, nvtxs, using_ewgts, &ewgt_max); compress_ewgts(graph, nvtxs, nedges, ewgt_max, using_ewgts); } /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } max_dev = (step == 0) ? vwgt_max : 5 * vwgt_max; total_weight = 0; for (i = 0; i < nsets; i++) { total_weight += real_goal[i]; } if (max_dev > total_weight) max_dev = vwgt_max; goal_weight = total_weight * KL_IMBALANCE / nsets; if (goal_weight > max_dev) { max_dev = goal_weight; } if (!COARSEN_VWGTS) { count_weights(graph, nvtxs, assignment, nsets + 1, weights, (vwgt_max != 1)); } if (COARSE_KLV) { klvspiff(graph, nvtxs, assignment, real_goal, max_dev, &bndy_list, weights); } if (COARSE_BPM) { bpm_improve(graph, assignment, real_goal, max_dev, &bndy_list, weights, using_vwgts); } if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(real_term_wgts[1]); } if (LIMIT_KL_EWGTS) restore_ewgts(graph, nvtxs); } *pbndy_list = bndy_list; if (twptr_save != NULL) { sfree(twptr_save); twptr_save = NULL; } /* Free the space that was allocated. 
*/ if (ccoords != NULL) { for (i = 0; i < igeom; i++) sfree(ccoords[i]); sfree(ccoords); } if (DEBUG_COARSEN > 0) { printf(" Leaving coarsen_klv, step=%d\n", step); } }