void *array_alloc_2D_ret(int dim1, int dim2, size_t size) /* size of first dimension */ /* size of second dimension */ /* size of array elements */ { size_t total; /* Total size of the array */ int aligned_dim; /* dim1 or dim1+1 to ensure data alignement */ int offset; /* offset of array elements */ char *field; /* The multi-dimensional array */ char **ptr; /* Pointer offset */ char *data; /* Data offset */ int j; /* loop counter */ aligned_dim = (dim1 % 2) ? dim1 + 1 : dim1; offset = aligned_dim * sizeof(void *); total = offset + dim1 * dim2 * size; field = smalloc_ret(total); if (field != NULL) { ptr = (char **)field; data = (char *)field; data += offset; for (j = 0; j < dim1; j++) { ptr[j] = data + j * size * dim2; } } return (field); }
/* Allocates a float vector with range [nl..nh]. Returns error code. */ float *mkvec_ret_float(int nl, int nh) { float *v; v = smalloc_ret((nh - nl + 1) * sizeof(float)); if (v == NULL) return(NULL); else return (v - nl); }
/* Allocates a double vector with range [nl..nh]. Returns error code. */ double *mkvec_ret(int nl, int nh) { double *v; v = smalloc_ret((nh - nl + 1) * sizeof(double)); if (v == NULL) return(NULL); else return (v - nl); }
int klv_init ( struct bilist ***lbucket_ptr, /* space for left bucket sorts */ struct bilist ***rbucket_ptr, /* space for right bucket sorts */ struct bilist **llistspace, /* space for elements of linked lists */ struct bilist **rlistspace, /* space for elements of linked lists */ int **ldvals, /* change in separator for left moves */ int **rdvals, /* change in separator for right moves */ int nvtxs, /* number of vertices in the graph */ int maxchange /* maximum change by moving a vertex */ ) { int sizeb; /* size of set of buckets */ int sizel; /* size of set of pointers for all vertices */ int flag; /* return code */ /* Allocate appropriate data structures for buckets, and listspace. */ sizeb = (2 * maxchange + 1) * sizeof(struct bilist *); *lbucket_ptr = smalloc_ret(sizeb); *rbucket_ptr = smalloc_ret(sizeb); *ldvals = smalloc_ret((nvtxs + 1) * sizeof(int)); *rdvals = smalloc_ret((nvtxs + 1) * sizeof(int)); sizel = (nvtxs + 1) * sizeof(struct bilist); *llistspace = smalloc_ret(sizel); *rlistspace = smalloc_ret(sizel); if (*lbucket_ptr == NULL || *rbucket_ptr == NULL || *ldvals == NULL || *rdvals == NULL || *llistspace == NULL || *rlistspace == NULL) { flag = 1; } else { flag = 0; } return(flag); }
int nway_klv(struct vtx_data **graph, /* data structure for graph */ int nvtxs, /* number of vtxs in graph */ struct bilist ** lbuckets, /* array of lists for bucket sort */ struct bilist ** rbuckets, /* array of lists for bucket sort */ struct bilist * llistspace, /* list data structure for each vertex */ struct bilist * rlistspace, /* list data structure for each vertex */ int * ldvals, /* d-values for each transition */ int * rdvals, /* d-values for each transition */ int * sets, /* processor each vertex is assigned to */ int maxdval, /* maximum d-value for a vertex */ double * goal, /* desired set sizes */ int max_dev, /* largest allowed deviation from balance */ int ** bndy_list, /* list of vertices on boundary (0 ends) */ double * weightsum /* sum of vweights in each set (in and out) */ ) { struct bilist **to_buckets; /* buckets I'm moving to */ struct bilist **from_buckets; /* buckets I'm moving from */ struct bilist * to_listspace; /* list structure I'm moving to */ struct bilist * from_listspace; /* list structure I'm moving from */ struct bilist * out_list; /* list of vtxs moved out of separator */ int * to_dvals; /* d-values I'm moving to */ int * from_dvals; /* d-values I'm moving from */ extern double kl_bucket_time; /* time spent in KL bucketsort */ extern int KL_BAD_MOVES; /* # bad moves in a row to stop KL */ extern int DEBUG_KL; /* debug flag for KL */ extern int KL_NTRIES_BAD; /* number of unhelpful passes before quitting */ extern int KL_MAX_PASS; /* maximum # outer KL loops */ int * bspace; /* list of active vertices for bucketsort */ int * edges; /* edge list for a vertex */ int * edges2; /* edge list for a vertex */ int * bdy_ptr; /* loops through bndy_list */ double total_weight; /* weight of all vertices */ double partial_weight; /* weight of vertices not in separator */ double ratio; /* fraction of weight not in separator */ double time; /* timing parameter */ double delta0; /* largest negative deviation from goal size */ double delta1; /* largest negative deviation from goal size */ double left_imbalance = 0.0; /* imbalance if I move to the left */ double right_imbalance; /* imbalance if I move to the right */ double balance_val; /* how imbalanced is it */ double balance_best; /* best balance yet if trying hard */ int flag; /* condition indicator */ int to = -1, from; /* sets moving into / out of */ int rtop, ltop; /* top of each set of buckets */ int * to_top; /* ptr to top of set moving to */ int lvtx, rvtx; /* next vertex to move left/right */ int lweight, rweight; /* weights of moving vertices */ int weightfrom = 0; /* weight moving out of a set */ int list_length; /* how long is list of vertices to bucketsort? */ int balanced; /* is partition balanced? */ int temp_balanced; /* is intermediate partition balanced? */ int ever_balanced; /* has any partition been balanced? */ int bestvtx = -1; /* best vertex to move */ int bestval = -1; /* best change in value for a vtx move */ int vweight; /* weight of best vertex */ int gtotal; /* sum of changes from moving */ int improved; /* total improvement from KL */ double bestg; /* maximum gtotal found in KL loop */ double bestg_min; /* smaller than any possible bestg */ int beststep; /* step where maximum value occurred */ int bestlength; /* step where maximum value occurred */ int neighbor; /* neighbor of a vertex */ int step_cutoff; /* number of negative steps in a row allowed */ int cost_cutoff; /* amount of negative d-values allowed */ int neg_steps; /* number of negative steps in a row */ int neg_cost; /* decrease in sum of d-values */ int vtx; /* vertex number */ int dval; /* dval of a vertex */ int group, group2; /* set that a vertex is assigned to */ int left_too_big; /* is left set too large? */ int right_too_big; /* is right set too large? */ int vwgt; /* weight of a vertex */ int gain; /* reduction in separator due to a move */ int neighbor2; /* neighbor of a vertex */ int step; /* loops through movements of vertices */ int parity; /* sort forwards or backwards? */ int done; /* has termination criteria been achieved? */ int nbad; /* number of unhelpful passes in a row */ int npass; /* total number of passes */ int nbadtries; /* number of unhelpful passes before quitting */ int enforce_balance; /* force a balanced partition? */ int enforce_balance_hard; /* really force a balanced partition? */ int i, j, k; /* loop counters */ double seconds(), drandom(); int make_sep_list(); void bucketsortsv(), clear_dvals(), p1bucket(); void removebilist(), movebilist(), add2bilist(); nbadtries = KL_NTRIES_BAD; enforce_balance = FALSE; enforce_balance_hard = FALSE; total_weight = goal[0] + goal[1]; bspace = smalloc_ret((nvtxs + 1) * sizeof(int)); if (bspace == NULL) { return (1); } bdy_ptr = *bndy_list; list_length = 0; while (*bdy_ptr != 0) { bspace[list_length++] = *bdy_ptr++; } sfree(*bndy_list); clear_dvals(graph, nvtxs, ldvals, rdvals, bspace, list_length); step_cutoff = KL_BAD_MOVES; cost_cutoff = maxdval * step_cutoff / 7; if (cost_cutoff < step_cutoff) cost_cutoff = step_cutoff; partial_weight = weightsum[0] + weightsum[1]; ratio = partial_weight / total_weight; delta0 = fabs(weightsum[0] - goal[0] * ratio); delta1 = fabs(weightsum[1] - goal[1] * ratio); balanced = (delta0 + delta1 <= max_dev) && weightsum[0] != total_weight && weightsum[1] != total_weight; bestg_min = -2.0 * nvtxs * maxdval; parity = FALSE; nbad = 0; npass = 0; improved = 0; done = FALSE; while (!done) { npass++; ever_balanced = FALSE; balance_best = delta0 + delta1; /* Initialize various quantities. */ ltop = rtop = 2 * maxdval; gtotal = 0; bestg = bestg_min; beststep = -1; bestlength = list_length; out_list = NULL; neg_steps = 0; /* Compute the initial d-values, and bucket-sort them. */ time = seconds(); bucketsortsv(graph, nvtxs, lbuckets, rbuckets, llistspace, rlistspace, ldvals, rdvals, sets, maxdval, parity, bspace, list_length); parity = !parity; kl_bucket_time += seconds() - time; if (DEBUG_KL > 2) { printf("After sorting, left buckets:\n"); p1bucket(lbuckets, llistspace, maxdval); printf(" right buckets:\n"); p1bucket(rbuckets, rlistspace, maxdval); } /* Now determine the set of vertex moves. */ for (step = 1;; step++) { /* Find the highest d-value in each set. */ /* But only consider moves from large to small sets, or moves */ /* in which balance is preserved. */ /* Break ties in some nonarbitrary manner. */ bestval = -maxdval - 1; partial_weight = weightsum[0] + weightsum[1]; ratio = partial_weight / total_weight; left_too_big = (weightsum[0] > (goal[0] + .5 * max_dev) * ratio); right_too_big = (weightsum[1] > (goal[1] + .5 * max_dev) * ratio); while (ltop >= 0 && lbuckets[ltop] == NULL) { --ltop; } if (ltop >= 0 && !left_too_big) { lvtx = ((long)lbuckets[ltop] - (long)llistspace) / sizeof(struct bilist); lweight = graph[lvtx]->vwgt; rweight = lweight - (ltop - maxdval); weightfrom = rweight; to = 0; bestvtx = lvtx; bestval = ltop - maxdval; partial_weight = weightsum[0] + lweight + weightsum[1] - rweight; ratio = partial_weight / total_weight; left_imbalance = max(fabs(weightsum[0] + lweight - goal[0] * ratio), fabs(weightsum[1] - rweight - goal[1] * ratio)); } while (rtop >= 0 && rbuckets[rtop] == NULL) { --rtop; } if (rtop >= 0 && !right_too_big) { rvtx = ((long)rbuckets[rtop] - (long)rlistspace) / sizeof(struct bilist); rweight = graph[rvtx]->vwgt; lweight = rweight - (rtop - maxdval); partial_weight = weightsum[0] - lweight + weightsum[1] + rweight; ratio = partial_weight / total_weight; right_imbalance = max(fabs(weightsum[0] - lweight - goal[0] * ratio), fabs(weightsum[1] + rweight - goal[1] * ratio)); if (rtop - maxdval > bestval || (rtop - maxdval == bestval && (right_imbalance < left_imbalance || (right_imbalance == left_imbalance && drandom() < .5)))) { to = 1; weightfrom = lweight; bestvtx = rvtx; bestval = rtop - maxdval; } } if (bestval == -maxdval - 1) { /* No allowed moves */ if (DEBUG_KL > 0) { printf("No KLV moves at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } break; } if (to == 0) { from = 1; to_listspace = llistspace; from_listspace = rlistspace; to_dvals = ldvals; from_dvals = rdvals; to_buckets = lbuckets; from_buckets = rbuckets; to_top = <op; } else { from = 0; to_listspace = rlistspace; from_listspace = llistspace; to_dvals = rdvals; from_dvals = ldvals; to_buckets = rbuckets; from_buckets = lbuckets; to_top = &rtop; } vweight = graph[bestvtx]->vwgt; weightsum[to] += vweight; weightsum[from] -= weightfrom; /* Check if this partition is balanced. */ partial_weight = weightsum[0] + weightsum[1]; ratio = partial_weight / total_weight; delta0 = fabs(weightsum[0] - goal[0] * ratio); delta1 = fabs(weightsum[1] - goal[1] * ratio); temp_balanced = (delta0 + delta1 <= max_dev) && weightsum[0] != total_weight && weightsum[1] != total_weight; ever_balanced = (ever_balanced || temp_balanced); balance_val = delta0 + delta1; gtotal += bestval; if ((gtotal > bestg && temp_balanced) || (enforce_balance_hard && balance_val < balance_best)) { bestg = gtotal; beststep = step; if (balance_val < balance_best) { balance_best = balance_val; } if (temp_balanced) { enforce_balance_hard = FALSE; } } if (DEBUG_KL > 1) { printf("At KLV step %d, bestvtx=%d, bestval=%d (2->%d), wt0 = %g, wt1 = %g\n", step, bestvtx, bestval, to, weightsum[0], weightsum[1]); } /* Monitor the stopping criteria. */ if (bestval < 0) { if (!enforce_balance || ever_balanced) neg_steps++; if (bestg != bestg_min) neg_cost = bestg - gtotal; else neg_cost = -maxdval - 1; if ((neg_steps > step_cutoff || neg_cost > cost_cutoff) && !(enforce_balance && bestg == bestg_min)) { if (DEBUG_KL > 0) { if (neg_steps > step_cutoff) { printf("KLV step cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } else if (neg_cost > cost_cutoff) { printf("KLV cost cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } } gtotal -= bestval; weightsum[to] -= vweight; weightsum[from] += weightfrom; break; } } else if (bestval > 0) { neg_steps = 0; } /* Remove vertex from its buckets, and flag it as finished. */ sets[bestvtx] = to; removebilist(&to_listspace[bestvtx], &to_buckets[bestval + maxdval]); /* printf("After to removebilist\n"); p1bucket(to_buckets, to_listspace, maxdval); */ if (from_dvals[bestvtx] != -maxdval - 1) { removebilist(&from_listspace[bestvtx], &from_buckets[from_dvals[bestvtx] + maxdval]); /* printf("After from removebilist\n"); p1bucket(from_buckets, from_listspace, maxdval); */ } from_dvals[bestvtx] = -maxdval - 1; /* Now keep track of vertices moved out of separator so */ /* I can restore them as needed. */ llistspace[bestvtx].next = out_list; out_list = &(llistspace[bestvtx]); /* Now update the d-values of all the neighbors */ /* And neighbors of neighbors ... */ /* If left move: 1. Separator neighbors right gain => infinity 2. Left neighbors unaffected. 3. Right neighbors move into separator. A. Right gain = infinity. B. Left gain = computed. C. For any of their neighbors in separator increase left gain. */ edges = graph[bestvtx]->edges; for (j = graph[bestvtx]->nedges - 1; j; j--) { neighbor = *(++edges); group = sets[neighbor]; if (group == 2) { /* In separator. */ gain = from_dvals[neighbor] + maxdval; /* Gain in the from direction => -infinity */ if (gain >= 0) { removebilist(&from_listspace[neighbor], &from_buckets[gain]); /* printf("\n After removing %d\n", neighbor); p1bucket(from_buckets, from_listspace, maxdval); */ from_dvals[neighbor] = -maxdval - 1; } } else if (group == from) { /* Gain in the from direction => -infinity */ sets[neighbor] = 2; from_dvals[neighbor] = -maxdval - 1; if (to == 0) { bspace[list_length++] = -neighbor; } else { bspace[list_length++] = neighbor; } edges2 = graph[neighbor]->edges; vwgt = graph[neighbor]->vwgt; gain = graph[neighbor]->vwgt; flag = FALSE; for (k = graph[neighbor]->nedges - 1; k; k--) { neighbor2 = *(++edges2); group2 = sets[neighbor2]; if (group2 == 2) { dval = to_dvals[neighbor2] + maxdval; if (dval >= 0) { movebilist(&to_listspace[neighbor2], &to_buckets[dval], &to_buckets[dval + vwgt]); /* printf("\n After moving %d from bucket %d to bucket %d\n", neighbor2, dval, dval + vwgt); p1bucket(to_buckets, to_listspace, maxdval); */ to_dvals[neighbor2] += vwgt; dval += vwgt; if (dval > *to_top) { *to_top = dval; } } } else if (group2 == from) { gain -= graph[neighbor2]->vwgt; if (to_dvals[neighbor2] + maxdval < 0) { flag = TRUE; } } } if (flag) { /* Not allowed to move further. */ to_dvals[neighbor] = -maxdval - 1; } else { to_dvals[neighbor] = gain; /* place in appropriate bucket */ gain += maxdval; add2bilist(&to_listspace[neighbor], &to_buckets[gain]); /* printf("\nAfter adding %d to bucket %d\n", neighbor, gain - maxdval); p1bucket(to_buckets, to_listspace, maxdval); */ if (gain > *to_top) *to_top = gain; } } } if (beststep == step) { bestlength = list_length; } if (DEBUG_KL > 2) { printf("\n-- After step, left buckets:\n"); p1bucket(lbuckets, llistspace, maxdval); printf(" right buckets:\n"); p1bucket(rbuckets, rlistspace, maxdval); } } /* Done with a pass; should we actually perform any swaps? */ if (bestg > 0 || (bestg != bestg_min && !balanced && enforce_balance)) { improved += bestg; } else { if (enforce_balance_hard) { /* I've done the best I can, give up. */ done = TRUE; } if (enforce_balance) { enforce_balance_hard = TRUE; } enforce_balance = TRUE; nbad++; } /* Work backwards, undoing all the undesirable moves. */ /* First reset vertices moved out of the separator. */ if (out_list) { if (beststep < 0) beststep = 0; for (i = step - 1; i > beststep; i--) { vtx = ((long)out_list - (long)llistspace) / sizeof(struct bilist); if (sets[vtx] != 2) { weightsum[sets[vtx]] -= graph[vtx]->vwgt; } sets[vtx] = 2; out_list = out_list->next; } } for (i = list_length - 1; i >= bestlength; i--) { vtx = bspace[i]; if (vtx < 0) { if (sets[-vtx] == 2) { weightsum[1] += graph[-vtx]->vwgt; } sets[-vtx] = 1; } else { if (sets[vtx] == 2) { weightsum[0] += graph[vtx]->vwgt; } sets[vtx] = 0; } } partial_weight = weightsum[0] + weightsum[1]; ratio = partial_weight / total_weight; delta0 = fabs(weightsum[0] - goal[0] * ratio); delta1 = fabs(weightsum[1] - goal[1] * ratio); balanced = (delta0 + delta1 <= max_dev) && weightsum[0] != total_weight && weightsum[1] != total_weight; done = done || (nbad >= nbadtries && balanced); if (KL_MAX_PASS > 0) { done = done || (npass == KL_MAX_PASS && balanced); } if (!done) { /* Rezero dval values. */ clear_dvals(graph, nvtxs, ldvals, rdvals, bspace, list_length); } /* Construct list of separator vertices to pass to buckets or return */ list_length = make_sep_list(bspace, list_length, sets); if (done) { bspace[list_length] = 0; bspace = srealloc(bspace, (list_length + 1) * sizeof(int)); *bndy_list = bspace; } gain = 0; j = k = 0; for (i = 1; i <= nvtxs; i++) { if (sets[i] == 0) j += graph[i]->vwgt; else if (sets[i] == 1) k += graph[i]->vwgt; else if (sets[i] == 2) gain += graph[i]->vwgt; } /* printf("\nAfter pass of KLV: sets = %d/%d, sep = %d (bestg = %g)\n\n\n", j, k, gain, bestg); */ } if (DEBUG_KL > 0) { printf(" KLV required %d passes to improve by %d.\n", npass, improved); } return (0); }
/* Construct a weighted quotient graph representing the inter-set communication. */ int make_comm_graph ( struct vtx_data ***pcomm_graph, /* graph for communication requirements */ struct vtx_data **graph, /* graph data structure */ int nvtxs, /* number of vertices in graph */ int using_ewgts, /* are edge weights being used? */ int *assign, /* current assignment */ int nsets_tot /* total number of sets */ ) { float ewgt; /* edge weight in graph */ int **edges_list = NULL; /* lists of edges */ int **ewgts_list = NULL; /* lists of edge weights */ int *edges = NULL; /* edges in communication graph */ int *ewgts = NULL; /* edge weights in communication graph */ float *float_ewgts = NULL; /* edge weights in floating point */ int *adj_sets = NULL; /* weights connecting sets */ int *order = NULL; /* ordering of vertices by set */ int *sizes = NULL; /* sizes of different sets */ int *start = NULL; /* pointers into adjacency data */ int *adjacency = NULL; /* array with all the edge info */ int *eptr = NULL; /* loops through edges in graph */ int *ewptr = NULL; /* loop through edge weights */ int set, set2; /* sets two vertices belong to */ int vertex; /* vertex in graph */ int ncomm_edges; /* number of edges in communication graph */ int error; /* out of space? */ int i, j; /* loop counters */ int reformat(); error = 1; *pcomm_graph = NULL; /* First construct some mappings to ease later manipulations. */ sizes = smalloc_ret(nsets_tot * sizeof(int)); if (sizes == NULL) goto skip; for (i = 0; i < nsets_tot; i++) sizes[i] = 0; for (i = 1; i <= nvtxs; i++) ++(sizes[assign[i]]); /* Now make sizes reflect the start index for each set. */ for (i = 1; i < nsets_tot - 1; i++) sizes[i] += sizes[i - 1]; for (i = nsets_tot - 1; i; i--) sizes[i] = sizes[i - 1]; sizes[0] = 0; /* Now construct list of all vertices in set 0, all in set 1, etc. */ order = smalloc_ret(nvtxs * sizeof(int)); if (order == NULL) goto skip; for (i = 1; i <= nvtxs; i++) { set = assign[i]; order[sizes[set]] = i; ++sizes[set]; } /* For each set, find total weight to all neighbors. */ adj_sets = smalloc_ret(nsets_tot * sizeof(int)); edges_list = smalloc_ret(nsets_tot * sizeof(int *)); ewgts_list = smalloc_ret(nsets_tot * sizeof(int *)); start = smalloc_ret((nsets_tot + 1) * sizeof(int)); if (adj_sets == NULL || edges_list == NULL || ewgts_list == NULL || start == NULL) goto skip; start[0] = 0; ewgt = 1; ncomm_edges = 0; for (set = 0; set < nsets_tot; set++) { edges_list[set] = NULL; ewgts_list[set] = NULL; } for (set = 0; set < nsets_tot; set++) { for (i = 0; i < nsets_tot; i++) adj_sets[i] = 0; for (i = (set ? sizes[set - 1] : 0); i < sizes[set]; i++) { vertex = order[i]; for (j = 1; j < graph[vertex]->nedges; j++) { set2 = assign[graph[vertex]->edges[j]]; if (set2 != set) { if (using_ewgts) ewgt = graph[vertex]->ewgts[j]; adj_sets[set2] += ewgt; } } } /* Now save adj_sets data to later construct graph. */ j = 0; for (i = 0; i < nsets_tot; i++) if (adj_sets[i]) j++; ncomm_edges += j; start[set + 1] = ncomm_edges; if (j) { edges_list[set] = edges = smalloc_ret(j * sizeof(int)); ewgts_list[set] = ewgts = smalloc_ret(j * sizeof(int)); if (edges == NULL || ewgts == NULL) goto skip; } j = 0; for (i = 0; i < nsets_tot; i++) { if (adj_sets[i]) { edges[j] = i + 1; ewgts[j] = adj_sets[i]; j++; } } } sfree(adj_sets); sfree(order); sfree(sizes); adj_sets = order = sizes = NULL; /* I now need to pack the edge and weight data into single arrays. */ adjacency = smalloc_ret((ncomm_edges + 1) * sizeof(int)); float_ewgts = smalloc_ret((ncomm_edges + 1) * sizeof(float)); if (adjacency == NULL || float_ewgts == NULL) goto skip; for (set = 0; set < nsets_tot; set++) { j = start[set]; eptr = edges_list[set]; ewptr = ewgts_list[set]; for (i = start[set]; i < start[set + 1]; i++) { adjacency[i] = eptr[i - j]; float_ewgts[i] = ewptr[i - j]; } if (start[set] != start[set + 1]) { sfree(edges_list[set]); sfree(ewgts_list[set]); } } sfree(edges_list); sfree(ewgts_list); edges_list = ewgts_list = NULL; error = reformat(start, adjacency, nsets_tot, &ncomm_edges, (int *) NULL, float_ewgts, pcomm_graph); skip: sfree(adj_sets); sfree(order); sfree(sizes); if (edges_list != NULL) { for (set = nsets_tot-1; set >= 0; set--) { if (edges_list[set] != NULL) sfree(edges_list[set]); } sfree(edges_list); } if (ewgts_list != NULL) { for (set = nsets_tot-1; set >= 0; set--) { if (ewgts_list[set] != NULL) sfree(ewgts_list[set]); } sfree(ewgts_list); } sfree(float_ewgts); sfree(adjacency); sfree(start); return(error); }
int interface ( int nvtxs, /* number of vertices in full graph */ int *start, /* start of edge list for each vertex */ int *adjacency, /* edge list data */ int *vwgts, /* weights for all vertices */ float *ewgts, /* weights for all edges */ float *x, float *y, float *z, /* coordinates for inertial method */ char *outassignname, /* name of assignment output file */ char *outfilename, /* output file name */ int *assignment, /* set number of each vtx (length n) */ int architecture, /* 0 => hypercube, d => d-dimensional mesh */ int ndims_tot, /* total number of cube dimensions to divide */ int mesh_dims[3], /* dimensions of mesh of processors */ double *goal, /* desired set sizes for each set */ int global_method, /* global partitioning algorithm */ int local_method, /* local partitioning algorithm */ int rqi_flag, /* should I use RQI/Symmlq eigensolver? */ int vmax, /* how many vertices to coarsen down to? */ int ndims, /* number of eigenvectors (2^d sets) */ double eigtol, /* tolerance on eigenvectors */ long seed /* for random graph mutations */ ) { extern char *PARAMS_FILENAME; /* name of file with parameter updates */ extern int MAKE_VWGTS; /* make vertex weights equal to degrees? */ extern int MATCH_TYPE; /* matching routine to use */ extern int FREE_GRAPH; /* free graph data structure after reformat? */ extern int DEBUG_PARAMS; /* debug flag for reading parameters */ extern int DEBUG_TRACE; /* trace main execution path */ extern double start_time; /* time routine is entered */ extern double reformat_time;/* time spent reformatting graph */ FILE *params_file=NULL; /* file for reading new parameters */ struct vtx_data **graph; /* graph data structure */ double vwgt_sum; /* sum of vertex weights */ double time; /* timing variable */ float **coords; /* coordinates for vertices if used */ int *vptr; /* loops through vertex weights */ int flag; /* return code from balance */ int nedges; /* number of edges in graph */ int using_vwgts; /* are vertex weights being used? */ int using_ewgts; /* are edge weights being used? */ int nsets_tot=0; /* total number of sets being created */ int igeom; /* geometric dimension for inertial method */ int default_goal; /* using default goals? */ int i; /* loop counter */ double seconds(); int reformat(); void free_graph(), read_params(), strout(); if (DEBUG_TRACE > 0) { printf("<Entering interface>\n"); } flag = 0; graph = NULL; coords = NULL; if (!Using_Main) { /* If not using main, need to read parameters file. */ start_time = seconds(); params_file = fopen(PARAMS_FILENAME, "r"); if (params_file == NULL && DEBUG_PARAMS > 1) { printf("Parameter file `%s' not found; using default parameters.\n", PARAMS_FILENAME); } read_params(params_file); } if (goal == NULL) { /* If not passed in, default goals have equal set sizes. */ default_goal = TRUE; if (architecture == 0) nsets_tot = 1 << ndims_tot; else if (architecture == 1) nsets_tot = mesh_dims[0]; else if (architecture == 2) nsets_tot = mesh_dims[0] * mesh_dims[1]; else if (architecture > 2) nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; if (MAKE_VWGTS && start != NULL) { vwgt_sum = start[nvtxs] - start[0] + nvtxs; } else if (vwgts == NULL) { vwgt_sum = nvtxs; } else { vwgt_sum = 0; vptr = vwgts; for (i = nvtxs; i; i--) vwgt_sum += *(vptr++); } vwgt_sum /= nsets_tot; goal = smalloc_ret(nsets_tot * sizeof(double)); if (goal == NULL) { strout("\nERROR: No room to make goals.\n"); flag = 1; goto skip; } for (i = 0; i < nsets_tot; i++) goal[i] = vwgt_sum; } else { default_goal = FALSE; } if (MAKE_VWGTS) { /* Generate vertex weights equal to degree of node. */ if (vwgts != NULL) { strout("WARNING: Vertex weights being overwritten by vertex degrees."); } vwgts = smalloc_ret(nvtxs * sizeof(int)); if (vwgts == NULL) { strout("\nERROR: No room to make vertex weights.\n"); flag = 1; goto skip; } if (start != NULL) { for (i = 0; i < nvtxs; i++) vwgts[i] = 1 + start[i + 1] - start[i]; } else { for (i = 0; i < nvtxs; i++) vwgts[i] = 1; } } using_vwgts = (vwgts != NULL); using_ewgts = (ewgts != NULL); if (start != NULL || vwgts != NULL) { /* Reformat into our data structure. */ time = seconds(); flag = reformat(start, adjacency, nvtxs, &nedges, vwgts, ewgts, &graph); if (flag) { strout("\nERROR: No room to reformat graph.\n"); goto skip; } reformat_time += seconds() - time; } else { nedges = 0; } if (FREE_GRAPH) { /* Free old graph data structures. */ free(start); free(adjacency); if (vwgts != NULL) free(vwgts); if (ewgts != NULL) free(ewgts); start = NULL; adjacency = NULL; vwgts = NULL; ewgts = NULL; } if (global_method == 3 || (MATCH_TYPE == 5 && (global_method == 1 || (global_method == 2 && rqi_flag)))) { if (x == NULL) { igeom = 0; } else { /* Set up coordinate data structure. */ coords = smalloc_ret(3 * sizeof(float *)); if (coords == NULL) { strout("\nERROR: No room to make coordinate array.\n"); flag = 1; goto skip; } /* Minus 1's are to allow remainder of program to index with 1. */ coords[0] = x - 1; igeom = 1; if (y != NULL) { coords[1] = y - 1; igeom = 2; if (z != NULL) { coords[2] = z - 1; igeom = 3; } } } } else { igeom = 0; } /* Subtract from assignment to allow code to index from 1. */ assignment = assignment - 1; flag = submain(graph, nvtxs, nedges, using_vwgts, using_ewgts, igeom, coords, outassignname, outfilename, assignment, goal, architecture, ndims_tot, mesh_dims, global_method, local_method, rqi_flag, vmax, ndims, eigtol, seed); skip: if (coords != NULL) sfree(coords); if (default_goal) sfree(goal); if (graph != NULL) free_graph(graph); if (flag && FREE_GRAPH) { sfree(start); sfree(adjacency); sfree(vwgts); sfree(ewgts); } if (!Using_Main && params_file != NULL) fclose(params_file); return (flag); }
/* Greedily increase the number of internal vtxs in each set. */ void force_internal ( struct vtx_data **graph, /* graph data structure */ int nvtxs, /* number of vertices in graph */ int using_ewgts, /* are edge weights being used? */ int *assign, /* current assignment */ double *goal, /* desired set sizes */ int nsets_tot, /* total number of sets */ int npasses_max /* number of passes to make */ ) { extern int DEBUG_TRACE; /* trace main execution path? */ extern int DEBUG_INTERNAL; /* turn on debugging code here? */ struct bidint *prev; /* back pointer for setting up lists */ struct bidint *int_list = NULL; /* internal vwgt in each set */ struct bidint *vtx_elems = NULL; /* linked lists of vtxs in each set */ struct bidint *set_list = NULL; /* headers for vtx_elems lists */ double *internal_vwgt = NULL; /* total internal vwgt in each set */ int *total_vwgt = NULL; /* total vertex weight in each set */ int *indices = NULL; /* orders sets by internal vwgt */ int *locked = NULL; /* is vertex allowed to switch sets? */ int internal; /* is a vertex internal or not? */ int *space = NULL; /* space for mergesort */ int npasses; /* number of callse to improve_internal */ int nlocked; /* number of vertices that can't move */ int set, set2; /* sets two vertices belong to */ int any_change; /* did pass improve # internal vtxs? */ int niter; /* counts calls to improve_internal */ int vwgt_max; /* largest vertex weight in graph */ int progress; /* am I improving # internal vertices? */ int error; /* out of space? */ int size; /* array spacing */ int i, j; /* loop counters */ int improve_internal(); void mergesort(), check_internal(), strout(); error = 1; /* For each set, compute the total weight of internal vertices. */ if (DEBUG_TRACE > 0) { printf("<Entering force_internal>\n"); } indices = smalloc_ret(nsets_tot * sizeof(int)); internal_vwgt = smalloc_ret(nsets_tot * sizeof(double)); total_vwgt = smalloc_ret(nsets_tot * sizeof(int)); if (indices == NULL || internal_vwgt == NULL || total_vwgt == NULL) goto skip; for (set=0; set < nsets_tot; set++) { total_vwgt[set] = internal_vwgt[set] = 0; indices[set] = set; } vwgt_max = 0; for (i=1; i<=nvtxs; i++) { internal = TRUE; set = assign[i]; for (j = 1; j < graph[i]->nedges && internal; j++) { set2 = assign[graph[i]->edges[j]]; internal = (set2 == set); } total_vwgt[set] += graph[i]->vwgt; if (internal) { internal_vwgt[set] += graph[i]->vwgt; } if (graph[i]->vwgt > vwgt_max) { vwgt_max = graph[i]->vwgt; } } /* Now sort all the internal_vwgt values. */ space = smalloc_ret(nsets_tot * sizeof(int)); if (space == NULL) goto skip; mergesort(internal_vwgt, nsets_tot, indices, space); sfree(space); space = NULL; /* Now construct a doubly linked list of sorted, internal_vwgt values. */ int_list = smalloc_ret((nsets_tot + 1) * sizeof(struct bidint)); if (int_list == NULL) goto skip; prev = &(int_list[nsets_tot]); prev->prev = NULL; for (i = 0; i < nsets_tot; i++) { set = indices[i]; int_list[set].prev = prev; int_list[set].val = internal_vwgt[set]; prev->next = &(int_list[set]); prev = &(int_list[set]); } prev->next = NULL; int_list[nsets_tot].val = -1; sfree(internal_vwgt); sfree(indices); internal_vwgt = NULL; indices = NULL; /* Set up convenient data structure for navigating through sets. */ set_list = smalloc_ret(nsets_tot * sizeof(struct bidint)); vtx_elems = smalloc_ret((nvtxs + 1) * sizeof(struct bidint)); if (set_list == NULL || vtx_elems == NULL) goto skip; for (i = 0; i < nsets_tot; i++) { set_list[i].next = NULL; } for (i = 1; i <= nvtxs; i++) { set = assign[i]; vtx_elems[i].next = set_list[set].next; if (vtx_elems[i].next != NULL) { vtx_elems[i].next->prev = &(vtx_elems[i]); } vtx_elems[i].prev = &(set_list[set]); set_list[set].next = &(vtx_elems[i]); } locked = smalloc_ret((nvtxs + 1) * sizeof(int)); if (locked == NULL) goto skip; nlocked = 0; size = (int) (&(int_list[1]) - &(int_list[0])); any_change = TRUE; npasses = 1; while (any_change && npasses <= npasses_max) { for (i = 1; i <= nvtxs; i++) { locked[i] = FALSE; } /* Now select top guy off the list and improve him. */ any_change = FALSE; progress = TRUE; niter = 1; while (progress) { prev = int_list[nsets_tot].next; set = ((int) (prev - int_list)) / size; if (DEBUG_INTERNAL > 0) { printf("Before iteration %d, nlocked = %d, int[%d] = %d\n", niter, nlocked, set, prev->val); } if (DEBUG_INTERNAL > 1) { check_internal(graph, nvtxs, int_list, set_list, vtx_elems, total_vwgt, assign, nsets_tot); } progress = improve_internal(graph, nvtxs, assign, goal, int_list, set_list, vtx_elems, set, locked, &nlocked, using_ewgts, vwgt_max, total_vwgt); if (progress) any_change = TRUE; niter++; } npasses++; } error = 0; skip: if (error) { strout("\nWARNING: No space to increase internal vertices."); strout(" NO INTERNAL VERTEX INCREASE PERFORMED.\n"); } sfree(internal_vwgt); sfree(indices); sfree(locked); sfree(total_vwgt); sfree(vtx_elems); sfree(int_list); sfree(set_list); }
int nway_kl ( struct vtx_data **graph, /* data structure for graph */ int nvtxs, /* number of vtxs in graph */ struct bilist ****buckets, /* array of lists for bucket sort */ struct bilist **listspace, /* list data structure for each vertex */ int **tops, /* 2-D array of top of each set of buckets */ int **dvals, /* d-values for each transition */ int *sets, /* processor each vertex is assigned to */ int maxdval, /* maximum d-value for a vertex */ int nsets, /* number of sets divided into */ double *goal, /* desired set sizes */ float *term_wgts[], /* weights for terminal propogation */ int (*hops)[MAXSETS], /* cost of set transitions */ int max_dev, /* largest allowed deviation from balance */ int using_ewgts, /* are edge weights being used? */ int **bndy_list, /* list of vertices on boundary (0 ends) */ double *startweight /* sum of vweights in each set (in and out) */ ) /* Suaris and Kedem algorithm for quadrisection, generalized to an */ /* arbitrary number of sets, with intra-set cost function specified by hops. */ /* Note: this is for a single divide step. */ /* Also, sets contains an intial (possibly crummy) partitioning. */ { extern double kl_bucket_time; /* time spent in KL bucketsort */ extern int KL_BAD_MOVES; /* # bad moves in a row to stop KL */ extern int DEBUG_KL; /* debug flag for KL */ extern int KL_RANDOM; /* use randomness in KL? */ extern int KL_NTRIES_BAD; /* number of unhelpful passes before quitting */ extern int KL_UNDO_LIST; /* should I back out of changes or start over? */ extern int KL_MAX_PASS; /* maximum number of outer KL loops */ extern double CUT_TO_HOP_COST; /* if term_prop; cut/hop importance */ struct bilist *movelist; /* list of vtxs to be moved */ struct bilist **endlist; /* end of movelists */ struct bilist *bestptr; /* best vertex in linked list */ struct bilist *bptr; /* loops through bucket list */ float *ewptr=NULL; /* loops through edge weights */ double *locked=NULL; /* weight of vertices locked in a set */ double *loose=NULL; /* weight of vtxs that can move from a set */ int *bspace=NULL; /* list of active vertices for bucketsort */ double *weightsum=NULL; /* sum of vweights for each partition */ int *edges=NULL; /* edge list for a vertex */ int *bdy_ptr=NULL; /* loops through bndy_list */ double time; /* timing parameter */ double delta; /* desire of sets to change size */ double bestdelta=-1; /* strongest delta value */ double deltaplus; /* largest negative deviation from goal size */ double deltaminus; /* largest negative deviation from goal size */ int list_length; /* how long is list of vertices to bucketsort? */ int balanced; /* is partition balanced? */ int temp_balanced; /* is intermediate partition balanced? */ int ever_balanced; /* has any partition been balanced? */ int bestvtx=-1; /* best vertex to move */ int bestval=-1; /* best change in value for a vtx move */ int bestfrom=-1, bestto=-1; /* sets best vertex moves between */ int vweight; /* weight of best vertex */ int gtotal; /* sum of changes from moving */ int improved; /* total improvement from KL */ double balance_val=0.0; /* how imbalanced is it */ double balance_best; /* best balance yet if trying hard */ double bestg; /* maximum gtotal found in KL loop */ double bestg_min; /* smaller than any possible bestg */ int beststep; /* step where maximum value occurred */ int neighbor; /* neighbor of a vertex */ int step_cutoff; /* number of negative steps in a row allowed */ int cost_cutoff; /* amount of negative d-values allowed */ int neg_steps; /* number of negative steps in a row */ int neg_cost; /* decrease in sum of d-values */ int vtx; /* vertex number */ int dval; /* dval of a vertex */ int group; /* set that a vertex is assigned to */ double cut_cost; /* if term_prop; relative cut/hop importance */ int diff; /* change in a d-value */ int stuck1st, stuck2nd; /* how soon will moves be disallowed? */ int beststuck1=-1, beststuck2=-1; /* best stuck values for tie-breaking */ int eweight; /* a particular edge weight */ int worth_undoing; /* is it worth undoing list? */ float undo_frac; /* fraction of vtxs indicating worth of undoing */ int step; /* loops through movements of vertices */ int parity; /* sort forwards or backwards? */ int done; /* has termination criteria been achieved? */ int nbad; /* number of unhelpful passes in a row */ int npass; /* total number of passes */ int nbadtries; /* number of unhelpful passes before quitting */ int enforce_balance; /* force a balanced partition? */ int enforce_balance_hard; /* really force a balanced partition? */ int balance_trouble; /* even balance_hard isn't working */ int size; /* array spacing */ int i, j, k, l; /* loop counters */ double drandom(), seconds(); int make_kl_list(); void bucketsorts(), bucketsorts_bi(), bucketsort1(); void pbuckets(), removebilist(), movebilist(), make_bndy_list(); nbadtries = KL_NTRIES_BAD; enforce_balance = FALSE; temp_balanced = FALSE; enforce_balance_hard = FALSE; balance_trouble = FALSE; size = (int) (&(listspace[0][1]) - &(listspace[0][0])); undo_frac = .3; cut_cost = 1; if (term_wgts[1] != NULL) { if (CUT_TO_HOP_COST > 1) { cut_cost = CUT_TO_HOP_COST; } } bspace = smalloc_ret(nvtxs * sizeof(int)); weightsum = smalloc_ret(nsets * sizeof(double)); locked = smalloc_ret(nsets * sizeof(double)); loose = smalloc_ret(nsets * sizeof(double)); if (bspace == NULL || weightsum == NULL || locked == NULL || loose == NULL) { sfree(loose); sfree(locked); sfree(weightsum); sfree(bspace); return(1); } if (*bndy_list != NULL) { bdy_ptr = *bndy_list; list_length = 0; while (*bdy_ptr != 0) { bspace[list_length++] = *bdy_ptr++; } sfree(*bndy_list); if (list_length == 0) { /* No boundary -> make everybody bndy. */ for (i = 0; i < nvtxs; i++) { bspace[i] = i + 1; } list_length = nvtxs; } /* Set dvals to flag uninitialized vertices. */ for (i = 1; i <= nvtxs; i++) { dvals[i][0] = 3 * maxdval; } } else { list_length = nvtxs; } step_cutoff = KL_BAD_MOVES; cost_cutoff = maxdval * step_cutoff / 7; if (cost_cutoff < step_cutoff) cost_cutoff = step_cutoff; deltaminus = deltaplus = 0; for (i = 0; i < nsets; i++) { if (startweight[i] - goal[i] > deltaplus) { deltaplus = startweight[i] - goal[i]; } else if (goal[i] - startweight[i] > deltaminus) { deltaminus = goal[i] - startweight[i]; } } balanced = (deltaplus + deltaminus <= max_dev); bestg_min = -2.0 * nvtxs * maxdval; parity = FALSE; eweight = cut_cost + .5; nbad = 0; npass = 0; improved = 0; done = FALSE; while (!done) { npass++; ever_balanced = FALSE; /* Initialize various quantities. */ balance_best = 0; for (i = 0; i < nsets; i++) { for (j = 0; j < nsets; j++) tops[i][j] = 2 * maxdval; weightsum[i] = startweight[i]; loose[i] = weightsum[i]; locked[i] = 0; balance_best += goal[i]; } gtotal = 0; bestg = bestg_min; beststep = -1; movelist = NULL; endlist = &movelist; neg_steps = 0; /* Compute the initial d-values, and bucket-sort them. */ time = seconds(); if (nsets == 2) { bucketsorts_bi(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets, parity, hops, bspace, list_length, npass, using_ewgts); } else { bucketsorts(graph, nvtxs, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets, parity, hops, bspace, list_length, npass, using_ewgts); } parity = !parity; kl_bucket_time += seconds() - time; if (DEBUG_KL > 2) { pbuckets(buckets, listspace, maxdval, nsets); } /* Now determine the set of K-L moves. */ for (step = 1;; step++) { /* Find the highest d-value in each set. */ /* But only consider moves from large to small sets, or moves */ /* in which balance is preserved. */ /* Break ties in some nonarbitrary manner. */ bestval = -maxdval - 1; for (i = 0; i < nsets; i++) for (j = 0; j < nsets; j++) /* Only allow moves from large sets to small sets, or */ /* moves which preserve balance. */ if (i != j) { /* Find the best move from i to j. */ for (k = tops[i][j]; k >= 0 && buckets[i][j][k] == NULL; k--) ; tops[i][j] = k; if (k >= 0) { l = (j > i) ? j - 1 : j; vtx = ((int) (buckets[i][j][k] - listspace[l])) / size; vweight = graph[vtx]->vwgt; if ((enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j] && weightsum[i] - goal[i] - (weightsum[j] - goal[j]) > max_dev) || (!enforce_balance_hard && weightsum[i] >= goal[i] && weightsum[j] <= goal[j]) || (!enforce_balance_hard && weightsum[i] - vweight - goal[i] > -(double)((max_dev + 1) / 2) && weightsum[j] + vweight - goal[j] < (double)((max_dev + 1) / 2))) { /* Is it the best move seen so far? */ if (k - maxdval > bestval) { bestval = k - maxdval; bestvtx = vtx; bestto = j; /* DO I NEED ALL THIS DATA? Just to break ties. */ bestdelta = fabs(weightsum[i] - vweight - goal[i]) + fabs(weightsum[j] + vweight - goal[j]); beststuck1 = min(loose[i], goal[j] - locked[j]); beststuck2 = max(loose[i], goal[j] - locked[j]); } else if (k - maxdval == bestval) { /* Tied. Is better balanced than current best? */ /* If tied, move among sets with most freedom. */ stuck1st = min(loose[i], goal[j] - locked[j]); stuck2nd = max(loose[i], goal[j] - locked[j]); delta = fabs(weightsum[i] - vweight - goal[i]) + fabs(weightsum[j] + vweight - goal[j]); /* NOTE: Randomization in this check isn't ideal */ /* if more than two guys are tied. */ if (delta < bestdelta || (delta == bestdelta && (stuck1st > beststuck1 || (stuck1st == beststuck1 && (stuck2nd > beststuck2 || (stuck2nd == beststuck2 && (KL_RANDOM && drandom() < .5))))))) { bestval = k - maxdval; bestvtx = vtx; bestto = j; bestdelta = delta; beststuck1 = stuck1st; beststuck2 = stuck2nd; } } } } } if (bestval == -maxdval - 1) { /* No allowed moves */ if (DEBUG_KL > 0) { printf("No KL moves at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } break; } bestptr = &(listspace[0][bestvtx]); bestfrom = sets[bestvtx]; vweight = graph[bestvtx]->vwgt; weightsum[bestto] += vweight; weightsum[bestfrom] -= vweight; loose[bestfrom] -= vweight; locked[bestto] += vweight; if (enforce_balance) { /* Check if this partition is balanced. */ deltaminus = deltaplus = 0; for (i = 0; i < nsets; i++) { if (weightsum[i] - goal[i] > deltaplus) { deltaplus = weightsum[i] - goal[i]; } else if (goal[i] - weightsum[i] > deltaminus) { deltaminus = goal[i] - weightsum[i]; } } balance_val = deltaminus + deltaplus; temp_balanced = (balance_val <= max_dev); ever_balanced = (ever_balanced || temp_balanced); } gtotal += bestval; if (((gtotal > bestg && (!enforce_balance || temp_balanced)) || (enforce_balance_hard && balance_val < balance_best)) && step != nvtxs) { bestg = gtotal; beststep = step; if (enforce_balance_hard) { balance_best = balance_val; } if (temp_balanced) { enforce_balance_hard = FALSE; } } if (DEBUG_KL > 1) { printf("At KL step %d, bestvtx=%d, bestval=%d (%d-> %d)\n", step, bestvtx, bestval, bestfrom, bestto); } /* Monitor the stopping criteria. */ if (bestval < 0) { if (!enforce_balance || ever_balanced) neg_steps++; if (bestg != bestg_min) neg_cost = bestg - gtotal; else neg_cost = -maxdval - 1; if ((neg_steps > step_cutoff || neg_cost > cost_cutoff) && !(enforce_balance && bestg == bestg_min) && (beststep != step)) { if (DEBUG_KL > 0) { if (neg_steps > step_cutoff) { printf("KL step cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } else if (neg_cost > cost_cutoff) { printf("KL cost cutoff at step %d. bestg = %g at step %d.\n", step, bestg, beststep); } } break; } } else if (bestval > 0) { neg_steps = 0; } /* Remove vertex from its buckets, and flag it as finished. */ l = 0; for (k = 0; k < nsets; k++) { if (k != bestfrom) { dval = dvals[bestvtx][l] + maxdval; removebilist(&listspace[l][bestvtx], &buckets[bestfrom][k][dval]); l++; } } /* Is there a better way to do this? */ sets[bestvtx] = -sets[bestvtx] - 1; /* Set up the linked list of moved vertices. */ bestptr->next = NULL; bestptr->prev = (struct bilist *) (unsigned long) bestto; *endlist = bestptr; endlist = &(bestptr->next); /* Now update the d-values of all the neighbors */ edges = graph[bestvtx]->edges; if (using_ewgts) ewptr = graph[bestvtx]->ewgts; for (j = graph[bestvtx]->nedges - 1; j; j--) { neighbor = *(++edges); if (using_ewgts) eweight = *(++ewptr) * cut_cost + .5; /* First make sure neighbor is alive. */ if (sets[neighbor] >= 0) { group = sets[neighbor]; if (dvals[neighbor][0] >= 3 * maxdval) { /* New vertex, not yet in buckets. */ /* Can't be neighbor of moved vtx, so compute */ /* inital dvals and buckets, then update. */ bucketsort1(graph, neighbor, buckets, listspace, dvals, sets, term_wgts, maxdval, nsets, hops, using_ewgts); } l = 0; for (k = 0; k < nsets; k++) { if (k != group) { diff = eweight * ( hops[k][bestfrom] - hops[group][bestfrom] + hops[group][bestto] - hops[k][bestto]); dval = dvals[neighbor][l] + maxdval; movebilist(&listspace[l][neighbor], &buckets[group][k][dval], &buckets[group][k][dval + diff]); dvals[neighbor][l] += diff; dval += diff; if (dval > tops[group][k]) tops[group][k] = dval; l++; } } } } if (DEBUG_KL > 2) { pbuckets(buckets, listspace, maxdval, nsets); } } /* Done with a pass; should we actually perform any swaps? */ bptr = movelist; if (bestg > 0 || (bestg != bestg_min && !balanced && enforce_balance) || (bestg != bestg_min && balance_trouble)) { improved += bestg; for (i = 1; i <= beststep; i++) { vtx = ((int) (bptr - listspace[0])) / size; bestto = (int) (unsigned long) bptr->prev; startweight[bestto] += graph[vtx]->vwgt; startweight[-sets[vtx] - 1] -= graph[vtx]->vwgt; sets[vtx] = (int) bestto; bptr = bptr->next; } deltaminus = deltaplus = 0; for (i = 0; i < nsets; i++) { if (startweight[i] - goal[i] > deltaplus) { deltaplus = startweight[i] - goal[i]; } else if (goal[i] - startweight[i] > deltaminus) { deltaminus = goal[i] - startweight[i]; } } /* printf(" deltaplus = %f, deltaminus = %f, max_dev = %d\n", deltaplus, deltaminus, max_dev); */ balanced = (deltaplus + deltaminus <= max_dev); } else { nbad++; } if (!balanced || bptr == movelist) { if (enforce_balance) { if (enforce_balance_hard) { balance_trouble = TRUE; } enforce_balance_hard = TRUE; } enforce_balance = TRUE; nbad++; } worth_undoing = (step < undo_frac * nvtxs); done = (nbad >= nbadtries && balanced); if (KL_MAX_PASS > 0) { done = done || (npass == KL_MAX_PASS && balanced); } if (!done) { /* Prepare for next pass. */ if (KL_UNDO_LIST && worth_undoing && !balance_trouble) { /* Make a list of modified vertices for next bucketsort. */ /* Also, ensure these vertices are removed from their buckets. */ list_length = make_kl_list(graph, movelist, buckets, listspace, sets, nsets, bspace, dvals, maxdval); } } if (done || !(KL_UNDO_LIST && worth_undoing && !balance_trouble)) { /* Restore set numbers of remaining, altered vertices. */ while (bptr != NULL) { vtx = ((int) (bptr - listspace[0])) / size; sets[vtx] = -sets[vtx] - 1; bptr = bptr->next; } list_length = nvtxs; } if (done && *bndy_list != NULL) { make_bndy_list(graph, movelist, buckets, listspace, sets, nsets, bspace, tops, bndy_list); } } if (DEBUG_KL > 0) { printf(" KL required %d passes to improve by %d.\n", npass, improved); } sfree(loose); sfree(locked); sfree(weightsum); sfree(bspace); return(0); }
int submain ( struct vtx_data **graph, /* data structure for graph */ int nvtxs, /* number of vertices in full graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertex weights being used? */ int using_ewgts, /* are edge weights being used? */ int igeom, /* geometry dimension if using inertial method */ float **coords, /* coordinates of vertices if used */ char *outassignname, /* name of assignment output file */ char *outfilename, /* in which to print output metrics */ int *assignment, /* set number of each vtx (length n) */ double *goal, /* desired sizes for each set */ int architecture, /* 0=> hypercube, d=> d-dimensional mesh */ int ndims_tot, /* total number hypercube dimensions */ int mesh_dims[3], /* extent of mesh in 3 directions */ int global_method, /* global partitioning algorithm */ int local_method, /* local partitioning algorithm */ int rqi_flag, /* use RQI/Symmlq eigensolver? */ int vmax, /* if so, how many vtxs to coarsen down to */ int ndims, /* number of eigenvectors (2^d sets) */ double eigtol, /* tolerance on eigenvectors */ long seed /* for random graph mutations */ ) { extern int ECHO; /* controls output to file or screen */ extern int CHECK_INPUT; /* should I check input for correctness? */ extern int SEQUENCE; /* just generate spectal ordering? */ extern int OUTPUT_ASSIGN; /* print assignment to a file? */ extern int OUTPUT_METRICS; /* controls formatting of output */ extern int PERTURB; /* perturb matrix if quad/octasection? */ extern int NSQRTS; /* number of square roots to precompute */ extern int KL_METRIC; /* KL interset cost: 1=>cuts, 2=>hops */ extern int LANCZOS_TYPE; /* type of Lanczos to use */ extern int REFINE_MAP; /* use greedy strategy to improve mapping? */ extern int REFINE_PARTITION;/* number of calls to pairwise_refine to make */ extern int VERTEX_COVER; /* use matching to reduce vertex separator? */ extern int CONNECTED_DOMAINS; /* force subdomain connectivity at end? */ extern int INTERNAL_VERTICES; /* greedily increase internal vtxs? */ extern int DEBUG_INTERNAL; /* debug code about force_internal? */ extern int DEBUG_REFINE_PART; /* debug code about refine_part? */ extern int DEBUG_REFINE_MAP; /* debug code about refine_map? */ extern int DEBUG_MACH_PARAMS; /* print out computed machine params? */ extern int DEBUG_TRACE; /* trace main execution path */ extern int PRINT_HEADERS; /* print section headings for output? */ extern int TIME_KERNELS; /* benchmark some numerical kernels? */ extern double start_time; /* time code was entered */ extern double total_time; /* (almost) total time spent in code */ extern double check_input_time; /* time spent checking input */ extern double partition_time; /* time spent partitioning graph */ extern double kernel_time; /* time spent benchmarking kernels */ extern double count_time; /* time spent evaluating the answer */ extern double print_assign_time; /* time spent writing output file */ FILE *outfile; /* output file */ struct vtx_data **graph2; /* data structure for graph */ int hop_mtx[MAXSETS][MAXSETS]; /* between-set hop cost for KL */ double *vwsqrt; /* sqrt of vertex weights (length nvtxs+1) */ double time, time1; /* timing variables */ char *graphname, *geomname; /* names of input files */ char *inassignname; /* name of assignment input file */ int old_nsqrts; /* old value of NSQRTS */ int append; /* append output to existing file? */ int nsets; /* number of sets created by each divide */ int nsets_tot; /* total number of sets */ int bits; /* used in computing hops */ int flag; /* return code from check_input */ int old_perturb=0; /* saves original pertubation flag */ int i, j, k; /* loop counters */ double seconds(); void setrandom(long int seed); int check_input(), refine_part(); void connect_enforce(); void setrandom(), makevwsqrt(), balance(), countup(); void force_internal(), sequence(), reflect_input(); void machine_params(), assign_out(), refine_map(); void time_out(), time_kernels(), strout(); if (DEBUG_TRACE > 0) { printf("<Entering submain>\n"); } /* First check all the input for consistency. */ if (architecture == 1) mesh_dims[1] = mesh_dims[2] = 1; else if (architecture == 2) mesh_dims[2] = 1; /* Check for simple special case of 1 processor. */ k = 0; if (architecture == 0) k = 1 << ndims_tot; else if (architecture > 0) k = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; if (k == 1) { for (i = 1; i <= nvtxs; i++) assignment[i] = 0; if (OUTPUT_ASSIGN > 0 && outassignname != NULL) { time1 = seconds(); assign_out(nvtxs, assignment, k, outassignname); print_assign_time += seconds() - time1; } return(0); } graphname = Graph_File_Name; geomname = Geometry_File_Name; inassignname = Assign_In_File_Name; /* Turn of perturbation if using bisection */ if (ndims == 1) { old_perturb = PERTURB; PERTURB = FALSE; } if (ECHO < 0 && outfilename != NULL) { /* Open output file */ outfile = fopen(outfilename, "r"); if (outfile != NULL) { append = TRUE; fclose(outfile); } else append = FALSE; outfile = fopen(outfilename, "a"); if (append) { fprintf(outfile, "\n------------------------------------------------\n\n"); } } else { outfile = NULL; } Output_File = outfile; if (outfile != NULL && PRINT_HEADERS) { fprintf(outfile, "\n Chaco 2.0\n"); fprintf(outfile, " Sandia National Laboratories\n\n"); } if (CHECK_INPUT) { /* Check the input for inconsistencies. */ time1 = seconds(); flag = check_input(graph, nvtxs, nedges, igeom, coords, graphname, assignment, goal, architecture, ndims_tot, mesh_dims, global_method, local_method, rqi_flag, &vmax, ndims, eigtol); check_input_time += seconds() - time1; if (flag) { strout("ERROR IN INPUT.\n"); return (1); } } if (ECHO != 0) { reflect_input(nvtxs, nedges, igeom, graphname, geomname, inassignname, outassignname, outfilename, architecture, ndims_tot, mesh_dims, global_method, local_method, rqi_flag, vmax, ndims, eigtol, seed, outfile); } if (PRINT_HEADERS) { printf("\n\nStarting to partition ...\n\n"); if (Output_File != NULL ) { fprintf(Output_File, "\n\nStarting to partition ... (residual, warning and error messages only)\n\n"); } } time = seconds(); /* Perform some one-time initializations. */ setrandom(seed); machine_params(&DOUBLE_EPSILON, &DOUBLE_MAX); if (DEBUG_MACH_PARAMS > 0) { printf("Machine parameters:\n"); printf(" DOUBLE_EPSILON = %e\n", DOUBLE_EPSILON); printf(" DOUBLE_MAX = %e\n", DOUBLE_MAX); } nsets = (1 << ndims); old_nsqrts = NSQRTS; if (nvtxs < NSQRTS && !using_vwgts) { NSQRTS = nvtxs; } SQRTS = smalloc_ret((NSQRTS + 1) * sizeof(double)); if (SQRTS == NULL) { strout("ERROR: No space to allocate sqrts\n"); return(1); } for (i = 1; i <= NSQRTS; i++) SQRTS[i] = sqrt((double) i); if (using_vwgts && (global_method == 1 || global_method == 2)) { vwsqrt = smalloc_ret((nvtxs + 1) * sizeof(double)); if (vwsqrt == NULL) { strout("ERROR: No space to allocate vwsqrt\n"); sfree(SQRTS); NSQRTS = old_nsqrts; return(1); } makevwsqrt(vwsqrt, graph, nvtxs); } else vwsqrt = NULL; if (TIME_KERNELS) { time1 = seconds(); time_kernels(graph, nvtxs, vwsqrt); kernel_time += seconds() - time1; } if (SEQUENCE) { sequence(graph, nvtxs, nedges, using_ewgts, vwsqrt, LANCZOS_TYPE, rqi_flag, vmax, eigtol); goto End_Label; } /* Initialize cost function for KL-spiff */ if (global_method == 1 || local_method == 1) { for (i = 0; i < nsets; i++) { hop_mtx[i][i] = 0; for (j = 0; j < i; j++) { if (KL_METRIC == 2) { /* Count hypercube hops */ hop_mtx[i][j] = 0; bits = i ^ j; while (bits) { if (bits & 1) { ++hop_mtx[i][j]; } bits >>= 1; } } else if (KL_METRIC == 1) { /* Count cut edges */ hop_mtx[i][j] = 1; } hop_mtx[j][i] = hop_mtx[i][j]; } }
void refine_map(struct vtx_data **graph, /* graph data structure */ int nvtxs, /* number of vertices in graph */ int using_ewgts, /* are edge weights being used? */ int * assign, /* current assignment */ int cube_or_mesh, /* 0 => hypercube, d => d-dimensional mesh */ int ndims_tot, /* if hypercube, number of dimensions */ int mesh_dims[3] /* if mesh, dimensions of mesh */ ) { struct vtx_data **comm_graph; /* graph for communication requirements */ int nsets_tot = 0; /* total number of sets */ int * vtx2node = NULL; /* mapping of comm_graph vtxs to processors */ int * node2vtx = NULL; /* mapping of sets to comm_graph vtxs */ double maxdesire = 0.0; /* largest possible desire to flip an edge */ int error = 0; /* out of space? */ int i; /* loop counter */ double find_maxdeg(); void free_graph(), strout(); int make_comm_graph(), refine_mesh(), refine_cube(); if (cube_or_mesh == 0) nsets_tot = 1 << ndims_tot; else if (cube_or_mesh == 1) nsets_tot = mesh_dims[0]; else if (cube_or_mesh == 2) nsets_tot = mesh_dims[0] * mesh_dims[1]; else if (cube_or_mesh == 3) nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; node2vtx = vtx2node = NULL; /* Construct the weighted quotient graph representing communication. */ error = make_comm_graph(&comm_graph, graph, nvtxs, using_ewgts, assign, nsets_tot); if (!error) { maxdesire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL); vtx2node = smalloc_ret((nsets_tot + 1) * sizeof(int)); node2vtx = smalloc_ret(nsets_tot * sizeof(int)); if (node2vtx == NULL || vtx2node == NULL) { error = 1; goto skip; } for (i = 1; i <= nsets_tot; i++) { vtx2node[i] = (int)i - 1; node2vtx[i - 1] = (int)i; } if (cube_or_mesh > 0) { error = refine_mesh(comm_graph, cube_or_mesh, mesh_dims, maxdesire, vtx2node, node2vtx); } else if (cube_or_mesh == 0) { error = refine_cube(comm_graph, ndims_tot, maxdesire, vtx2node, node2vtx); } if (!error) { for (i = 1; i <= nvtxs; i++) { assign[i] = vtx2node[assign[i] + 1]; } } } skip: if (error) { strout("\nWARNING: No space to refine mapping to processors."); strout(" NO MAPPING REFINEMENT PERFORMED.\n"); } sfree(node2vtx); sfree(vtx2node); free_graph(comm_graph); }
/* begin at 1 instead of at 0. */ int refine_mesh(struct vtx_data **comm_graph, /* graph for communication requirements */ int cube_or_mesh, /* number of dimensions in mesh */ int mesh_dims[3], /* dimensions of mesh */ double maxdesire, /* largest possible desire to flip an edge */ int * vtx2node, /* mapping from comm_graph vtxs to mesh nodes */ int * node2vtx /* mapping from mesh nodes to comm_graph vtxs */ ) { struct refine_vdata * vdata = NULL; /* desire data for all vertices */ struct refine_vdata * vptr; /* loops through vdata */ struct refine_edata * edata = NULL; /* desire data for all edges */ struct refine_edata * eguy; /* one element in edata array */ struct refine_edata **desire_ptr = NULL; /* array of desire buckets */ double * desires = NULL; /* each edge's inclination to flip */ int * indices = NULL; /* sorted list of desire values */ int * space = NULL; /* used for sorting disire values */ double best_desire; /* highest desire of edge to flip */ int imax; /* maxdesire rounded up */ int nsets_tot; /* total number of sets/processors */ int neighbor; /* neighboring vertex */ int dim; /* loops over mesh dimensions */ int nwires; /* number of wires in processor mesh */ int wire; /* loops through all wires */ int node1, node2; /* processors joined by a wire */ int vtx1, vtx2; /* corresponding vertices in comm_graph */ int loc1, loc2; /* location of vtxs in flipping dimension */ int error; /* out of space? */ int i, j, k; /* loop counter */ double find_maxdeg(); double compute_mesh_edata(); void compute_mesh_vdata(), init_mesh_edata(), mergesort(); void update_mesh_vdata(), update_mesh_edata(); error = 1; nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; imax = maxdesire; if (imax != maxdesire) imax++; vdata = (struct refine_vdata *)smalloc_ret((cube_or_mesh * nsets_tot + 1) * sizeof(struct refine_vdata)); if (vdata == NULL) goto skip; /* Compute each node's desires to move or stay put. */ vptr = vdata; for (dim = 0; dim < cube_or_mesh; dim++) { for (i = 1; i <= nsets_tot; i++) { compute_mesh_vdata(++vptr, comm_graph, i, vtx2node, mesh_dims, dim); } } nwires = (mesh_dims[0] - 1) * mesh_dims[1] * mesh_dims[2] + mesh_dims[0] * (mesh_dims[1] - 1) * mesh_dims[2] + mesh_dims[0] * mesh_dims[1] * (mesh_dims[2] - 1); edata = smalloc_ret((nwires + 1) * sizeof(struct refine_edata)); desires = smalloc_ret(nwires * sizeof(double)); if (vdata == NULL || desires == NULL) goto skip; /* Initialize all the edge values. */ init_mesh_edata(edata, mesh_dims); for (wire = 0; wire < nwires; wire++) { desires[wire] = edata[wire].swap_desire = compute_mesh_edata(&(edata[wire]), vdata, mesh_dims, comm_graph, node2vtx); } /* Set special value for end pointer. */ edata[nwires].swap_desire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL); /* I now need to sort all the wire preference values */ indices = smalloc_ret(nwires * sizeof(int)); space = smalloc_ret(nwires * sizeof(int)); if (indices == NULL || space == NULL) goto skip; mergesort(desires, nwires, indices, space); sfree(space); sfree(desires); space = NULL; desires = NULL; best_desire = (edata[indices[nwires - 1]]).swap_desire; /* Now construct a buckets of linked lists with desire values. */ if (best_desire > 0) { desire_ptr = (struct refine_edata **)smalloc_ret((2 * imax + 1) * sizeof(struct refine_edata *)); if (desire_ptr == NULL) goto skip; for (i = 2 * imax; i >= 0; i--) desire_ptr[i] = NULL; for (i = nwires - 1; i >= 0; i--) { eguy = &(edata[indices[i]]); /* Round the swap desire up. */ if (eguy->swap_desire >= 0) { k = eguy->swap_desire; if (k != eguy->swap_desire) k++; } else { k = -eguy->swap_desire; if (k != -eguy->swap_desire) k++; k = -k; } k += imax; eguy->prev = NULL; eguy->next = desire_ptr[k]; if (desire_ptr[k] != NULL) desire_ptr[k]->prev = eguy; desire_ptr[k] = eguy; } } else { desire_ptr = NULL; } sfree(indices); indices = NULL; loc1 = 0; loc2 = 0; /* Everything is now set up. Swap sets across wires until no more improvement. */ while (best_desire > 0) { k = best_desire + 1 + imax; if (k > 2 * imax) k = 2 * imax; while (k > imax && desire_ptr[k] == NULL) k--; eguy = desire_ptr[k]; dim = eguy->dim; node1 = eguy->node1; node2 = eguy->node2; vtx1 = node2vtx[node1]; vtx2 = node2vtx[node2]; if (dim == 0) { loc1 = node1 % mesh_dims[0]; loc2 = node2 % mesh_dims[0]; } else if (dim == 1) { loc1 = (node1 / mesh_dims[0]) % mesh_dims[1]; loc2 = (node2 / mesh_dims[0]) % mesh_dims[1]; } else if (dim == 2) { loc1 = node1 / (mesh_dims[0] * mesh_dims[1]); loc2 = node2 / (mesh_dims[0] * mesh_dims[1]); } /* Now swap the vertices. */ node2vtx[node1] = (int)vtx2; node2vtx[node2] = (int)vtx1; vtx2node[vtx1] = (int)node2; vtx2node[vtx2] = (int)node1; /* First update all the vdata fields for vertices effected by this flip. */ for (j = 1; j < comm_graph[vtx1]->nedges; j++) { neighbor = comm_graph[vtx1]->edges[j]; if (neighbor != vtx2) update_mesh_vdata(loc1, loc2, dim, comm_graph[vtx1]->ewgts[j], vdata, mesh_dims, neighbor, vtx2node); } for (j = 1; j < comm_graph[vtx2]->nedges; j++) { neighbor = comm_graph[vtx2]->edges[j]; if (neighbor != vtx1) update_mesh_vdata(loc2, loc1, dim, comm_graph[vtx2]->ewgts[j], vdata, mesh_dims, neighbor, vtx2node); } /* Now recompute all preferences for vertices that were moved. */ for (j = 0; j < cube_or_mesh; j++) { compute_mesh_vdata(&(vdata[j * nsets_tot + vtx1]), comm_graph, vtx1, vtx2node, mesh_dims, j); compute_mesh_vdata(&(vdata[j * nsets_tot + vtx2]), comm_graph, vtx2, vtx2node, mesh_dims, j); } /* Now I can update the values of all the edges associated with all the effected vertices. Note that these include mesh neighbors of node1 and node2 in addition to the dim-edges of graph neighbors of vtx1 and vtx2. */ /* For each neighbor vtx, look at -1 and +1 edge. If desire hasn't changed, return. Otherwise, pick him up and move him. Similarly for all directional neighbors of node1 and node2. */ for (j = 1; j < comm_graph[vtx1]->nedges; j++) { neighbor = comm_graph[vtx1]->edges[j]; if (neighbor != vtx2) update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } for (j = 1; j < comm_graph[vtx2]->nedges; j++) { neighbor = comm_graph[vtx2]->edges[j]; if (neighbor != vtx1) update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } for (j = 0; j < cube_or_mesh; j++) { update_mesh_edata(vtx1, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); update_mesh_edata(vtx2, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node, &best_desire, imax, desire_ptr); } k = best_desire + 1 + imax; if (k > 2 * imax) k = 2 * imax; while (k > imax && desire_ptr[k] == NULL) k--; best_desire = k - imax; } error = 0; skip: sfree(indices); sfree(space); sfree(desires); sfree(desire_ptr); sfree(vdata); sfree(edata); return (error); }
/* Construct a graph representing the inter-set communication. */ int refine_part ( struct vtx_data **graph, /* graph data structure */ int nvtxs, /* number of vertices in graph */ int using_ewgts, /* are edge weights being used? */ int *assign, /* current assignment */ int architecture, /* 0 => hypercube, d => d-dimensional mesh */ int ndims_tot, /* if hypercube, number of dimensions */ int mesh_dims[3], /* if mesh, size in each direction */ double *goal /* desired set sizes */ ) { extern int TERM_PROP; /* perform terminal propagation? */ struct bilist *set_list = NULL; /* lists of vtxs in each set */ struct bilist *vtx_elems = NULL; /* space for all vtxs in set_lists */ struct bilist *ptr = NULL; /* loops through set_lists */ struct ipairs *pairs = NULL;/* ordered list of edges in comm graph */ double *comm_vals = NULL; /* edge wgts of comm graph for sorting */ float *term_wgts[2]; /* terminal propagation vector */ int hops[MAXSETS][MAXSETS]; /* preference weighting */ double *temp = NULL; /* return argument from srealloc_ret() */ int *indices = NULL; /* sorted order for communication edges */ int *space = NULL; /* space for mergesort */ int *sizes = NULL; /* sizes of the different sets */ int *sub_assign = NULL; /* new assignment for subgraph */ int *old_sub_assign = NULL; /* room for current sub assignment */ int **edges_list = NULL;/* lists of comm graph edges */ int **ewgts_list = NULL;/* lists of comm graph edge wgts */ int *ewgts = NULL; /* loops through ewgts_list */ int *edges = NULL; /* edges in communication graph */ int *adj_sets = NULL; /* weights connecting sets */ int *eptr = NULL; /* loop through edges and edge weights */ int *ewptr = NULL; /* loop through edges and edge weights */ int ewgt; /* weight of an edge */ struct vtx_data **subgraph = NULL; /* subgraph data structure */ int *nedges = NULL; /* space for saving graph data */ int *degrees = NULL; /* # neighbors of vertices */ int *glob2loc = NULL; /* maps full to reduced numbering */ int *loc2glob = NULL; /* maps reduced to full numbering */ int nmax; /* largest subgraph I expect to encounter */ int set, set1, set2; /* sets vertices belong to */ int vertex; /* vertex in graph */ int ncomm; /* # edges in communication graph */ int dist=-1; /* architectural distance between two sets */ int nsets_tot=0; /* total number of processors */ int change; /* did change occur in this pass? */ int any_change = FALSE;/* has any change occured? */ int error; /* out of space? */ int size; /* array spacing */ int i, j, k; /* loop counters */ int abs(), kl_refine(); void mergesort(), strout(); error = 1; term_wgts[1] = NULL; if (architecture == 0) { nsets_tot = 1 << ndims_tot; } else if (architecture > 0) { nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; } hops[0][0] = hops[1][1] = 0; if (!TERM_PROP) { hops[0][1] = hops[1][0] = 1; } /* Set up convenient data structure for navigating through sets. */ set_list = smalloc_ret(nsets_tot * sizeof(struct bilist)); vtx_elems = smalloc_ret((nvtxs + 1) * sizeof(struct bilist)); sizes = smalloc_ret(nsets_tot * sizeof(int)); if (set_list == NULL || vtx_elems == NULL || sizes == NULL) goto skip; for (i = 0; i < nsets_tot; i++) { set_list[i].next = NULL; sizes[i] = 0; } for (i = 1; i <= nvtxs; i++) { set = assign[i]; ++sizes[set]; vtx_elems[i].next = set_list[set].next; if (vtx_elems[i].next != NULL) { vtx_elems[i].next->prev = &(vtx_elems[i]); } vtx_elems[i].prev = &(set_list[set]); set_list[set].next = &(vtx_elems[i]); } /* For each set, find connections to all set neighbors. */ edges_list = smalloc_ret(nsets_tot * sizeof(int *)); if (edges_list == NULL) goto skip; for (set = 0; set < nsets_tot-1; set++) { edges_list[set] = NULL; } ewgts_list = smalloc_ret(nsets_tot * sizeof(int *)); if (ewgts_list == NULL) goto skip; for (set = 0; set < nsets_tot-1; set++) { ewgts_list[set] = NULL; } nedges = smalloc_ret(nsets_tot * sizeof(int)); adj_sets = smalloc_ret(nsets_tot * sizeof(int)); if (nedges == NULL || adj_sets == NULL) goto skip; size = (int) (&(vtx_elems[1]) - &(vtx_elems[0])); ncomm = 0; ewgt = 1; nmax = 0; for (set = 0; set < nsets_tot-1; set++) { if (sizes[set] > nmax) nmax = sizes[set]; for (i = 0; i < nsets_tot; i++) { adj_sets[i] = 0; } for (ptr = set_list[set].next; ptr != NULL; ptr = ptr->next) { vertex = ((int) (ptr - vtx_elems)) / size; for (j = 1; j < graph[vertex]->nedges; j++) { set2 = assign[graph[vertex]->edges[j]]; if (using_ewgts) ewgt = graph[vertex]->ewgts[j]; adj_sets[set2] += ewgt; } } /* Now save adj_sets data to later construct graph. */ j = 0; for (i = set+1; i < nsets_tot; i++) { if (adj_sets[i] != 0) j++; } nedges[set] = j; if (j) { edges_list[set] = edges = smalloc_ret(j * sizeof(int)); ewgts_list[set] = ewgts = smalloc_ret(j * sizeof(int)); if (edges == NULL || ewgts == NULL) goto skip; } j = 0; for (i = set + 1; i < nsets_tot; i++) { if (adj_sets[i] != 0) { edges[j] = i; ewgts[j] = adj_sets[i]; j++; } } ncomm += j; } sfree(adj_sets); adj_sets = NULL; /* Now compact all communication weight information into single */ /* vector for sorting. */ pairs = smalloc_ret((ncomm + 1) * sizeof(struct ipairs)); comm_vals = smalloc_ret((ncomm + 1) * sizeof(double)); if (pairs == NULL || comm_vals == NULL) goto skip; j = 0; for (set = 0; set < nsets_tot - 1; set++) { eptr = edges_list[set]; ewptr = ewgts_list[set]; for (k = 0; k < nedges[set]; k++) { set2 = eptr[k]; pairs[j].val1 = set; pairs[j].val2 = set2; comm_vals[j] = ewptr[k]; j++; } } sfree(nedges); nedges = NULL; indices = smalloc_ret((ncomm + 1) * sizeof(int)); space = smalloc_ret((ncomm + 1) * sizeof(int)); if (indices == NULL || space == NULL) goto skip; mergesort(comm_vals, ncomm, indices, space); sfree(space); sfree(comm_vals); space = NULL; comm_vals = NULL; for (set = 0; set < nsets_tot - 1; set++) { if (edges_list[set] != NULL) sfree(edges_list[set]); if (ewgts_list[set] != NULL) sfree(ewgts_list[set]); } sfree(ewgts_list); sfree(edges_list); ewgts_list = NULL; edges_list = NULL; /* 2 for 2 subsets, 20 for safety margin. Should check this at run time. */ nmax = 2 * nmax + 20; subgraph = smalloc_ret((nmax + 1) * sizeof(struct vtx_data *)); degrees = smalloc_ret((nmax + 1) * sizeof(int)); glob2loc = smalloc_ret((nvtxs + 1) * sizeof(int)); loc2glob = smalloc_ret((nmax + 1) * sizeof(int)); sub_assign = smalloc_ret((nmax + 1) * sizeof(int)); old_sub_assign = smalloc_ret((nmax + 1) * sizeof(int)); if (subgraph == NULL || degrees == NULL || glob2loc == NULL || loc2glob == NULL || sub_assign == NULL || old_sub_assign == NULL) { goto skip; } if (TERM_PROP) { term_wgts[1] = smalloc_ret((nmax + 1) * sizeof(float)); if (term_wgts[1] == NULL) goto skip; } else { term_wgts[1] = NULL; } /* Do large boundaries first to encourage convergence. */ any_change = FALSE; for (i = ncomm - 1; i >= 0; i--) { j = indices[i]; set1 = pairs[j].val1; set2 = pairs[j].val2; /* Make sure subgraphs aren't too big. */ if (sizes[set1] + sizes[set2] > nmax) { nmax = sizes[set1] + sizes[set2]; temp = srealloc_ret(subgraph, (nmax + 1) * sizeof(struct vtx_data *)); if (temp == NULL) { goto skip; } else { subgraph = (struct vtx_data **) temp; } temp = srealloc_ret(degrees, (nmax + 1) * sizeof(int)); if (temp == NULL) { goto skip; } else { degrees = (int *) temp; } temp = srealloc_ret(loc2glob, (nmax + 1) * sizeof(int)); if (temp == NULL) { goto skip; } else { loc2glob = (int *) temp; } temp = srealloc_ret(sub_assign, (nmax + 1) * sizeof(int)); if (temp == NULL) { goto skip; } else { sub_assign = (int *) temp; } temp = srealloc_ret(old_sub_assign, (nmax + 1) * sizeof(int)); if (temp == NULL) { goto skip; } else { old_sub_assign = (int *) temp; } if (TERM_PROP) { temp = srealloc_ret(term_wgts[1], (nmax + 1) * sizeof(float)); if (temp == NULL) { goto skip; } else { term_wgts[1] = (float *) temp; } } } if (TERM_PROP) { if (architecture == 0) { j = set1 ^ set2; dist = 0; while (j) { if (j & 1) dist++; j >>= 1; } } else if (architecture > 0) { dist = abs((set1 % mesh_dims[0]) - (set2 % mesh_dims[0])); dist += abs(((set1 / mesh_dims[0]) % mesh_dims[1]) - ((set2 / mesh_dims[0]) % mesh_dims[1])); dist += abs((set1 / (mesh_dims[0] * mesh_dims[1])) - (set2 / (mesh_dims[0] * mesh_dims[1]))); } hops[0][1] = hops[1][0] = (int) dist; }
int reformat(int * start, /* start of edge list for each vertex */ int * adjacency, /* edge list data */ int nvtxs, /* number of vertices in graph */ int * pnedges, /* ptr to number of edges in graph */ int * vwgts, /* weights for all vertices */ float * ewgts, /* weights for all edges */ struct vtx_data ***pgraph /* ptr to array of vtx data for graph */ ) { extern FILE * Output_File; /* output file or null */ struct vtx_data **graph = NULL; /* array of vtx data for graph */ struct vtx_data * links = NULL; /* space for data for all vtxs */ int * edges = NULL; /* space for all adjacency lists */ float * eweights = NULL; /* space for all edge weights */ int * eptr = NULL; /* steps through adjacency list */ int * eptr_save = NULL; /* saved index into adjacency list */ float * wptr = NULL; /* steps through edge weights list */ int self_edge; /* number of self loops detected */ int size; /* length of all edge lists */ double sum; /* sum of edge weights for a vtx */ int using_ewgts; /* are edge weights being used? */ int using_vwgts; /* are vertex weights being used? */ int i, j; /* loop counters */ using_ewgts = (ewgts != NULL); using_vwgts = (vwgts != NULL); graph = smalloc_ret((nvtxs + 1) * sizeof(struct vtx_data *)); *pgraph = graph; if (graph == NULL) return (1); graph[1] = NULL; /* Set up all the basic data structure for the vertices. */ /* Replace many small mallocs by a few large ones. */ links = smalloc_ret((nvtxs) * sizeof(struct vtx_data)); if (links == NULL) return (1); for (i = 1; i <= nvtxs; i++) { graph[i] = links++; } graph[1]->edges = NULL; graph[1]->ewgts = NULL; /* Now fill in all the data fields. */ if (start != NULL) *pnedges = start[nvtxs] / 2; else *pnedges = 0; size = 2 * (*pnedges) + nvtxs; edges = smalloc_ret(size * sizeof(int)); if (edges == NULL) return (1); if (using_ewgts) { eweights = smalloc_ret(size * sizeof(float)); if (eweights == NULL) return (1); } if (start != NULL) { eptr = adjacency + start[0]; wptr = ewgts; } self_edge = 0; for (i = 1; i <= nvtxs; i++) { if (using_vwgts) graph[i]->vwgt = *(vwgts++); else graph[i]->vwgt = 1; if (start != NULL) size = start[i] - start[i - 1]; else size = 0; graph[i]->nedges = size + 1; graph[i]->edges = edges; *edges++ = i; eptr_save = eptr; for (j = size; j; j--) { if (*eptr != i) *edges++ = *eptr++; else { /* Self edge, skip it. */ if (!self_edge) { printf("WARNING: Self edge (%d,%d) being ignored\n", i, i); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Self edge (%d,%d) being ignored\n", i, i); } } ++self_edge; eptr++; --(graph[i]->nedges); --(*pnedges); } } if (using_ewgts) { graph[i]->ewgts = eweights; eweights++; sum = 0; for (j = size; j; j--) { if (*eptr_save++ != i) { sum += *wptr; *eweights++ = *wptr++; } else wptr++; } graph[i]->ewgts[0] = -sum; } else graph[i]->ewgts = NULL; } if (self_edge > 1) { printf("WARNING: %d self edges were detected and ignored\n", self_edge); if (Output_File != NULL) { fprintf(Output_File, "WARNING: %d self edges were detected and ignored\n", self_edge); } } return (0); }