Exemple #1
0
/*
 * Multilevel eigenvector computation.
 *
 * If the graph is small enough (nvtxs <= vmax) or give_up is set, the
 * eigenvectors are computed directly with eigensolve().  Otherwise the graph
 * is coarsened once with coarsen1(), this routine recurses on the coarse
 * graph, and the coarse eigenvectors are interpolated back onto this graph
 * with ch_interpolate().  Whenever step is a multiple of nstep the
 * interpolated vectors are refined with RQI (or the extended eigenproblem
 * path when terminal weights are present and ndims == 1).
 *
 * Vertex-indexed arrays in this routine are 1-based (loops run 1..nvtxs and
 * buffers are sized nvtxs + 1).
 *
 * The results are returned in yvecs[1..ndims].
 */
void 
coarsen (
/* Coarsen until nvtxs <= vmax, compute and uncoarsen. */
    struct vtx_data **graph,	/* array of vtx data for graph */
    int nvtxs,		/* number of vertices in graph */
    int nedges,		/* number of edges in graph */
    int using_vwgts,		/* are vertices weights being used? */
    int using_ewgts,		/* are edge weights being used? */
    float *term_wgts[],		/* terminal weights */
    int igeom,		/* dimension for geometric information */
    float **coords,		/* coordinates for vertices */
    double **yvecs,		/* eigenvectors returned */
    int ndims,		/* number of eigenvectors to calculate */
    int solver_flag,		/* which eigensolver to use */
    int vmax,			/* largest subgraph to stop coarsening */
    double eigtol,		/* tolerence in eigen calculation */
    int nstep,		/* number of coarsenings between RQI steps */
    int step,			/* current step number */
    int give_up		/* has coarsening bogged down? */
)
{
    extern FILE *Output_File;	/* output file or null */
    extern int DEBUG_COARSEN;	/* debug flag for coarsening */
    extern int PERTURB;		/* was matrix perturbed in Lanczos? */
    extern double COARSEN_RATIO_MIN;	/* min vtx reduction for coarsening */
    extern int COARSEN_VWGTS;	/* use vertex weights while coarsening? */
    extern int COARSEN_EWGTS;	/* use edge weights while coarsening? */
    extern double refine_time;	/* time for RQI/Symmlq iterative refinement */
    struct vtx_data **cgraph;	/* array of vtx data for coarsened graph */
    struct orthlink *orthlist;	/* list of lower evecs to suppress */
    struct orthlink *newlink;	/* lower evec to suppress */
    double   *cyvecs[MAXDIMS + 1];	/* eigenvectors for subgraph */
    double    evals[MAXDIMS + 1];	/* eigenvalues returned */
    double    goal[MAXSETS];	/* needed for convergence mode = 1 */
    double   *r1, *r2, *work;	/* space needed by symmlq/RQI */
    double   *v, *w, *x, *y;	/* space needed by symmlq/RQI */
    double   *gvec;		/* rhs vector in extended eigenproblem */
    double    evalest;		/* eigenvalue estimate returned by RQI */
    double    maxdeg;		/* maximum weighted degree of a vertex */
    float   **ccoords;		/* coordinates for coarsened graph */
    float    *cterm_wgts[MAXSETS];	/* coarse graph terminal weights */
    float    *new_term_wgts[MAXSETS];	/* terminal weights for Bui's method*/
    float   **real_term_wgts;	/* one of the above */
    float    *twptr;		/* loops through term_wgts */
    float    *twptr_save;	/* copy of twptr */
    float    *ctwptr;		/* loops through cterm_wgts */
    double   *vwsqrt = NULL;	/* square root of vertex weights */
    double    norm, alpha;	/* values used for orthogonalization */
    double    initshift;	/* initial shift for RQI */
    double    total_vwgt;	/* sum of all the vertex weights */
    double    w1, w2;		/* weights of two sets */
    double    sigma;		/* norm of rhs in extended eigenproblem */
    double    term_tot;		/* sum of all terminal weights */
    int    *space;		/* room for assignment in Lanczos */
    int      *morespace;	/* room for assignment in Lanczos */
    int      *v2cv;		/* mapping from vertices to coarse vtxs */
    int       vwgt_max;		/* largest vertex weight */
    int       oldperturb;	/* saves PERTURB value */
    int       cnvtxs;		/* number of vertices in coarsened graph */
    int       cnedges;		/* number of edges in coarsened graph */
    int       nextstep;		/* next step in RQI test */
    int       nsets;		/* number of sets being created */
    int       i, j;		/* loop counters */
    double    time;		/* time marker */

    double   dot(), ch_normalize(), find_maxdeg(), seconds();
    struct orthlink *makeorthlnk();
    void      makevwsqrt(), eigensolve(), coarsen1(), orthogvec(), rqi_ext();
    void      ch_interpolate(), orthog1(), rqi(), scadd(), free_graph();

    if (DEBUG_COARSEN > 0) {
	printf("<Entering coarsen, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n",
	       step, nvtxs, nedges, vmax);
    }

    /* Each eigenvector contributes one bisection, hence 2^ndims sets. */
    nsets = 1 << ndims;

    /* Is problem small enough to solve? */
    if (nvtxs <= vmax || give_up) {
	if (using_vwgts) {
	    vwsqrt = smalloc((nvtxs + 1) * sizeof(double));
	    makevwsqrt(vwsqrt, graph, nvtxs);
	}
	else
	    vwsqrt = NULL;
	maxdeg = find_maxdeg(graph, nvtxs, using_ewgts, (float *) NULL);

	/* Largest and total vertex weight; unit weights if none are in use. */
	if (using_vwgts) {
	    vwgt_max = 0;
	    total_vwgt = 0;
	    for (i = 1; i <= nvtxs; i++) {
		if (graph[i]->vwgt > vwgt_max)
		    vwgt_max = graph[i]->vwgt;
		total_vwgt += graph[i]->vwgt;
	    }
	}
	else {
	    vwgt_max = 1;
	    total_vwgt = nvtxs;
	}
	/* Equal-sized target for every set. */
	for (i = 0; i < nsets; i++)
	    goal[i] = total_vwgt / nsets;

	space = smalloc((nvtxs + 1) * sizeof(int));

	/* If not coarsening ewgts, then need care with term_wgts. */
	if (!using_ewgts && term_wgts[1] != NULL && step != 0) {
	    /* One contiguous buffer holds the nsets-1 quantized weight arrays. */
	    twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float));
	    twptr_save = twptr;
	    for (j = 1; j < nsets; j++) {
	        new_term_wgts[j] = twptr;
	        twptr += nvtxs + 1;
	    }

	    /* Quantize each terminal weight to -1, 0 or +1. */
	    for (j = 1; j < nsets; j++) {
	        twptr = term_wgts[j];
	        ctwptr = new_term_wgts[j];
	        for (i = 1; i <= nvtxs; i++) {
		    if (twptr[i] > .5) ctwptr[i] = 1;
		    else if (twptr[i] < -.5) ctwptr[i] = -1;
		    else ctwptr[i] = 0;
		}
	    }
	    real_term_wgts = new_term_wgts;
	}
	else {
	    real_term_wgts = term_wgts;
	    new_term_wgts[1] = NULL;
	}

	eigensolve(graph, nvtxs, nedges, maxdeg, vwgt_max, vwsqrt,
		   using_vwgts, using_ewgts, real_term_wgts, igeom, coords,
		   yvecs, evals, 0, space, goal,
		   solver_flag, FALSE, 0, ndims, 3, eigtol);

	/* new_term_wgts[1] is the start of the single quantized buffer. */
	if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) {
	    sfree(real_term_wgts[1]);
	}
	sfree(space);
	if (vwsqrt != NULL)
	    sfree(vwsqrt);
	return;
    }

    /* Otherwise I have to coarsen. */

    if (coords != NULL) {
	ccoords = smalloc(igeom * sizeof(float *));
    }
    else {
	ccoords = NULL;
    }
    coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges,
	     &v2cv, igeom, coords, ccoords, using_ewgts);

    /* If coarsening isn't working very well, give up and partition. */
    give_up = FALSE;
    if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax ) {
	printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
	    nvtxs, cnvtxs);
	printf("         Recursive coarsening being stopped prematurely.\n");
	if (Output_File != NULL) {
	    fprintf(Output_File,
		"WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
	        nvtxs, cnvtxs);
	    fprintf(Output_File,
		"         Recursive coarsening being stopped prematurely.\n");
	}
	give_up = TRUE;
    }

    /* Create space for subgraph yvecs. */
    for (i = 1; i <= ndims; i++) {
	cyvecs[i] = smalloc((cnvtxs + 1) * sizeof(double));
    }

    /* Make coarse version of terminal weights. */
    if (term_wgts[1] != NULL) {
	twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float));
	twptr_save = twptr;
	for (i = (cnvtxs + 1) * (nsets - 1); i ; i--) {
	    *twptr++ = 0;
	}
	twptr = twptr_save;
	for (j = 1; j < nsets; j++) {
	    cterm_wgts[j] = twptr;
	    twptr += cnvtxs + 1;
	}
	for (j = 1; j < nsets; j++) {
	    ctwptr = cterm_wgts[j];
	    twptr = term_wgts[j];
	    /* Vertices are numbered 1..nvtxs, so the last vertex must be
	       included; otherwise its terminal weight never reaches the
	       coarse graph. */
	    for (i = 1; i <= nvtxs; i++){
	        ctwptr[v2cv[i]] += twptr[i];
	    }
	}
    }
    else {
	cterm_wgts[1] = NULL;
    }

    /* Now recurse on coarse subgraph. */
    nextstep = step + 1;
    coarsen(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts,
	    igeom, ccoords, cyvecs, ndims, solver_flag, vmax, eigtol,
	    nstep, nextstep, give_up);

    ch_interpolate(yvecs, cyvecs, ndims, graph, nvtxs, v2cv, using_ewgts);

    sfree(cterm_wgts[1]);
    sfree(v2cv);

    /* I need to do Rayleigh Quotient Iteration each nstep stages. */
    time = seconds();
    if (!(step % nstep)) {
	/* PERTURB is switched off for the refinement and restored afterwards. */
	oldperturb = PERTURB;
	PERTURB = FALSE;
	/* Should I do some orthogonalization here against vwsqrt? */
	if (using_vwgts) {
	    vwsqrt = smalloc((nvtxs + 1) * sizeof(double));
	    makevwsqrt(vwsqrt, graph, nvtxs);

	    for (i = 1; i <= ndims; i++)
		orthogvec(yvecs[i], 1, nvtxs, vwsqrt);
	}
	else
	    for (i = 1; i <= ndims; i++)
		orthog1(yvecs[i], 1, nvtxs);

	/* Allocate space that will be needed in RQI. */
	/* A single allocation is carved into seven (nvtxs + 1)-long vectors. */
	r1 = smalloc(7 * (nvtxs + 1) * sizeof(double));
	r2 = &r1[nvtxs + 1];
	v = &r1[2 * (nvtxs + 1)];
	w = &r1[3 * (nvtxs + 1)];
	x = &r1[4 * (nvtxs + 1)];
	y = &r1[5 * (nvtxs + 1)];
	work = &r1[6 * (nvtxs + 1)];

	if (using_vwgts) {
	    vwgt_max = 0;
	    total_vwgt = 0;
	    for (i = 1; i <= nvtxs; i++) {
		if (graph[i]->vwgt > vwgt_max)
		    vwgt_max = graph[i]->vwgt;
		total_vwgt += graph[i]->vwgt;
	    }
	}
	else {
	    vwgt_max = 1;
	    total_vwgt = nvtxs;
	}
	for (i = 0; i < nsets; i++)
	    goal[i] = total_vwgt / nsets;

	space = smalloc((nvtxs + 1) * sizeof(int));
	morespace = smalloc((nvtxs) * sizeof(int));

	initshift = 0;
	orthlist = NULL;
	/* Refine all but the last vector; each refined vector is added to
	   orthlist so later RQI calls can suppress it. */
	for (i = 1; i < ndims; i++) {
	    ch_normalize(yvecs[i], 1, nvtxs);
	    rqi(graph, yvecs, i, nvtxs, r1, r2, v, w, x, y, work,
		eigtol, initshift, &evalest, vwsqrt, orthlist,
		0, nsets, space, morespace, 3, goal, vwgt_max, ndims);

	    /* Now orthogonalize higher yvecs against this one. */
	    norm = dot(yvecs[i], 1, nvtxs, yvecs[i]);
	    for (j = i + 1; j <= ndims; j++) {
		alpha = -dot(yvecs[j], 1, nvtxs, yvecs[i]) / norm;
		scadd(yvecs[j], 1, nvtxs, alpha, yvecs[i]);
	    }

	    /* Now prepare for next pass through loop. */
	    initshift = evalest;
	    newlink = makeorthlnk();
	    newlink->vec = yvecs[i];
	    newlink->pntr = orthlist;
	    orthlist = newlink;

	}
	ch_normalize(yvecs[ndims], 1, nvtxs);

	if (term_wgts[1] != NULL && ndims == 1) {
	    /* Solve extended eigen problem */

	    /* If not coarsening ewgts, then need care with term_wgts. */
	    if (!using_ewgts && term_wgts[1] != NULL && step != 0) {
	        twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float));
	        twptr_save = twptr;
	        for (j = 1; j < nsets; j++) {
	            new_term_wgts[j] = twptr;
	            twptr += nvtxs + 1;
	        }

	        /* Quantize each terminal weight to -1, 0 or +1. */
	        for (j = 1; j < nsets; j++) {
	            twptr = term_wgts[j];
	            ctwptr = new_term_wgts[j];
	            for (i = 1; i <= nvtxs; i++) {
		        if (twptr[i] > .5) ctwptr[i] = 1;
		        else if (twptr[i] < -.5) ctwptr[i] = -1;
		        else ctwptr[i] = 0;
		    }
	        }
	        real_term_wgts = new_term_wgts;
	    }
	    else {
	        real_term_wgts = term_wgts;
	        new_term_wgts[1] = NULL;
	    }

	    /* Following only works for bisection. */
	    w1 = goal[0];
	    w2 = goal[1];
	    sigma = sqrt(4*w1*w2/(w1+w2));
	    gvec = smalloc((nvtxs+1)*sizeof(double));
	    term_tot = sigma;	/* Avoids lint warning for now. */
	    term_tot = 0;
	    for (j=1; j<=nvtxs; j++) term_tot += (real_term_wgts[1])[j];
	    term_tot /= (w1+w2);
	    /* Right-hand side: terminal weights shifted by term_tot (and
	       divided by the vertex weight when weights are in use). */
	    if (using_vwgts) {
	        for (j=1; j<=nvtxs; j++) {
		    gvec[j] = (real_term_wgts[1])[j]/graph[j]->vwgt - term_tot;
		}
	    }
	    else {
	        for (j=1; j<=nvtxs; j++) {
		    gvec[j] = (real_term_wgts[1])[j] - term_tot;
		}
	    }

	    /* NOTE(review): rqi_ext is invoked without arguments, so gvec and
	       sigma computed above are not passed along -- confirm against
	       rqi_ext's definition. */
	    rqi_ext();

	    sfree(gvec);
	    if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) {
		sfree(new_term_wgts[1]);
	    }
	}
	else {
	    rqi(graph, yvecs, ndims, nvtxs, r1, r2, v, w, x, y, work,
	        eigtol, initshift, &evalest, vwsqrt, orthlist,
	        0, nsets, space, morespace, 3, goal, vwgt_max, ndims);
	}
	refine_time += seconds() - time;

	/* Free the space allocated for RQI. */
	sfree(morespace);
	sfree(space);
	while (orthlist != NULL) {
	    newlink = orthlist->pntr;
	    sfree(orthlist);
	    orthlist = newlink;
	}
	sfree(r1);
	if (vwsqrt != NULL)
	    sfree(vwsqrt);
	PERTURB = oldperturb;
    }
    if (DEBUG_COARSEN > 0) {
	printf(" Leaving coarsen, step=%d\n", step);
    }

    /* Free the space that was allocated. */
    if (ccoords != NULL) {
	for (i = 0; i < igeom; i++)
	    sfree(ccoords[i]);
	sfree(ccoords);
    }
    for (i = ndims; i > 0; i--)
	sfree(cyvecs[i]);
    free_graph(cgraph);
}
Exemple #2
0
/*
 * Perform KL between two sets.
 *
 * Builds the subgraph induced by set1 and set2, runs klspiff() on it as a
 * two-way problem, then writes the result back into new_assign, sizes and
 * the per-set linked lists.  The side labelled 0 by KL is given whichever
 * of set1/set2 matches the comparison of old and new sub-assignments below.
 *
 * Returns nonzero if at least one subgraph vertex changed its
 * sub-assignment, zero if nothing moved.
 */
int 
kl_refine (
    struct vtx_data **graph,	/* graph data structure */
    struct vtx_data **subgraph,	/* space for subgraph to refine */
    struct bilist *set_list,	/* lists of vtxs in each set */
    struct bilist *vtx_elems,	/* start of storage for lists */
    int *new_assign,		/* set assignments for all vertices */
    int set1,
    int set2,		/* two sets being refined */
    int *glob2loc,		/* maps vertices to subgraph vertices */
    int *loc2glob,		/* maps subgraph vertices to vertices */
    int *sub_assign,		/* new assignment for subgraphs */
    int *old_sub_assign,	/* current assignment for subgraphs */
    int *degrees,		/* space for forming subgraphs */
    int using_ewgts,		/* are edge weights being used? */
    int (*hops)[MAXSETS],	/* KL set preferences */
    double *goal,			/* desired set sizes */
    int *sizes,		/* number of vertices in different sets */
    float *term_wgts[],		/* space for terminal propagation weights */
    int architecture,		/* 0 => hypercube, d => d-dimensional mesh */
    int mesh_dims[3]		/* if mesh, how big is it? */
)
{
    extern int TERM_PROP;	/* perform terminal propagation? */
    extern double KL_IMBALANCE;	/* fractional imbalance allowed in KL */
    struct bilist *elem;	/* list element of the vertex being moved */
    double    pair_goal[2];	/* rescaled goals for the two sides */
    double    side_wgts[2];	/* current weight on each side */
    double    max_degree;	/* largest degree in the subgraph */
    double    scale;		/* actual pair weight / desired pair weight */
    double    slack;		/* imbalance allowance derived from goals */
    int      *no_bndy;		/* NULL boundary list handed to klspiff */
    int       max_vwgt;		/* largest vertex weight in the pair */
    int       dev_limit;	/* largest set deviation allowed in KL */
    int       nsub;		/* number of vertices in the subgraph */
    int       wgt_first;	/* total vertex weight in set1 */
    int       wgt_second;	/* total vertex weight in set2 */
    int       nsub_edges;	/* number of edges in the subgraph */
    int       zero_set;		/* global set given to KL side 0 */
    int       one_set;		/* global set given to KL side 1 */
    int       dest;		/* global set receiving current vertex */
    int       unchanged;	/* vertices whose side did not change */
    int       gvtx;		/* global index of current vertex */
    int       k;		/* loop counter */
    double    find_maxdeg();
    void      make_maps_ref(), make_subgraph(), remake_graph();
    void      klspiff(), make_terms_ref(), count_weights();

    no_bndy = NULL;

    /* Build the local/global maps and gather the pair's weight statistics. */
    make_maps_ref(graph, set_list, vtx_elems, new_assign, sub_assign, 
	set1, set2, glob2loc, loc2glob, &nsub, &max_vwgt,
	&wgt_first, &wgt_second);

    /* Remember the starting assignment so movement can be measured later. */
    for (k = 1; k <= nsub; k++) {
	old_sub_assign[k] = sub_assign[k];
    }

    /* Rescale the two goals so that together they match the actual weight. */
    scale = (wgt_first + wgt_second) / (goal[set1] + goal[set2]);
    pair_goal[0] = scale * goal[set1];
    pair_goal[1] = scale * goal[set2];

    if (TERM_PROP) {
        make_terms_ref(graph, using_ewgts, nsub, loc2glob,
	   set1, set2, new_assign, architecture, mesh_dims, term_wgts);
    }

    /* New_assign has overwritten set2 with set1. */
    make_subgraph(graph, subgraph, nsub, &nsub_edges, new_assign, set1,
		  glob2loc, loc2glob, degrees, using_ewgts);

    max_degree = find_maxdeg(subgraph, nsub, using_ewgts, (float *) NULL);

    count_weights(subgraph, nsub, sub_assign, 2, side_wgts, (max_vwgt != 1));

    /* Allowed deviation: the larger of the heaviest vertex and the
       imbalance fraction of the average goal. */
    dev_limit = max_vwgt;
    slack = (pair_goal[0] + pair_goal[1]) * KL_IMBALANCE / 2;
    if (slack > dev_limit) {
	dev_limit = slack;
    }

    klspiff(subgraph, nsub, sub_assign, 2, hops, pair_goal,
	    term_wgts, dev_limit, max_degree, using_ewgts, &no_bndy, side_wgts);

    /* Count the vertices that kept their side. */
    unchanged = 0;
    for (k = 1; k <= nsub; k++) {
	unchanged += (old_sub_assign[k] == sub_assign[k]) ? 1 : 0;
    }

    /* Side 0 becomes set1 when more than half stayed put, else set2. */
    if (2 * unchanged > nsub) {
	zero_set = set1;
	one_set = set2;
    }
    else {
	zero_set = set2;
	one_set = set1;
    }

    /* Now update the assignments. */
    sizes[zero_set] = sizes[one_set] = 0;
    for (k = 1; k <= nsub; k++) {
	gvtx = loc2glob[k];
	elem = &(vtx_elems[gvtx]);

	/* Unlink the vertex from whatever list currently holds it. */
	if (elem->next != NULL) {
	    elem->next->prev = elem->prev;
	}
	if (elem->prev != NULL) {
	    elem->prev->next = elem->next;
	}

	/* Push it onto the front of its destination set's list. */
	dest = (sub_assign[k] == 0) ? zero_set : one_set;
	new_assign[gvtx] = (int) dest;
	++sizes[dest];
	elem->next = set_list[dest].next;
	if (elem->next != NULL) {
	    elem->next->prev = elem;
	}
	elem->prev = &(set_list[dest]);
	set_list[dest].next = elem;
    }

    remake_graph(subgraph, nsub, loc2glob, degrees, using_ewgts);

    return(unchanged != nsub);
}
Exemple #3
0
/* begin at 1 instead of at 0. */
/*
 * Refine a mapping of comm_graph vertices onto a processor mesh by swapping
 * the vertices sitting at the two ends of a mesh wire, repeating for as long
 * as some wire has a positive swap desire.
 *
 * vtx2node and node2vtx are updated in place to reflect every swap.
 *
 * Returns 0 on success and 1 if any working array could not be allocated
 * (in which case the mappings are left as they were on entry, since no swap
 * happens before all allocations succeed).
 */
int refine_mesh(struct vtx_data **comm_graph,   /* graph for communication requirements */
                int               cube_or_mesh, /* number of dimensions in mesh */
                int               mesh_dims[3], /* dimensions of mesh */
                double            maxdesire,    /* largest possible desire to flip an edge */
                int *             vtx2node,     /* mapping from comm_graph vtxs to mesh nodes */
                int *             node2vtx      /* mapping from mesh nodes to comm_graph vtxs */
                )
{
  struct refine_vdata * vdata = NULL;      /* desire data for all vertices */
  struct refine_vdata * vptr;              /* loops through vdata */
  struct refine_edata * edata = NULL;      /* desire data for all edges */
  struct refine_edata * eguy;              /* one element in edata array */
  struct refine_edata **desire_ptr = NULL; /* array of desire buckets */
  double *              desires    = NULL; /* each edge's inclination to flip */
  int *                 indices    = NULL; /* sorted list of desire values */
  int *                 space      = NULL; /* used for sorting disire values */
  double                best_desire;       /* highest desire of edge to flip */
  int                   imax;              /* maxdesire rounded up */
  int                   nsets_tot;         /* total number of sets/processors */
  int                   neighbor;          /* neighboring vertex */
  int                   dim;               /* loops over mesh dimensions */
  int                   nwires;            /* number of wires in processor mesh */
  int                   wire;              /* loops through all wires */
  int                   node1, node2;      /* processors joined by a wire */
  int                   vtx1, vtx2;        /* corresponding vertices in comm_graph */
  int                   loc1, loc2;        /* location of vtxs in flipping dimension */
  int                   error;             /* out of space? */
  int                   i, j, k;           /* loop counter */
  double                find_maxdeg();
  double                compute_mesh_edata();

  void compute_mesh_vdata(), init_mesh_edata(), mergesort();
  void update_mesh_vdata(), update_mesh_edata();

  /* Assume failure until every step has completed. */
  error = 1;

  nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2];

  /* imax = ceiling of maxdesire; buckets cover the range [-imax, imax]. */
  imax = maxdesire;
  if (imax != maxdesire)
    imax++;

  vdata = (struct refine_vdata *)smalloc_ret((cube_or_mesh * nsets_tot + 1) *
                                             sizeof(struct refine_vdata));
  if (vdata == NULL)
    goto skip;

  /* Compute each node's desires to move or stay put. */
  /* Entries are filled from index 1 on, dimension by dimension, so the data
     for vertex i in dimension dim ends up at vdata[dim * nsets_tot + i]. */
  vptr = vdata;
  for (dim = 0; dim < cube_or_mesh; dim++) {
    for (i = 1; i <= nsets_tot; i++) {
      compute_mesh_vdata(++vptr, comm_graph, i, vtx2node, mesh_dims, dim);
    }
  }

  nwires = (mesh_dims[0] - 1) * mesh_dims[1] * mesh_dims[2] +
           mesh_dims[0] * (mesh_dims[1] - 1) * mesh_dims[2] +
           mesh_dims[0] * mesh_dims[1] * (mesh_dims[2] - 1);

  edata   = smalloc_ret((nwires + 1) * sizeof(struct refine_edata));
  desires = smalloc_ret(nwires * sizeof(double));
  /* Check the two arrays just allocated; vdata was already verified above. */
  if (edata == NULL || desires == NULL)
    goto skip;

  /* Initialize all the edge values. */
  init_mesh_edata(edata, mesh_dims);
  for (wire = 0; wire < nwires; wire++) {
    desires[wire] = edata[wire].swap_desire =
        compute_mesh_edata(&(edata[wire]), vdata, mesh_dims, comm_graph, node2vtx);
  }

  /* Set special value for end pointer. */
  edata[nwires].swap_desire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL);

  /* I now need to sort all the wire preference values */
  indices = smalloc_ret(nwires * sizeof(int));
  space   = smalloc_ret(nwires * sizeof(int));
  if (indices == NULL || space == NULL)
    goto skip;

  mergesort(desires, nwires, indices, space);

  /* The sort scratch arrays are no longer needed; NULL them so the cleanup
     at skip: does not free them twice. */
  sfree(space);
  sfree(desires);
  space   = NULL;
  desires = NULL;

  /* The last sorted index is taken as the wire with the largest desire. */
  best_desire = (edata[indices[nwires - 1]]).swap_desire;

  /* Now construct a buckets of linked lists with desire values. */

  if (best_desire > 0) {
    desire_ptr =
        (struct refine_edata **)smalloc_ret((2 * imax + 1) * sizeof(struct refine_edata *));
    if (desire_ptr == NULL)
      goto skip;

    for (i = 2 * imax; i >= 0; i--)
      desire_ptr[i] = NULL;

    for (i = nwires - 1; i >= 0; i--) {
      eguy = &(edata[indices[i]]);
      /* Round the swap desire up. */
      /* The magnitude is rounded away from zero, then the sign is restored. */
      if (eguy->swap_desire >= 0) {
        k = eguy->swap_desire;
        if (k != eguy->swap_desire)
          k++;
      }
      else {
        k = -eguy->swap_desire;
        if (k != -eguy->swap_desire)
          k++;
        k = -k;
      }

      /* Shift so that bucket indices are non-negative. */
      k += imax;

      /* Push this wire onto the front of its bucket's doubly linked list. */
      eguy->prev = NULL;
      eguy->next = desire_ptr[k];
      if (desire_ptr[k] != NULL)
        desire_ptr[k]->prev = eguy;
      desire_ptr[k] = eguy;
    }
  }
  else {
    desire_ptr = NULL;
  }

  sfree(indices);
  indices = NULL;

  loc1 = 0;
  loc2 = 0;

  /* Everything is now set up.  Swap sets across wires until no more improvement. */

  while (best_desire > 0) {
    /* Locate the highest non-empty bucket at or below best_desire + 1. */
    k = best_desire + 1 + imax;
    if (k > 2 * imax)
      k = 2 * imax;
    while (k > imax && desire_ptr[k] == NULL)
      k--;
    eguy = desire_ptr[k];

    dim   = eguy->dim;
    node1 = eguy->node1;
    node2 = eguy->node2;
    vtx1  = node2vtx[node1];
    vtx2  = node2vtx[node2];
    /* Coordinates of the two nodes along the wire's dimension. */
    if (dim == 0) {
      loc1 = node1 % mesh_dims[0];
      loc2 = node2 % mesh_dims[0];
    }
    else if (dim == 1) {
      loc1 = (node1 / mesh_dims[0]) % mesh_dims[1];
      loc2 = (node2 / mesh_dims[0]) % mesh_dims[1];
    }
    else if (dim == 2) {
      loc1 = node1 / (mesh_dims[0] * mesh_dims[1]);
      loc2 = node2 / (mesh_dims[0] * mesh_dims[1]);
    }

    /* Now swap the vertices. */
    node2vtx[node1] = (int)vtx2;
    node2vtx[node2] = (int)vtx1;
    vtx2node[vtx1]  = (int)node2;
    vtx2node[vtx2]  = (int)node1;

    /* First update all the vdata fields for vertices effected by this flip. */
    /* Edge lists are scanned from index 1; presumably slot 0 holds the vertex
       itself -- confirm against the struct vtx_data layout. */
    for (j = 1; j < comm_graph[vtx1]->nedges; j++) {
      neighbor = comm_graph[vtx1]->edges[j];
      if (neighbor != vtx2)
        update_mesh_vdata(loc1, loc2, dim, comm_graph[vtx1]->ewgts[j], vdata, mesh_dims, neighbor,
                          vtx2node);
    }

    for (j = 1; j < comm_graph[vtx2]->nedges; j++) {
      neighbor = comm_graph[vtx2]->edges[j];
      if (neighbor != vtx1)
        update_mesh_vdata(loc2, loc1, dim, comm_graph[vtx2]->ewgts[j], vdata, mesh_dims, neighbor,
                          vtx2node);
    }

    /* Now recompute all preferences for vertices that were moved. */
    for (j = 0; j < cube_or_mesh; j++) {
      compute_mesh_vdata(&(vdata[j * nsets_tot + vtx1]), comm_graph, vtx1, vtx2node, mesh_dims, j);
      compute_mesh_vdata(&(vdata[j * nsets_tot + vtx2]), comm_graph, vtx2, vtx2node, mesh_dims, j);
    }

    /* Now I can update the values of all the edges associated with all the
       effected vertices.  Note that these include mesh neighbors of node1 and
       node2 in addition to the dim-edges of graph neighbors of vtx1 and vtx2. */

    /* For each neighbor vtx, look at -1 and +1 edge.  If desire hasn't changed,
       return.  Otherwise, pick him up and move him. Similarly for all
       directional neighbors of node1 and node2. */

    for (j = 1; j < comm_graph[vtx1]->nedges; j++) {
      neighbor = comm_graph[vtx1]->edges[j];
      if (neighbor != vtx2)
        update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node,
                          &best_desire, imax, desire_ptr);
    }

    for (j = 1; j < comm_graph[vtx2]->nedges; j++) {
      neighbor = comm_graph[vtx2]->edges[j];
      if (neighbor != vtx1)
        update_mesh_edata(neighbor, dim, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node,
                          &best_desire, imax, desire_ptr);
    }
    for (j = 0; j < cube_or_mesh; j++) {
      update_mesh_edata(vtx1, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node,
                        &best_desire, imax, desire_ptr);
      update_mesh_edata(vtx2, j, edata, vdata, comm_graph, mesh_dims, node2vtx, vtx2node,
                        &best_desire, imax, desire_ptr);
    }

    /* Re-derive best_desire from the highest non-empty bucket; it becomes 0
       (ending the loop) when no bucket above imax holds a wire. */
    k = best_desire + 1 + imax;
    if (k > 2 * imax)
      k = 2 * imax;
    while (k > imax && desire_ptr[k] == NULL)
      k--;
    best_desire = k - imax;
  }
  error = 0;

skip:
  /* Shared cleanup for success and failure; pointers never allocated or
     already released are NULL here. */
  sfree(indices);
  sfree(space);
  sfree(desires);
  sfree(desire_ptr);
  sfree(vdata);
  sfree(edata);

  return (error);
}
Exemple #4
0
/*
 * Refine the mapping of sets to processors.
 *
 * Builds the weighted quotient ("communication") graph of the current
 * assignment, starts from the identity mapping between its vertices and the
 * processor nodes, lets refine_mesh() or refine_cube() permute that mapping,
 * and finally relabels assign[1..nvtxs] through the refined mapping.
 *
 * If the communication graph or either mapping array cannot be built, or
 * the refinement routine reports an error, a warning is emitted through
 * strout() and assign is left unchanged.
 */
void refine_map(struct vtx_data **graph,        /* graph data structure */
                int               nvtxs,        /* number of vertices in graph */
                int               using_ewgts,  /* are edge weights being used? */
                int *             assign,       /* current assignment */
                int               cube_or_mesh, /* 0 => hypercube, d => d-dimensional mesh */
                int               ndims_tot,    /* if hypercube, number of dimensions */
                int               mesh_dims[3]  /* if mesh, dimensions of mesh */
                )
{
  struct vtx_data **comm_graph;      /* quotient graph of communication */
  int *             vtx2node = NULL; /* comm_graph vertex -> processor node */
  int *             node2vtx = NULL; /* processor node -> comm_graph vertex */
  double            maxdesire = 0.0; /* upper bound on any swap desire */
  int               nsets_tot = 0;   /* number of sets/processors */
  int               error     = 0;   /* nonzero once anything has failed */
  int               node;            /* loops over processor nodes */
  int               vtx;             /* loops over graph vertices */

  double find_maxdeg();
  void   free_graph(), strout();
  int    make_comm_graph(), refine_mesh(), refine_cube();

  /* Number of sets implied by the architecture; stays 0 for any other
     cube_or_mesh value. */
  switch (cube_or_mesh) {
  case 0: nsets_tot = 1 << ndims_tot; break;
  case 1: nsets_tot = mesh_dims[0]; break;
  case 2: nsets_tot = mesh_dims[0] * mesh_dims[1]; break;
  case 3: nsets_tot = mesh_dims[0] * mesh_dims[1] * mesh_dims[2]; break;
  default: break;
  }

  /* Construct the weighted quotient graph representing communication. */
  error = make_comm_graph(&comm_graph, graph, nvtxs, using_ewgts, assign, nsets_tot);

  if (!error) {
    maxdesire = 2 * find_maxdeg(comm_graph, nsets_tot, TRUE, (float *)NULL);

    /* vtx2node is indexed from 1, node2vtx from 0. */
    vtx2node = smalloc_ret((nsets_tot + 1) * sizeof(int));
    node2vtx = smalloc_ret(nsets_tot * sizeof(int));

    if (node2vtx == NULL || vtx2node == NULL) {
      error = 1;
    }
    else {
      /* Identity mapping: vertex node + 1 lives on processor node. */
      for (node = 0; node < nsets_tot; node++) {
        vtx2node[node + 1] = (int)node;
        node2vtx[node]     = (int)(node + 1);
      }

      if (cube_or_mesh > 0) {
        error = refine_mesh(comm_graph, cube_or_mesh, mesh_dims, maxdesire, vtx2node, node2vtx);
      }
      else if (cube_or_mesh == 0) {
        error = refine_cube(comm_graph, ndims_tot, maxdesire, vtx2node, node2vtx);
      }

      /* Relabel every vertex through the refined mapping. */
      if (!error) {
        for (vtx = 1; vtx <= nvtxs; vtx++) {
          assign[vtx] = vtx2node[assign[vtx] + 1];
        }
      }
    }
  }

  if (error) {
    strout("\nWARNING: No space to refine mapping to processors.");
    strout("         NO MAPPING REFINEMENT PERFORMED.\n");
  }

  sfree(node2vtx);
  sfree(vtx2node);
  free_graph(comm_graph);
}
Exemple #5
0
/*
 * Multilevel vertex-separator style partitioning driver.
 *
 * If the graph is small enough (nvtxs <= vmax) or the caller signals that
 * coarsening has bogged down (give_up), the graph is partitioned directly
 * with simple_part() and smoothed with klvspiff() and/or bpm_improve().
 * Otherwise the graph is coarsened (flatten() on step 0 when FLATTEN is set,
 * else coarsen1()), this routine recurses on the coarse graph, the coarse
 * assignment and boundary list are interpolated back to the fine graph, and
 * every nstep-th level the result is smoothed again.
 *
 * Vertex arrays are 1-based: valid indices are 1..nvtxs.
 *
 * On return, assignment[1..nvtxs] holds the set of each vertex and
 * *pbndy_list points to a 0-terminated list of boundary vertices. The list
 * is allocated here (or by find_bndy()); the recursive call site below
 * releases the list it receives with sfree().
 */
void
coarsen_klv (
/* Coarsen until nvtxs <= vmax, compute and uncoarsen. */
    struct vtx_data **graph,	/* array of vtx data for graph */
    int nvtxs,		/* number of vertices in graph */
    int nedges,		/* number of edges in graph */
    int using_vwgts,		/* are vertices weights being used? */
    int using_ewgts,		/* are edge weights being used? */
    float *term_wgts[],		/* weights for terminal propagation */
    int igeom,                /* dimension for geometric information */
    float **coords,               /* coordinates for vertices */
    int vwgt_max,		/* largest vertex weight */
    int *assignment,		/* processor each vertex gets assigned to */
    double *goal,			/* desired set sizes */
    int architecture,		/* 0 => hypercube, d => d-dimensional mesh */
    int (*hops)[MAXSETS],	/* cost of edge between sets */
    int solver_flag,		/* which eigensolver to use */
    int ndims,		/* number of eigenvectors to calculate */
    int nsets,		/* number of sets being divided into */
    int vmax,			/* largest subgraph to stop coarsening */
    int mediantype,		/* flag for different assignment strategies */
    int mkconnected,		/* enforce connectivity before eigensolve? */
    double eigtol,		/* tolerance in eigen calculation */
    int nstep,		/* number of coarsenings between RQI steps */
    int step,			/* current step number */
    int **pbndy_list,		/* pointer to returned boundary list */
    double *weights,		/* weights of vertices in each set */
    int give_up		/* has coarsening bogged down? */
)
{
    extern FILE *Output_File;	/* output file or null */
    extern int DEBUG_TRACE;	/* trace the execution of the code */
    extern int DEBUG_COARSEN;	/* debug flag for coarsening */
    extern double COARSEN_RATIO_MIN;	/* vtx reduction demanded */
    extern int COARSEN_VWGTS;	/* use vertex weights while coarsening? */
    extern int COARSEN_EWGTS;	/* use edge weights while coarsening? */
    extern int LIMIT_KL_EWGTS;	/* limit edges weights in KL? */
    extern int COARSE_KLV;	/* apply klv as a smoother? */
    extern int COARSE_BPM;	/* apply bipartite matching/flow as a smoother? */
    extern double KL_IMBALANCE;	/* fractional imbalance allowed in KL */
    struct vtx_data **cgraph;	/* array of vtx data for coarsened graph */
    double    new_goal[MAXSETS];/* new goal if not using vertex weights */
    double   *real_goal;	/* chooses between goal and new_goal */
    double    total_weight;	/* total weight of vertices */
    double    goal_weight;	/* total weight of vertices in goal */
    float    *cterm_wgts[MAXSETS];	/* terminal weights for coarse graph */
    float    *new_term_wgts[MAXSETS];	/* modified for Bui's method */
    float   **real_term_wgts;	/* which of previous two to use */
    float     ewgt_max;		/* largest edge weight in graph */
    float    *twptr = NULL;	/* loops through term_wgts */
    float    *twptr_save = NULL;/* owning pointer for the current weight buffer */
    float    *ctwptr;		/* loops through cterm_wgts */
    float   **ccoords;		/* coarse graph coordinates */
    int      *v2cv;		/* mapping from vtxs to coarse vtxs */
    int      *flag;		/* scatter array for coarse bndy vtxs */
    int      *bndy_list;	/* list of vertices on boundary */
    int      *cbndy_list;	/* list of vertices of coarse graph on boundary */
    int      *cassignment;	/* set assignments for coarsened vertices */
    int       flattened;	/* was this graph flattened? */
    int       list_length;	/* length of boundary vtx list */
    int       cnvtxs;		/* number of vertices in coarsened graph */
    int       cnedges;		/* number of edges in coarsened graph */
    int       cvwgt_max;	/* largest vertex weight in coarsened graph */
    int       nextstep;		/* next step in RQI test */
    int       max_dev;		/* largest allowed deviation from balance */
    int       i, j;		/* loop counters */
    int       find_bndy(), flatten();
    double    find_maxdeg();
    void      makevwsqrt(), make_connected(), print_connected(), eigensolve();
    void      make_unconnected(), assign(), klvspiff(), coarsen1(), free_graph();
    void      compress_ewgts(), restore_ewgts(), count_weights(), bpm_improve();
    void      simple_part();

    /* NOTE: the trace text says "coarsen_kl"; kept verbatim so existing
       log output is unchanged. */
    if (DEBUG_COARSEN > 0 || DEBUG_TRACE > 0) {
        printf("<Entering coarsen_kl, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n",
               step, nvtxs, nedges, vmax);
    }

    /* Is problem small enough to solve? */
    if (nvtxs <= vmax || give_up) {
        real_goal = goal;

        simple_part(graph, nvtxs, assignment, nsets, 1, real_goal);
        list_length = find_bndy(graph, nvtxs, assignment, 2, &bndy_list);

        count_weights(graph, nvtxs, assignment, nsets + 1, weights, 1);

        /* Allowed imbalance: start from a multiple of the largest vertex
           weight, fall back to vwgt_max if that exceeds the total goal,
           then raise it to the KL_IMBALANCE fraction if that is larger. */
        max_dev = (step == 0) ? vwgt_max : 5 * vwgt_max;
        total_weight = 0;
        for (i = 0; i < nsets; i++) {
            total_weight += real_goal[i];
        }
        if (max_dev > total_weight) {
            max_dev = vwgt_max;
        }
        goal_weight = total_weight * KL_IMBALANCE / nsets;
        if (goal_weight > max_dev) {
            max_dev = goal_weight;
        }

        if (COARSE_KLV) {
            klvspiff(graph, nvtxs, assignment, real_goal,
                     max_dev, &bndy_list, weights);
        }
        if (COARSE_BPM) {
            bpm_improve(graph, assignment, real_goal, max_dev, &bndy_list,
                        weights, using_vwgts);
        }
        *pbndy_list = bndy_list;
        return;
    }

    /* Otherwise I have to coarsen. */
    flattened = FALSE;
    if (coords != NULL) {
        ccoords = smalloc(igeom * sizeof(float *));
    }
    else {
        ccoords = NULL;
    }
    if (FLATTEN && step == 0) {
        flattened = flatten(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv,
                            using_ewgts && COARSEN_EWGTS,
                            igeom, coords, ccoords);
    }
    if (!flattened) {
        coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv,
                 igeom, coords, ccoords,
                 using_ewgts && COARSEN_EWGTS);
    }

    /* Build coarse terminal weights by summing the fine weights of all
       vertices that map to the same coarse vertex. One contiguous buffer
       holds the (nsets - 1) rows of (cnvtxs + 1) floats. */
    if (term_wgts[1] != NULL) {
        twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float));
        twptr_save = twptr;
        for (i = (cnvtxs + 1) * (nsets - 1); i; i--) {
            *twptr++ = 0;
        }
        twptr = twptr_save;
        for (j = 1; j < nsets; j++) {
            cterm_wgts[j] = twptr;
            twptr += cnvtxs + 1;
        }
        for (j = 1; j < nsets; j++) {
            ctwptr = cterm_wgts[j];
            twptr = term_wgts[j];
            /* Vertices are numbered 1..nvtxs inclusive; the last vertex
               must contribute too. */
            for (i = 1; i <= nvtxs; i++) {
                ctwptr[v2cv[i]] += twptr[i];
            }
        }
    }
    else {
        cterm_wgts[1] = NULL;
    }

    /* If coarsening isn't working very well, give up and partition. */
    give_up = FALSE;
    if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax && !flattened) {
        printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
               nvtxs, cnvtxs);
        printf("         Recursive coarsening being stopped prematurely.\n");
        if (Output_File != NULL) {
            fprintf(Output_File,
                    "WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
                    nvtxs, cnvtxs);
            fprintf(Output_File,
                    "         Recursive coarsening being stopped prematurely.\n");
        }
        give_up = TRUE;
    }

    /* Now recurse on coarse subgraph. */
    if (COARSEN_VWGTS) {
        cvwgt_max = 0;
        for (i = 1; i <= cnvtxs; i++) {
            if (cgraph[i]->vwgt > cvwgt_max) {
                cvwgt_max = cgraph[i]->vwgt;
            }
        }
    }
    else {
        cvwgt_max = 1;
    }

    cassignment = smalloc((cnvtxs + 1) * sizeof(int));
    /* Flattening does not count as a coarsening step. */
    if (flattened) {
        nextstep = step;
    }
    else {
        nextstep = step + 1;
    }
    coarsen_klv(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts,
                igeom, ccoords, cvwgt_max, cassignment, goal, architecture, hops,
                solver_flag, ndims, nsets, vmax, mediantype, mkconnected,
                eigtol, nstep, nextstep, &cbndy_list, weights, give_up);

    /* Interpolate assignment back to fine graph. */
    for (i = 1; i <= nvtxs; i++) {
        assignment[i] = cassignment[v2cv[i]];
    }

    /* Construct boundary list from coarse boundary list: a fine vertex is
       on the boundary iff its coarse image is in the 0-terminated
       cbndy_list. */
    flag = smalloc((cnvtxs + 1) * sizeof(int));
    for (i = 1; i <= cnvtxs; i++) {
        flag[i] = FALSE;
    }
    for (i = 0; cbndy_list[i]; i++) {
        flag[cbndy_list[i]] = TRUE;
    }

    /* First pass counts, second pass fills. */
    list_length = 0;
    for (i = 1; i <= nvtxs; i++) {
        if (flag[v2cv[i]]) {
            ++list_length;
        }
    }

    bndy_list = smalloc((list_length + 1) * sizeof(int));

    list_length = 0;
    for (i = 1; i <= nvtxs; i++) {
        if (flag[v2cv[i]]) {
            bndy_list[list_length++] = i;
        }
    }
    bndy_list[list_length] = 0;	/* 0 terminates the list */

    sfree(flag);
    sfree(cbndy_list);

    /* Free the space that was allocated for the coarse problem. */
    sfree(cassignment);
    if (twptr_save != NULL) {
        /* Coarse terminal-weight buffer (backing store of cterm_wgts). */
        sfree(twptr_save);
        twptr_save = NULL;
    }
    free_graph(cgraph);
    sfree(v2cv);

    /* Smooth using KL or BPM every nstep steps. */
    if (!(step % nstep) && !flattened) {
        if (!COARSEN_VWGTS && step != 0) {	/* Construct new goal */
            /* Rescale the goals so that they sum to nvtxs. */
            goal_weight = 0;
            for (i = 0; i < nsets; i++) {
                goal_weight += goal[i];
            }
            for (i = 0; i < nsets; i++) {
                new_goal[i] = goal[i] * (nvtxs / goal_weight);
            }
            real_goal = new_goal;
        }
        else {
            real_goal = goal;
        }

        if (LIMIT_KL_EWGTS) {
            find_maxdeg(graph, nvtxs, using_ewgts, &ewgt_max);
            compress_ewgts(graph, nvtxs, nedges, ewgt_max,
                           using_ewgts);
        }

        /* If not coarsening ewgts, then need care with term_wgts. */
        if (!using_ewgts && term_wgts[1] != NULL && step != 0) {
            /* twptr_save owns this buffer; new_term_wgts[j] are row
               pointers into it and must not be freed separately. */
            twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float));
            twptr_save = twptr;
            for (j = 1; j < nsets; j++) {
                new_term_wgts[j] = twptr;
                twptr += nvtxs + 1;
            }

            /* Clamp each terminal weight to -1, 0 or +1. */
            for (j = 1; j < nsets; j++) {
                twptr = term_wgts[j];
                ctwptr = new_term_wgts[j];
                for (i = 1; i <= nvtxs; i++) {
                    if (twptr[i] > .5) {
                        ctwptr[i] = 1;
                    }
                    else if (twptr[i] < -.5) {
                        ctwptr[i] = -1;
                    }
                    else {
                        ctwptr[i] = 0;
                    }
                }
            }
            real_term_wgts = new_term_wgts;
        }
        else {
            real_term_wgts = term_wgts;
            new_term_wgts[1] = NULL;
        }
        /* NOTE(review): real_term_wgts is selected above but is not passed
           to klvspiff()/bpm_improve() below. */
        (void) real_term_wgts;

        max_dev = (step == 0) ? vwgt_max : 5 * vwgt_max;
        total_weight = 0;
        for (i = 0; i < nsets; i++) {
            total_weight += real_goal[i];
        }
        if (max_dev > total_weight) {
            max_dev = vwgt_max;
        }
        goal_weight = total_weight * KL_IMBALANCE / nsets;
        if (goal_weight > max_dev) {
            max_dev = goal_weight;
        }

        if (!COARSEN_VWGTS) {
            count_weights(graph, nvtxs, assignment, nsets + 1, weights,
                          (vwgt_max != 1));
        }

        if (COARSE_KLV) {
            klvspiff(graph, nvtxs, assignment, real_goal,
                     max_dev, &bndy_list, weights);
        }
        if (COARSE_BPM) {
            bpm_improve(graph, assignment, real_goal, max_dev, &bndy_list,
                        weights, using_vwgts);
        }

        /* The clamped terminal-weight buffer is released exactly once,
           through twptr_save, after this block. Freeing it here via
           new_term_wgts[1] as well would be a double free. */

        if (LIMIT_KL_EWGTS) {
            restore_ewgts(graph, nvtxs);
        }
    }

    *pbndy_list = bndy_list;

    /* Non-NULL only if the clamped terminal-weight buffer was allocated
       in the smoothing block above. */
    if (twptr_save != NULL) {
        sfree(twptr_save);
        twptr_save = NULL;
    }

    /* Free the space that was allocated. */
    if (ccoords != NULL) {
        for (i = 0; i < igeom; i++) {
            sfree(ccoords[i]);
        }
        sfree(ccoords);
    }

    if (DEBUG_COARSEN > 0) {
        printf(" Leaving coarsen_klv, step=%d\n", step);
    }
}