Example #1
0
/*************************************************************************
* This function is the entry point of the initial partition algorithm
* that does recursive bissection.
* This algorithm assembles the graph to all the processors and preceeds
* by parallelizing the recursive bisection step.
**************************************************************************/
void InitPartition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, ncon, mype, npes, gnvtxs, ngroups;
  idx_t *xadj, *adjncy, *adjwgt, *vwgt;
  idx_t *part, *gwhere0, *gwhere1;
  idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
  graph_t *agraph;
  idx_t lnparts, fpart, fpe, lnpes; 
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum;
  MPI_Comm ipcomm;
  struct {
    double sum;
    int rank;
  } lpesum, gpesum;

  WCOREPUSH;

  ncon = graph->ncon;

  ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  lbvec = rwspacemalloc(ctrl, ncon);

  /* assemble the graph to all the processors */
  agraph = AssembleAdaptiveGraph(ctrl, graph);
  gnvtxs = agraph->nvtxs;

  /* make a copy of the graph's structure for later */
  xadj   = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1));
  vwgt   = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon));
  adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges));
  adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges));
  part   = iwspacemalloc(ctrl, gnvtxs);

  /* create different processor groups */
  gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);


  /* Go into the recursive bisection */
  METIS_SetDefaultOptions(moptions);
  moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1;

  tpwgts  = ctrl->tpwgts;
  tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

  lnparts = ctrl->nparts;
  fpart = fpe = 0;
  lnpes = npes;
  while (lnpes > 1 && lnparts > 1) {
    /* determine the weights of the two partitions as a function of the 
       weight of the target partition weights */
    for (j=(lnparts>>1), i=0; i<ncon; i++) {
      tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
      tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
      wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
      tpwgts2[i]      *= wsum;
      tpwgts2[ncon+i] *= wsum;
    }

    METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, 
          &edgecut, part);

    /* pick one of the branches */
    if (mype < fpe+lnpes/2) {
      KeepPart(ctrl, agraph, part, 0);
      lnpes   = lnpes/2;
      lnparts = lnparts/2;
    }
    else {
      KeepPart(ctrl, agraph, part, 1);
      fpart   = fpart + lnparts/2;
      fpe     = fpe + lnpes/2;
      lnpes   = lnpes - lnpes/2;
      lnparts = lnparts - lnparts/2;
    }
  }

  gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs));
  gwhere1 = iwspacemalloc(ctrl, gnvtxs);

  if (lnparts == 1) { /* Case npes is greater than or equal to nparts */
    /* Only the first process will assign labels (for the reduction to work) */
    if (mype == fpe) {
      for (i=0; i<agraph->nvtxs; i++) 
        gwhere0[agraph->label[i]] = fpart;
    }
  }
  else { /* Case in which npes is smaller than nparts */
    /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
    tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
    for (j=0; j<ncon; j++) {
      for (wsum=0.0, i=0; i<lnparts; i++) {
        tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
        wsum += tpwgts[i*ncon+j];
      }
      for (wsum=1.0/wsum, i=0; i<lnparts; i++) 
        tpwgts[i*ncon+j] *= wsum;
    }

    METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
          &edgecut, part);

    for (i=0; i<agraph->nvtxs; i++) 
      gwhere0[agraph->label[i]] = fpart + part[i];
  }

  gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm);

  if (ngroups > 1) {
    tmpxadj   = agraph->xadj;
    tmpadjncy = agraph->adjncy;
    tmpadjwgt = agraph->adjwgt;
    tmpvwgt   = agraph->vwgt;
    tmpwhere  = agraph->where;

    agraph->xadj   = xadj;
    agraph->adjncy = adjncy;
    agraph->adjwgt = adjwgt;
    agraph->vwgt   = vwgt;
    agraph->where  = gwhere1;
    agraph->vwgt   = vwgt;
    agraph->nvtxs  = gnvtxs;

    edgecut = ComputeSerialEdgeCut(agraph);
    ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
    lbsum = rsum(ncon, lbvec, 1);

    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut,   1, IDX_T,  MPI_MAX, ctrl->gcomm);
    gkMPI_Allreduce((void *)&lbsum,   (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm);

    lpesum.sum = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION*ncon) {
      if (lbsum < UNBALANCE_FRACTION*ncon)
        lpesum.sum = edgecut;
      else
        lpesum.sum = max_cut;
    } 
    lpesum.rank = ctrl->mype;
    
    gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ctrl->gcomm);
    gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm);

    agraph->xadj   = tmpxadj;
    agraph->adjncy = tmpadjncy;
    agraph->adjwgt = tmpadjwgt;
    agraph->vwgt   = tmpvwgt;
    agraph->where  = tmpwhere;
  }

  icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

  FreeGraph(agraph);
  gkMPI_Comm_free(&ipcomm);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

  WCOREPOP;
}
Example #2
0
void FM_Mc2WayCutRefine(ctrl_t *ctrl, graph_t *graph, real_t *ntpwgts, idx_t niter)
{
    idx_t i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass,
          me, limit, tmp, cnum;
    idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *id, *ed,
          *bndptr, *bndind;
    idx_t *moved, *swaps, *perm, *qnum;
    idx_t higain, mincut, initcut, newcut, mincutorder;
    real_t *invtvwgt, *ubfactors, *minbalv, *newbalv;
    real_t origbal, minbal, newbal, rgain, ffactor;
    rpq_t **queues;

    WCOREPUSH;

    nvtxs    = graph->nvtxs;
    ncon     = graph->ncon;
    xadj     = graph->xadj;
    vwgt     = graph->vwgt;
    adjncy   = graph->adjncy;
    adjwgt   = graph->adjwgt;
    invtvwgt = graph->invtvwgt;
    where    = graph->where;
    id       = graph->id;
    ed       = graph->ed;
    pwgts    = graph->pwgts;
    bndptr   = graph->bndptr;
    bndind   = graph->bndind;

    moved     = iwspacemalloc(ctrl, nvtxs);
    swaps     = iwspacemalloc(ctrl, nvtxs);
    perm      = iwspacemalloc(ctrl, nvtxs);
    qnum      = iwspacemalloc(ctrl, nvtxs);
    ubfactors = rwspacemalloc(ctrl, ncon);
    newbalv   = rwspacemalloc(ctrl, ncon);
    minbalv   = rwspacemalloc(ctrl, ncon);

    limit = gk_min(gk_max(0.01*nvtxs, 25), 150);


    /* Determine a fudge factor to allow the refinement routines to get out
       of tight balancing constraints. */
    ffactor = .5/gk_max(20, nvtxs);

    /* Initialize the queues */
    queues = (rpq_t **)wspacemalloc(ctrl, 2*ncon*sizeof(rpq_t *));
    for (i=0; i<2*ncon; i++)
        queues[i] = rpqCreate(nvtxs);
    for (i=0; i<nvtxs; i++)
        qnum[i] = iargmax_nrm(ncon, vwgt+i*ncon, invtvwgt);

    /* Determine the unbalance tolerance for each constraint. The tolerance is
       equal to the maximum of the original load imbalance and the user-supplied
       allowed tolerance. The rationale behind this approach is to allow the
       refinement routine to improve the cut, without having to worry about fixing
       load imbalance problems. The load imbalance is addressed by the balancing
       routines. */
    origbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ctrl->ubfactors, ubfactors);
    for (i=0; i<ncon; i++)
        ubfactors[i] = (ubfactors[i] > 0 ? ctrl->ubfactors[i]+ubfactors[i] : ctrl->ubfactors[i]);


    IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
          Print2WayRefineStats(ctrl, graph, ntpwgts, origbal, -2));

    iset(nvtxs, -1, moved);
    for (pass=0; pass<niter; pass++) { /* Do a number of passes */
        for (i=0; i<2*ncon; i++)
            rpqReset(queues[i]);

        mincutorder = -1;
        newcut = mincut = initcut = graph->mincut;

        minbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, minbalv);

        ASSERT(ComputeCut(graph, where) == graph->mincut);
        ASSERT(CheckBnd(graph));

        /* Insert boundary nodes in the priority queues */
        nbnd = graph->nbnd;
        irandArrayPermute(nbnd, perm, nbnd/5, 1);
        for (ii=0; ii<nbnd; ii++) {
            i = bndind[perm[ii]];
            ASSERT(ed[i] > 0 || id[i] == 0);
            ASSERT(bndptr[i] != -1);
            //rgain = 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1);
            //rgain = (ed[i]-id[i] > 0 ? 1.0*(ed[i]-id[i])/sqrt(vwgt[i*ncon+qnum[i]]+1) : ed[i]-id[i]);
            rgain = ed[i]-id[i];
            rpqInsert(queues[2*qnum[i]+where[i]], i, rgain);
        }

        for (nswaps=0; nswaps<nvtxs; nswaps++) {
            SelectQueue(graph, ctrl->pijbm, ubfactors, queues, &from, &cnum);

            to = (from+1)%2;

            if (from == -1 || (higain = rpqGetTop(queues[2*cnum+from])) == -1)
                break;
            ASSERT(bndptr[higain] != -1);

            newcut -= (ed[higain]-id[higain]);

            iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
            iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
            newbal = ComputeLoadImbalanceDiffVec(graph, 2, ctrl->pijbm, ubfactors, newbalv);

            if ((newcut < mincut && newbal <= ffactor) ||
                    (newcut == mincut && (newbal < minbal ||
                                          (newbal == minbal && BetterBalance2Way(ncon, minbalv, newbalv))))) {
                mincut      = newcut;
                minbal      = newbal;
                mincutorder = nswaps;
                rcopy(ncon, newbalv, minbalv);
            }
            else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
                newcut += (ed[higain]-id[higain]);
                iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+from*ncon, 1);
                iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+to*ncon,   1);
                break;
            }

            where[higain] = to;
            moved[higain] = nswaps;
            swaps[nswaps] = higain;

            if (ctrl->dbglvl&METIS_DBG_MOVEINFO) {
                printf("Moved%6"PRIDX" from %"PRIDX"(%"PRIDX") Gain:%5"PRIDX", "
                       "Cut:%5"PRIDX", NPwgts:", higain, from, cnum, ed[higain]-id[higain], newcut);
                for (l=0; l<ncon; l++)
                    printf("(%.3"PRREAL" %.3"PRREAL")", pwgts[l]*invtvwgt[l], pwgts[ncon+l]*invtvwgt[l]);
                printf(" %+.3"PRREAL" LB: %.3"PRREAL"(%+.3"PRREAL")\n",
                       minbal, ComputeLoadImbalance(graph, 2, ctrl->pijbm), newbal);
            }


            /**************************************************************
            * Update the id[i]/ed[i] values of the affected nodes
            ***************************************************************/
            SWAP(id[higain], ed[higain], tmp);
            if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
                BNDDelete(nbnd, bndind,  bndptr, higain);

            for (j=xadj[higain]; j<xadj[higain+1]; j++) {
                k = adjncy[j];

                kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
                INC_DEC(id[k], ed[k], kwgt);

                /* Update its boundary information and queue position */
                if (bndptr[k] != -1) { /* If k was a boundary vertex */
                    if (ed[k] == 0) { /* Not a boundary vertex any more */
                        BNDDelete(nbnd, bndind, bndptr, k);
                        if (moved[k] == -1)  /* Remove it if in the queues */
                            rpqDelete(queues[2*qnum[k]+where[k]], k);
                    }
                    else { /* If it has not been moved, update its position in the queue */
                        if (moved[k] == -1) {
                            //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);
                            //rgain = (ed[k]-id[k] > 0 ?
                            //              1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);
                            rgain = ed[k]-id[k];
                            rpqUpdate(queues[2*qnum[k]+where[k]], k, rgain);
                        }
                    }
                }
                else {
                    if (ed[k] > 0) {  /* It will now become a boundary vertex */
                        BNDInsert(nbnd, bndind, bndptr, k);
                        if (moved[k] == -1) {
                            //rgain = 1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1);
                            //rgain = (ed[k]-id[k] > 0 ?
                            //              1.0*(ed[k]-id[k])/sqrt(vwgt[k*ncon+qnum[k]]+1) : ed[k]-id[k]);
                            rgain = ed[k]-id[k];
                            rpqInsert(queues[2*qnum[k]+where[k]], k, rgain);
                        }
                    }
                }
            }

        }


        /****************************************************************
        * Roll back computations
        *****************************************************************/
        for (i=0; i<nswaps; i++)
            moved[swaps[i]] = -1;  /* reset moved array */
        for (nswaps--; nswaps>mincutorder; nswaps--) {
            higain = swaps[nswaps];

            to = where[higain] = (where[higain]+1)%2;
            SWAP(id[higain], ed[higain], tmp);
            if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
                BNDDelete(nbnd, bndind,  bndptr, higain);
            else if (ed[higain] > 0 && bndptr[higain] == -1)
                BNDInsert(nbnd, bndind,  bndptr, higain);

            iaxpy(ncon,  1, vwgt+higain*ncon, 1, pwgts+to*ncon,         1);
            iaxpy(ncon, -1, vwgt+higain*ncon, 1, pwgts+((to+1)%2)*ncon, 1);
            for (j=xadj[higain]; j<xadj[higain+1]; j++) {
                k = adjncy[j];

                kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
                INC_DEC(id[k], ed[k], kwgt);

                if (bndptr[k] != -1 && ed[k] == 0)
                    BNDDelete(nbnd, bndind, bndptr, k);
                if (bndptr[k] == -1 && ed[k] > 0)
                    BNDInsert(nbnd, bndind, bndptr, k);
            }
        }

        graph->mincut = mincut;
        graph->nbnd   = nbnd;

        IFSET(ctrl->dbglvl, METIS_DBG_REFINE,
              Print2WayRefineStats(ctrl, graph, ntpwgts, minbal, mincutorder));

        if (mincutorder <= 0 || mincut == initcut)
            break;
    }

    for (i=0; i<2*ncon; i++)
        rpqDestroy(queues[i]);

    WCOREPOP;
}
Example #3
0
void Adaptive_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i;
  idx_t tewgt, tvsize;
  real_t gtewgt, gtvsize;
  real_t ubavg, lbavg, *lbvec;

  WCOREPUSH;

  lbvec = rwspacemalloc(ctrl, graph->ncon);

  /************************************/
  /* Set up important data structures */
  /************************************/
  CommSetup(ctrl, graph);

  ubavg   = ravg(graph->ncon, ctrl->ubvec);
  tewgt   = isum(graph->nedges, graph->adjwgt, 1);
  tvsize  = isum(graph->nvtxs, graph->vsize, 1);
  gtewgt  = (real_t) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs;  /* The +1/graph->gnvtxs were added to remove any FPE */
  gtvsize = (real_t) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio);

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"]\n", 
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
     (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    AllocateRefinementWorkSpace(ctrl, 2*graph->nedges);

    /***********************************************/
    /* Balance the partition on the coarsest graph */
    /***********************************************/
    graph->where = ismalloc(graph->nvtxs+graph->nrecv, -1, "graph->where");
    icopy(graph->nvtxs, graph->home, graph->where);

    ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    lbavg = ravg(graph->ncon, lbvec);

    if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION)
      Balance_Partition(ctrl, graph);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputePartitionParams(ctrl, graph);
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");

      /* free memory allocated by ComputePartitionParams */
      gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM);
    }

    /* check if no coarsening took place */
    if (graph->finer == NULL) {
      ComputePartitionParams(ctrl, graph);
      KWayBalance(ctrl, graph, graph->ncon);
      KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);
    }
  }
  else {
    /*******************************/
    /* Coarsen it and partition it */
    /*******************************/
    switch (ctrl->ps_relation) {
      case PARMETIS_PSR_COUPLED:
        Match_Local(ctrl, graph);
        break;
      case PARMETIS_PSR_UNCOUPLED:
      default:
        Match_Global(ctrl, graph);
        break;
    }

    Adaptive_Partition(ctrl, graph->coarser);

    /********************************/
    /* project partition and refine */
    /********************************/
    ProjectPartition(ctrl, graph);
    ComputePartitionParams(ctrl, graph);

    if (graph->ncon > 1 && graph->level < 4) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      lbavg = ravg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.025) {
        KWayBalance(ctrl, graph, graph->ncon);
      }
    }

    KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");
    }
  }

  WCOREPOP;
}
Example #4
0
/*************************************************************************
* This function is the entry point of the initial partitioning algorithm.
* This algorithm assembles the graph to all the processors and preceed
* serially.
**************************************************************************/
idx_t Mc_Diffusion(ctrl_t *ctrl, graph_t *graph, idx_t *vtxdist, idx_t *where, 
          idx_t *home, idx_t npasses)
{
  idx_t h, i, j;
  idx_t nvtxs, nedges, ncon, pass, iter, domain, processor;
  idx_t nparts, mype, npes, nlinks, me, you, wsize;
  idx_t nvisited, nswaps = -1, tnswaps, done, alldone = -1;
  idx_t *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap;
  idx_t *pack, *unpack, *match, *proc2sub, *sub2proc;
  idx_t *visited, *gvisited;
  real_t *transfer, *npwgts, maxdiff, minflow, maxflow;
  real_t lbavg, oldlbavg, ubavg, *lbvec;
  real_t *diff_flows, *sr_flows;
  real_t diff_lbavg, sr_lbavg, diff_cost, sr_cost;
  idx_t *rbuffer, *sbuffer; 
  idx_t *rcount, *rdispl;
  real_t *solution, *load, *workspace;
  matrix_t matrix;
  graph_t *egraph;

  if (graph->ncon > 3)
    return 0;

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  nedges = graph->nedges;
  ncon   = graph->ncon;

  nparts = ctrl->nparts;
  mype   = ctrl->mype;
  npes   = ctrl->npes;
  ubavg  = ravg(ncon, ctrl->ubvec);

  /* initialize variables and allocate memory */
  lbvec      = rwspacemalloc(ctrl, ncon);
  diff_flows = rwspacemalloc(ctrl, ncon);
  sr_flows   = rwspacemalloc(ctrl, ncon);

  load                       = rwspacemalloc(ctrl, nparts);
  solution                   = rwspacemalloc(ctrl, nparts);
  npwgts = graph->gnpwgts    = rwspacemalloc(ctrl, ncon*nparts);
  matrix.values              = rwspacemalloc(ctrl, nedges);
  transfer = matrix.transfer = rwspacemalloc(ctrl, ncon*nedges);

  proc2sub               = iwspacemalloc(ctrl, gk_max(nparts, npes*2));
  sub2proc               = iwspacemalloc(ctrl, nparts);
  match                  = iwspacemalloc(ctrl, nparts);
  rowptr = matrix.rowptr = iwspacemalloc(ctrl, nparts+1);
  colind = matrix.colind = iwspacemalloc(ctrl, nedges);

  rcount = iwspacemalloc(ctrl, npes);
  rdispl = iwspacemalloc(ctrl, npes+1);

  pack       = iwspacemalloc(ctrl, nvtxs);
  unpack     = iwspacemalloc(ctrl, nvtxs);
  rbuffer    = iwspacemalloc(ctrl, nvtxs);
  sbuffer    = iwspacemalloc(ctrl, nvtxs);
  map        = iwspacemalloc(ctrl, nvtxs);
  rmap       = iwspacemalloc(ctrl, nvtxs);
  diff_where = iwspacemalloc(ctrl, nvtxs);
  ehome      = iwspacemalloc(ctrl, nvtxs);


  wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "Mc_Diffusion: workspace");

  graph->ckrinfo = (ckrinfo_t *)gk_malloc(nvtxs*sizeof(ckrinfo_t), "Mc_Diffusion: rinfo");


  /* construct subdomain connectivity graph */
  matrix.nrows = nparts;
  SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);
  nlinks = (matrix.nnzs-nparts) / 2;

  visited  = iwspacemalloc(ctrl, matrix.nnzs);
  gvisited = iwspacemalloc(ctrl, matrix.nnzs);

  for (pass=0; pass<npasses; pass++) {
    rset(matrix.nnzs*ncon, 0.0, transfer);
    iset(matrix.nnzs, 0, gvisited);
    iset(matrix.nnzs, 0, visited);
    iter = nvisited = 0;

    /* compute ncon flow solutions */
    for (h=0; h<ncon; h++) {
      rset(nparts, 0.0, solution);
      ComputeLoad(graph, nparts, load, ctrl->tpwgts, h);

      lbvec[h] = (rmax(nparts, load)+1.0/nparts) * (real_t)nparts;

      ConjGrad2(&matrix, load, solution, 0.001, workspace);
      ComputeTransferVector(ncon, &matrix, solution, transfer, h);
    }

    oldlbavg = ravg(ncon, lbvec);
    tnswaps = 0;
    maxdiff = 0.0;
    for (i=0; i<nparts; i++) {
      for (j=rowptr[i]; j<rowptr[i+1]; j++) {
        maxflow = rmax(ncon, transfer+j*ncon);
        minflow = rmin(ncon, transfer+j*ncon);
        maxdiff = (maxflow - minflow > maxdiff) ? maxflow - minflow : maxdiff;
      }
    }

    while (nvisited < nlinks) {
      /* compute independent sets of subdomains */
      iset(gk_max(nparts, npes*2), UNMATCHED, proc2sub);
      CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon);

      /* set up the packing arrays */
      iset(nparts, UNMATCHED, sub2proc);
      for (i=0; i<npes*2; i++) {
        if (proc2sub[i] == UNMATCHED)
          break;

        sub2proc[proc2sub[i]] = i/2;
      }

      iset(npes, 0, rcount);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          rcount[processor]++;
      }

      rdispl[0] = 0;
      for (i=1; i<npes+1; i++)
        rdispl[i] = rdispl[i-1] + rcount[i-1];

      iset(nvtxs, UNMATCHED, unpack);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          unpack[rdispl[processor]++] = i;
      }

      SHIFTCSR(i, npes, rdispl);

      iset(nvtxs, UNMATCHED, pack);
      for (i=0; i<rdispl[npes]; i++) {
        ASSERT(unpack[i] != UNMATCHED);
        domain = where[unpack[i]];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          pack[unpack[i]] = i;
      }

      /* Compute the flows */
      if (proc2sub[mype*2] != UNMATCHED) {
        me  = proc2sub[2*mype];
        you = proc2sub[2*mype+1];
        ASSERT(me != you);

        for (j=rowptr[me]; j<rowptr[me+1]; j++) {
          if (colind[j] == you) {
            visited[j] = 1;
            rcopy(ncon, transfer+j*ncon, diff_flows);
            break;
          }
        }

        for (j=rowptr[you]; j<rowptr[you+1]; j++) {
          if (colind[j] == me) {
            visited[j] = 1;
            for (h=0; h<ncon; h++) {
              if (transfer[j*ncon+h] > 0.0)
                diff_flows[h] = -1.0 * transfer[j*ncon+h];
            }
            break;
          }
        } 

        nswaps = 1;
        rcopy(ncon, diff_flows, sr_flows);

        iset(nvtxs, 0, sbuffer);
        for (i=0; i<nvtxs; i++) {
          if (where[i] == me || where[i] == you)
            sbuffer[i] = 1;
        }

        egraph = ExtractGraph(ctrl, graph, sbuffer, map, rmap);

        if (egraph != NULL) {
          icopy(egraph->nvtxs, egraph->where, diff_where);
          for (j=0; j<egraph->nvtxs; j++)
            ehome[j] = home[map[j]];
 
          RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg);

          if (ncon <= 4) {
            sr_where      = egraph->where;
            egraph->where = diff_where;

            nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, 
                         &diff_cost, &diff_lbavg, 1.0/(real_t)nvtxs);

            if ((sr_lbavg < diff_lbavg &&
                (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) ||
                (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = sr_where[i];
            }
            else {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = diff_where[i];
            }
          }
          else {
            for (i=0; i<egraph->nvtxs; i++)
              where[map[i]] = egraph->where[i];
          }

          gk_free((void **)&egraph->xadj, &egraph->nvwgt, &egraph->adjncy, &egraph, LTERM);
        }

        /* Pack the flow data */
        iset(nvtxs, UNMATCHED, sbuffer);
        for (i=0; i<nvtxs; i++) {
          domain = where[i];
          if (domain == you || domain == me) 
            sbuffer[pack[i]] = where[i];
        }
      }

      /* Broadcast the flow data */
      gkMPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_T, 
          (void *)rbuffer, rcount, rdispl, IDX_T, ctrl->comm);

      /* Unpack the flow data */
      for (i=0; i<rdispl[npes]; i++) {
        if (rbuffer[i] != UNMATCHED) 
          where[unpack[i]] = rbuffer[i];
      }


      /* Do other stuff */
      gkMPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs,
          IDX_T, MPI_MAX, ctrl->comm);
      nvisited = isum(matrix.nnzs, gvisited, 1)/2;
      tnswaps += GlobalSESum(ctrl, nswaps);

      if (iter++ == NGD_PASSES)
        break;
    }

    /* perform serial refinement */
    Mc_ComputeSerialPartitionParams(ctrl, graph, nparts);
    Mc_SerialKWayAdaptRefine(ctrl, graph, nparts, home, ctrl->ubvec, 10);

    /* check for early breakout */
    for (h=0; h<ncon; h++) {
      lbvec[h] = (real_t)(nparts) *
        npwgts[rargmax_strd(nparts,npwgts+h,ncon)*ncon+h];
    }
    lbavg = ravg(ncon, lbvec);

    done = 0;
    if (tnswaps == 0 || lbavg >= oldlbavg || lbavg <= ubavg + 0.035)
      done = 1;

    alldone = GlobalSEMax(ctrl, done);
    if (alldone == 1)
      break;
  }

  /* ensure that all subdomains have at least one vertex */
/*
  iset(nparts, 0, match);
  for (i=0; i<nvtxs; i++)
    match[where[i]]++;

  done = 0;
  while (done == 0) {
    done = 1;

    me = iargmin(nparts, match);  
    if (match[me] == 0) {
      if (ctrl->mype == PE) printf("WARNING: empty subdomain %"PRIDX" in Mc_Diffusion\n", me);
      you = iargmax(nparts, match);  
      for (i=0; i<nvtxs; i++) {
        if (where[i] == you) {
          where[i] = me;
          match[you]--;
          match[me]++;
          done = 0;
          break;
        }
      }
    }
  }
*/
 
  /* now free memory and return */
  gk_free((void **)&workspace, (void **)&graph->ckrinfo, LTERM);
  graph->gnpwgts = NULL;
  graph->ckrinfo = NULL;

  WCOREPOP;

  return 0;
}
Example #5
0
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph to all the processors and preceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, nvtxs, nedges, ncon;
  idx_t mype, npes, srnpes, srmype; 
  idx_t *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
  idx_t *part, *lwhere, *home;
  idx_t lnparts, fpart, fpe, lnpes, ngroups;
  idx_t *rcounts, *rdispls;
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  idx_t sr_pe, gd_pe, sr, gd, who_wins;
  real_t my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
  real_t rating, max_rating, your_cost = -1.0, your_balance = -1.0;
  real_t lbsum, min_lbsum, *lbvec, *tpwgts, *tpwgts2, buffer[2];
  graph_t *agraph, cgraph;
  ctrl_t *myctrl;
  MPI_Status status;
  MPI_Comm ipcomm, srcomm;
  struct {
    double cost;
    int rank;
  } lpecost, gpecost;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  WCOREPUSH;

  vtxdist = graph->vtxdist;
  agraph  = AssembleAdaptiveGraph(ctrl, graph);
  nvtxs   = cgraph.nvtxs  = agraph->nvtxs;
  nedges  = cgraph.nedges = agraph->nedges;
  ncon    = cgraph.ncon   = agraph->ncon;
  xadj    = cgraph.xadj   = icopy(nvtxs+1, agraph->xadj, iwspacemalloc(ctrl, nvtxs+1));
  vwgt    = cgraph.vwgt   = icopy(nvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, nvtxs*ncon));
  vsize   = cgraph.vsize  = icopy(nvtxs, agraph->vsize, iwspacemalloc(ctrl, nvtxs));
  adjncy  = cgraph.adjncy = icopy(nedges, agraph->adjncy, iwspacemalloc(ctrl, nedges));
  adjwgt  = cgraph.adjwgt = icopy(nedges, agraph->adjwgt, iwspacemalloc(ctrl, nedges));
  part    = cgraph.where  = agraph->where = iwspacemalloc(ctrl, nvtxs);

  lwhere = iwspacemalloc(ctrl, nvtxs);
  home   = iwspacemalloc(ctrl, nvtxs);
  lbvec  = rwspacemalloc(ctrl, graph->ncon);


  /****************************************/
  /****************************************/
  if (ctrl->ps_relation == PARMETIS_PSR_UNCOUPLED) {
    WCOREPUSH;
    rcounts = iwspacemalloc(ctrl, ctrl->npes);
    rdispls = iwspacemalloc(ctrl, ctrl->npes+1);

    for (i=0; i<ctrl->npes; i++) 
      rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
    MAKECSR(i, ctrl->npes, rdispls);

    gkMPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_T,
        (void *)part, rcounts, rdispls, IDX_T, ctrl->comm);

    for (i=0; i<agraph->nvtxs; i++)
      home[i] = part[i];

    WCOREPOP;  /* local frees */
  }
  else {
    for (i=0; i<ctrl->npes; i++) {
      for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
        part[j] = home[j] = i;
    }
  }

  /* Ensure that the initial partitioning is legal */
  for (i=0; i<agraph->nvtxs; i++) {
    if (part[i] >= ctrl->nparts)
      part[i] = home[i] = part[i] % ctrl->nparts;
    if (part[i] < 0)
      part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
  }
  /****************************************/
  /****************************************/

  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      rprintf(ctrl, "input cut: %"PRIDX", balance: ", ComputeSerialEdgeCut(agraph)));
  for (i=0; i<agraph->ncon; i++)
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

  /****************************************/
  /* Split the processors into two groups */
  /****************************************/
  sr = (ctrl->mype % 2 == 0) ? 1 : 0;
  gd = (ctrl->mype % 2 == 1) ? 1 : 0;

  if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
    sr = 1;
    gd = 0;
  }

  sr_pe = 0;
  gd_pe = 1;

  gkMPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);

  if (sr == 1) { /* Half of the processors do scratch-remap */
    ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, npes), 1);
    gkMPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
    gkMPI_Comm_rank(srcomm, &srmype);
    gkMPI_Comm_size(srcomm, &srnpes);

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_SEED] = ctrl->sync + (mype % ngroups) + 1;

    tpwgts  = ctrl->tpwgts;
    tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

    iset(nvtxs, 0, lwhere);
    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = srnpes;
    while (lnpes > 1 && lnparts > 1) {
      PASSERT(ctrl, agraph->nvtxs > 1);
      /* determine the weights of the two partitions as a function of the 
         weight of the target partition weights */
      for (j=(lnparts>>1), i=0; i<ncon; i++) {
        tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
        tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
        wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
        tpwgts2[i]      *= wsum;
        tpwgts2[ncon+i] *= wsum;
      }

      METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, 
            agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, 
            &twoparts, tpwgts2, NULL, moptions, &edgecut, part);

      /* pick one of the branches */
      if (srmype < fpe+lnpes/2) {
        KeepPart(ctrl, agraph, part, 0);
        lnpes   = lnpes/2;
        lnparts = lnparts/2;
      }
      else {
        KeepPart(ctrl, agraph, part, 1);
        fpart   = fpart + lnparts/2;
        fpe     = fpe + lnpes/2;
        lnpes   = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }

    if (lnparts == 1) { /* Case in which srnpes is greater or equal to nparts */
      /* Only the first process will assign labels (for the reduction to work) */
      if (srmype == fpe) {
        for (i=0; i<agraph->nvtxs; i++) 
          lwhere[agraph->label[i]] = fpart;
      }
    }
    else { /* Case in which srnpes is smaller than nparts */
      /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
      tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
      for (j=0; j<ncon; j++) {
        for (wsum=0.0, i=0; i<lnparts; i++) {
          tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
          wsum += tpwgts[i*ncon+j];
        }
        for (wsum=1.0/wsum, i=0; i<lnparts; i++)
          tpwgts[i*ncon+j] *= wsum;
      }

      METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
	    agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
            &edgecut, part);

      for (i=0; i<agraph->nvtxs; i++) 
        lwhere[agraph->label[i]] = fpart + part[i];
    }

    gkMPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_T, MPI_SUM, srcomm);

    edgecut = ComputeSerialEdgeCut(&cgraph);
    ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
    lbsum = rsum(ncon, lbvec, 1);
    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ipcomm);
    gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ipcomm);
    lpecost.rank = ctrl->mype;
    lpecost.cost = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon))
        lpecost.cost = (double)edgecut;
      else
        lpecost.cost = (double)max_cut + lbsum;
    }
    gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ipcomm);

    if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) 
      gkMPI_Send((void *)part, nvtxs, IDX_T, sr_pe, 1, ctrl->comm);

    if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) 
      gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status);

    if (ctrl->mype == sr_pe) {
      icopy(nvtxs, part, lwhere);
      SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
    }

    gkMPI_Comm_free(&srcomm);
  }
Example #6
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void RedoMyLink(ctrl_t *ctrl, graph_t *graph, idx_t *home, idx_t me,
                idx_t you, real_t *flows, real_t *sr_cost, real_t *sr_lbavg)
{
    idx_t h, i, r;
    idx_t nvtxs, nedges, ncon;
    idx_t  pass, lastseed, totalv;
    idx_t *xadj, *adjncy, *adjwgt, *where, *vsize;
    idx_t *costwhere, *lbwhere, *selectwhere;
    idx_t *ed, *id, *bndptr, *bndind, *perm;
    real_t *nvwgt, mycost;
    real_t lbavg, *lbvec;
    real_t best_lbavg, other_lbavg = -1.0, bestcost, othercost = -1.0;
    real_t *npwgts, *pwgts, *tpwgts;
    real_t ipc_factor, redist_factor, ftmp;
    idx_t mype;
    gkMPI_Comm_rank(MPI_COMM_WORLD, &mype);


    WCOREPUSH;

    nvtxs  = graph->nvtxs;
    nedges = graph->nedges;
    ncon   = graph->ncon;
    xadj   = graph->xadj;
    nvwgt  = graph->nvwgt;
    vsize  = graph->vsize;
    adjncy = graph->adjncy;
    adjwgt = graph->adjwgt;
    where  = graph->where;

    ipc_factor    = ctrl->ipc_factor;
    redist_factor = ctrl->redist_factor;

    /* set up data structures */
    id     = graph->sendind = iwspacemalloc(ctrl, nvtxs);
    ed     = graph->recvind = iwspacemalloc(ctrl, nvtxs);
    bndptr = graph->sendptr = iwspacemalloc(ctrl, nvtxs);
    bndind = graph->recvptr = iwspacemalloc(ctrl, nvtxs);

    costwhere = iwspacemalloc(ctrl, nvtxs);
    lbwhere   = iwspacemalloc(ctrl, nvtxs);
    perm      = iwspacemalloc(ctrl, nvtxs);

    lbvec  = rwspacemalloc(ctrl, ncon);
    pwgts  = rset(2*ncon, 0.0, rwspacemalloc(ctrl, 2*ncon));
    npwgts = rwspacemalloc(ctrl, 2*ncon);
    tpwgts = rwspacemalloc(ctrl, 2*ncon);

    graph->gnpwgts = npwgts;

    RandomPermute(nvtxs, perm, 1);
    icopy(nvtxs, where, costwhere);
    icopy(nvtxs, where, lbwhere);

    /* compute target pwgts */
    for (h=0; h<ncon; h++) {
        tpwgts[h]      = -1.0*flows[h];
        tpwgts[ncon+h] = flows[h];
    }

    for (i=0; i<nvtxs; i++) {
        if (where[i] == me) {
            for (h=0; h<ncon; h++) {
                tpwgts[h] += nvwgt[i*ncon+h];
                pwgts[h]  += nvwgt[i*ncon+h];
            }
        }
        else {
            ASSERT(where[i] == you);
            for (h=0; h<ncon; h++) {
                tpwgts[ncon+h] += nvwgt[i*ncon+h];
                pwgts[ncon+h]  += nvwgt[i*ncon+h];
            }
        }
    }

    /* we don't want any weights to be less than zero */
    for (h=0; h<ncon; h++) {
        if (tpwgts[h] < 0.0) {
            tpwgts[ncon+h] += tpwgts[h];
            tpwgts[h] = 0.0;
        }

        if (tpwgts[ncon+h] < 0.0) {
            tpwgts[h] += tpwgts[ncon+h];
            tpwgts[ncon+h] = 0.0;
        }
    }


    /* now compute new bisection */
    bestcost = (real_t)isum(nedges, adjwgt, 1)*ipc_factor +
               (real_t)isum(nvtxs, vsize, 1)*redist_factor;
    best_lbavg = 10.0;

    lastseed = 0;
    for (pass=N_MOC_REDO_PASSES; pass>0; pass--) {
        iset(nvtxs, 1, where);

        /* find seed vertices */
        r = perm[lastseed] % nvtxs;
        lastseed = (lastseed+1) % nvtxs;
        where[r] = 0;

        Mc_Serial_Compute2WayPartitionParams(ctrl, graph);
        Mc_Serial_Init2WayBalance(ctrl, graph, tpwgts);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);
        Mc_Serial_Balance2Way(ctrl, graph, tpwgts, 1.02);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);

        for (i=0; i<nvtxs; i++)
            where[i] = (where[i] == 0) ? me : you;

        for (i=0; i<ncon; i++) {
            ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
            if (ftmp != 0.0)
                lbvec[i] = fabs(npwgts[i]-tpwgts[i])/ftmp;
            else
                lbvec[i] = 0.0;
        }
        lbavg = ravg(ncon, lbvec);

        totalv = 0;
        for (i=0; i<nvtxs; i++)
            if (where[i] != home[i])
                totalv += vsize[i];

        mycost = (real_t)(graph->mincut)*ipc_factor + (real_t)totalv*redist_factor;

        if (bestcost >= mycost) {
            bestcost = mycost;
            other_lbavg = lbavg;
            icopy(nvtxs, where, costwhere);
        }

        if (best_lbavg >= lbavg) {
            best_lbavg = lbavg;
            othercost = mycost;
            icopy(nvtxs, where, lbwhere);
        }
    }

    if (other_lbavg <= .05) {
        selectwhere = costwhere;
        *sr_cost = bestcost;
        *sr_lbavg = other_lbavg;
    }
    else {
        selectwhere = lbwhere;
        *sr_cost = othercost;
        *sr_lbavg = best_lbavg;
    }

    icopy(nvtxs, selectwhere, where);

    WCOREPOP;
}