/*************************************************************************
* Serial multi-constraint 2-way balancing pass.
*
* Every vertex is placed in a priority queue selected by (constraint, side)
* and keyed by its gain ed-id.  Vertices are then moved one at a time out of
* the queue chosen by Serial_SelectQueue().  The best state seen so far is
* ranked first by load imbalance, then by cut, then by
* Serial_BetterBalance().  The pass ends when the best imbalance is below
* lbfactor, when no vertex can be selected, or when more than `limit'
* moves have been made since the last best state.  Moves made after the
* best state are rolled back before returning.
*
*   graph    - graph to balance; where, id/ed (graph->sendind/recvind), the
*              boundary arrays (graph->sendptr/recvptr), graph->gnpwgts,
*              graph->mincut and graph->gnvtxs (boundary count) are updated
*   tpwgts   - target partition weights (presumably 2*ncon entries, side
*              major, like graph->gnpwgts -- confirm against callers)
*   lbfactor - imbalance below which the pass stops
**************************************************************************/
void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor)
{
  int v, pos, e, nbr, c, side, alt, delta;
  int nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
  int vtx, prevgain, bestcut, curcut, bestorder, accept;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, *vw;
  float mindiff[MAXNCON], bestbal, curbal;
  FPQueueType parts[MAXNCON][2];
  int qsizes[MAXNCON][2];
  KeyValueType *cand;

  /* Graph arrays; id/ed and the boundary lists live in the send/recv slots */
  nvtxs  = graph->nvtxs;
  ncon   = graph->ncon;
  xadj   = graph->xadj;
  nvwgt  = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where  = graph->where;
  id     = graph->sendind;
  ed     = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum  = idxmalloc(nvtxs, "qnum");
  cand  = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* One queue per (constraint, side), plus a population counter for each */
  for (c=0; c<ncon; c++) {
    FPQueueInit(&parts[c][0], nvtxs);
    FPQueueInit(&parts[c][1], nvtxs);
    qsizes[c][0] = 0;
    qsizes[c][1] = 0;
  }

  /* Each vertex initially goes to the queue picked by samax() */
  for (v=0; v<nvtxs; v++) {
    qnum[v] = samax(ncon, nvwgt+v*ncon);
    qsizes[qnum[v]][where[v]]++;
  }

  /* Populate queues that ended up empty: re-home vertices whose samax2()
     constraint is the empty one, provided their current queue is fuller
     and their current-constraint weight is under 1.3x the other weight */
  for (side=0; side<2; side++) {
    for (c=0; c<ncon; c++) {
      if (qsizes[c][side] != 0)
        continue;

      for (v=0; v<nvtxs; v++) {
        if (where[v] != side)
          continue;

        vw  = nvwgt + v*ncon;
        alt = samax2(ncon, vw);
        if (alt != c)
          continue;
        if (qsizes[qnum[v]][side] <= qsizes[c][side])
          continue;
        if (!(vw[qnum[v]] < 1.3*vw[c]))
          continue;

        qsizes[qnum[v]][side]--;
        qsizes[c][side]++;
        qnum[v] = c;
      }
    }
  }

  /* Reference state against which later moves are judged */
  for (c=0; c<ncon; c++)
    mindiff[c] = fabs(tpwgts[c]-npwgts[c]);
  bestbal   = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  bestcut   = graph->mincut;
  curcut    = bestcut;
  bestorder = -1;

  idxset(nvtxs, -1, moved);

  /* Insert every vertex, in the order produced by sorting on id-ed */
  nbnd = graph->gnvtxs;
  for (v=0; v<nvtxs; v++) {
    cand[v].key = id[v]-ed[v];
    cand[v].val = v;
  }
  ikeysort(nvtxs, cand);

  for (pos=0; pos<nvtxs; pos++) {
    v = cand[pos].val;
    FPQueueInsert(&parts[qnum[v]][where[v]], v, (float)(ed[v]-id[v]));
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (bestbal < lbfactor)
      break;

    Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1)
      break;
    vtx = FPQueueGetMax(&parts[cnum][from]);
    if (vtx == -1)
      break;

    /* Tentatively apply the move to the partition weights and the cut */
    vw = nvwgt + vtx*ncon;
    saxpy2(ncon, 1.0, vw, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, vw, 1, npwgts+from*ncon, 1);
    curcut -= ed[vtx]-id[vtx];
    curbal  = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    /* Better balance wins; ties go to the smaller cut, then to
       Serial_BetterBalance() */
    if (curbal < bestbal)
      accept = 1;
    else if (curbal == bestbal && curcut < bestcut)
      accept = 1;
    else if (curbal == bestbal && curcut == bestcut)
      accept = (Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff) != 0);
    else
      accept = 0;

    if (accept) {
      bestcut   = curcut;
      bestbal   = curbal;
      bestorder = nswaps;
      for (c=0; c<ncon; c++)
        mindiff[c] = fabs(tpwgts[c]-npwgts[c]);
    }
    else if (nswaps-bestorder > limit) {
      /* Too many moves since the last best state: undo this one and stop */
      curcut += ed[vtx]-id[vtx];
      saxpy2(ncon, 1.0, vw, 1, npwgts+from*ncon, 1);
      saxpy2(ncon, -1.0, vw, 1, npwgts+to*ncon, 1);
      break;
    }

    /* Commit the move and log it for a possible rollback */
    where[vtx]    = to;
    moved[vtx]    = nswaps;
    swaps[nswaps] = vtx;

    /* The moved vertex trades its internal and external degrees */
    SWAP(id[vtx], ed[vtx], tmp);
    if (ed[vtx] == 0 && bndptr[vtx] != -1 && xadj[vtx] < xadj[vtx+1]) {
      BNDDelete(nbnd, bndind, bndptr, vtx);
    }
    if (ed[vtx] > 0 && bndptr[vtx] == -1) {
      BNDInsert(nbnd, bndind, bndptr, vtx);
    }

    /* Propagate the move to every neighbour */
    for (e=xadj[vtx]; e<xadj[vtx+1]; e++) {
      nbr      = adjncy[e];
      prevgain = ed[nbr]-id[nbr];

      if (to == where[nbr])
        delta = adjwgt[e];
      else
        delta = -adjwgt[e];
      INC_DEC(id[nbr], ed[nbr], delta);

      /* Only vertices that have not moved yet are still queued */
      if (moved[nbr] == -1)
        FPQueueUpdate(&parts[qnum[nbr]][where[nbr]], nbr, (float)(prevgain), (float)(ed[nbr]-id[nbr]));

      /* Keep the boundary list in step with the new external degree */
      if (ed[nbr] == 0 && bndptr[nbr] != -1) {
        BNDDelete(nbnd, bndind, bndptr, nbr);
      }
      else if (ed[nbr] > 0 && bndptr[nbr] == -1) {
        BNDInsert(nbnd, bndind, bndptr, nbr);
      }
    }
  }

  /* Roll back every move made after the best recorded state */
  nswaps--;
  while (nswaps > bestorder) {
    vtx = swaps[nswaps];

    where[vtx] = (where[vtx]+1)%2;
    to = where[vtx];

    SWAP(id[vtx], ed[vtx], tmp);
    if (ed[vtx] == 0 && bndptr[vtx] != -1 && xadj[vtx] < xadj[vtx+1]) {
      BNDDelete(nbnd, bndind, bndptr, vtx);
    }
    else if (ed[vtx] > 0 && bndptr[vtx] == -1) {
      BNDInsert(nbnd, bndind, bndptr, vtx);
    }

    vw = nvwgt + vtx*ncon;
    saxpy2(ncon, 1.0, vw, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, vw, 1, npwgts+((to+1)%2)*ncon, 1);

    for (e=xadj[vtx]; e<xadj[vtx+1]; e++) {
      nbr = adjncy[e];

      if (to == where[nbr])
        delta = adjwgt[e];
      else
        delta = -adjwgt[e];
      INC_DEC(id[nbr], ed[nbr], delta);

      if (bndptr[nbr] != -1 && ed[nbr] == 0) {
        BNDDelete(nbnd, bndind, bndptr, nbr);
      }
      if (bndptr[nbr] == -1 && ed[nbr] > 0) {
        BNDInsert(nbnd, bndind, bndptr, nbr);
      }
    }

    nswaps--;
  }

  graph->mincut = bestcut;
  graph->gnvtxs = nbnd;

  for (c=0; c<ncon; c++) {
    FPQueueFree(&parts[c][0]);
    FPQueueFree(&parts[c][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);

  return;
}
/*************************************************************************
* This function balances two partitions by moving the highest gain
* (including negative gain) vertices to the other domain.
* It is used only when the imbalance is due to non-contiguous
* subdomains, that is, when there are no boundary vertices.
* It moves vertices from the domain that is overweight to the one that
* is underweight.
*
* The source side is hard-wired to partition 1 and the destination to
* partition 0.  For every constraint two queues are kept for the source
* side: parts[c][0] holds vertices with ed > 0, parts[c][1] holds the
* others.  The first queue is always tried before the second.
*
*   graph  - graph to balance; where, id/ed (graph->sendind/recvind), the
*            boundary arrays (graph->sendptr/recvptr), graph->gnpwgts,
*            graph->mincut and graph->gnvtxs (boundary count) are updated
*   tpwgts - target partition weights, indexed as tpwgts[side*ncon + c]
**************************************************************************/
void Mc_Serial_Init2WayBalance(GraphType *graph, float *tpwgts)
{
  int i, ii, j, k;
  int kwgt, nvtxs, nbnd, ncon, nswaps, from, to, cnum, tmp;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *qnum;
  float *nvwgt, *npwgts;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut;
  KeyValueType *cand;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  nvwgt = graph->nvwgt;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  /* This is called for initial partitioning so we know from where to pick nodes */
  from = 1;
  to = (from+1)%2;

  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
  }

  /* Compute the queues in which each vertex will be assigned to */
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  /* Vertices are inserted in the order produced by sorting on id-ed */
  for (i=0; i<nvtxs; i++) {
    cand[i].key = id[i]-ed[i];
    cand[i].val = i;
  }
  ikeysort(nvtxs, cand);

  /* Insert the nodes of the proper partition in the appropriate priority queue */
  for (ii=0; ii<nvtxs; ii++) {
    i = cand[ii].val;
    if (where[i] == from) {
      if (ed[i] > 0)
        FPQueueInsert(&parts[qnum[i]][0], i, (float)(ed[i]-id[i]));
      else
        FPQueueInsert(&parts[qnum[i]][1], i, (float)(ed[i]-id[i]));
    }
  }

  mincut = graph->mincut;
  nbnd = graph->gnvtxs;
  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    /* Stop when Serial_AreAnyVwgtsBelow() reports that the source side has
       dropped below its target in some constraint */
    if (Serial_AreAnyVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, tpwgts+from*ncon))
      break;

    if ((cnum = Serial_SelectQueueOneWay(ncon, npwgts, tpwgts, from, parts)) == -1)
      break;

    /* Prefer the ed > 0 queue, fall back to the other one */
    higain = FPQueueGetMax(&parts[cnum][0]);
    if (higain == -1)
      higain = FPQueueGetMax(&parts[cnum][1]);

    /* Both queues empty: nothing left to move.  Without this check the -1
       sentinel would be used as an index into ed[]/id[]/nvwgt[] below; the
       other refinement routines in this file stop on -1 in the same way. */
    if (higain == -1)
      break;

    mincut -= (ed[higain]-id[higain]);
    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

    where[higain] = to;

    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind, bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind, bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position.  This must happen before the boundary
         update below, because bndptr[k] still tells which queue k is in. */
      if (where[k] == from) {
        if (ed[k] > 0 && bndptr[k] == -1) {  /* It moves in boundary */
          FPQueueDelete(&parts[qnum[k]][1], k);
          FPQueueInsert(&parts[qnum[k]][0], k, (float)(ed[k]-id[k]));
        }
        else { /* It must be in the boundary already */
          FPQueueUpdate(&parts[qnum[k]][0], k, (float)(oldgain), (float)(ed[k]-id[k]));
        }
      }

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1)
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  graph->mincut = mincut;
  graph->gnvtxs = nbnd;

  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, LTERM);
}
/*************************************************************************
* This function performs an edge-based FM refinement of a 2-way,
* multi-constraint partition.
*
* Each pass inserts the current boundary vertices into per-(constraint,
* side) priority queues keyed by gain ed-id, then repeatedly moves the
* top vertex of the queue chosen by Serial_SelectQueue().  Queue selection
* uses the target weights scaled by the initial imbalance (rtpwgts).  A
* state becomes the new best when it lowers the cut without raising the
* imbalance above the initial one by more than .00001, or when it keeps
* the cut and improves the balance.  A pass ends when no vertex can be
* selected or when more than `limit' moves have been made since the last
* best state; moves made after the best state are rolled back.  Passes
* stop early once a pass fails to improve the cut.
*
*   graph   - graph to refine; where, id/ed (graph->sendind/recvind), the
*             boundary arrays (graph->sendptr/recvptr), graph->gnpwgts,
*             graph->mincut and graph->gnvtxs (boundary count) are updated
*   tpwgts  - target partition weights, 2*ncon entries, side major
*   npasses - maximum number of refinement passes
**************************************************************************/
void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses)
{
  int i, ii, j, k;
  int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, initcut, newcut, mincutorder;
  float rtpwgts[MAXNCON*2];
  KeyValueType *cand;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  limit = amin(amax(0.01*nvtxs, 25), 150);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  /* Targets relaxed by the initial imbalance; used for queue selection only */
  origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  for (i=0; i<ncon; i++) {
    rtpwgts[i] = origbal*tpwgts[i];
    rtpwgts[ncon+i] = origbal*tpwgts[ncon+i];
  }

  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    for (i=0; i<ncon; i++) {
      FPQueueReset(&parts[i][0]);
      FPQueueReset(&parts[i][1]);
    }

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    for (i=0; i<ncon; i++)
      mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
    minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    /* Insert boundary nodes in the priority queues.  cand[].val is a
       position in bndind[], so the sort key has to be taken from the
       vertex stored at that position, not from vertex number i. */
    nbnd = graph->gnvtxs;
    for (i=0; i<nbnd; i++) {
      k = bndind[i];
      cand[i].key = id[k]-ed[k];
      cand[i].val = i;
    }
    ikeysort(nbnd, cand);

    for (ii=0; ii<nbnd; ii++) {
      i = bndind[cand[ii].val];
      FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
      to = (from+1)%2;

      if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
        break;

      /* Tentatively apply the move to the partition weights and the cut */
      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

      newcut -= (ed[higain]-id[higain]);
      newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

      if ((newcut < mincut && newbal-origbal <= .00001) ||
          (newcut == mincut && (newbal < minbal ||
                                (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
        mincut = newcut;
        minbal = newbal;
        mincutorder = nswaps;
        for (i=0; i<ncon; i++)
          mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
        saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
        break;
      }

      /* Commit the move and log it for a possible rollback */
      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind, bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              FPQueueDelete(&parts[qnum[k]][where[k]], k);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k]));
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)
              FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k]));
          }
        }
      }
    }

    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */

    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind, bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind, bndptr, higain);

      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

    graph->mincut = mincut;
    graph->gnvtxs = nbnd;

    /* Stop when the pass accepted no move or did not lower the cut */
    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);

  return;
}
/*************************************************************************
* This function moves vertices between the two subdomains `me' and `you'
* so as to drive the per-constraint values in flows[] towards zero, and
* reports the resulting imbalance and cost.
*
* Vertices are bucketed by Moc_HashVwgts() of their weight vector.  Each
* bucket has two priority queues, one per side (`me' queues at index
* bucket, `you' queues at index bucket+nqueues).  The gain of a vertex is
* ipc_factor*(ed-id), adjusted by redist_factor*vsize when the vertex's
* home is one of the two subdomains.
*
*   ctrl       - supplies ipc_factor and redist_factor
*   graph      - graph whose vertices all belong to `me' or `you';
*                graph->where is updated in place
*   home       - home[i] is the original subdomain of vertex i
*   me, you    - the two subdomain ids of this link
*   flows      - per-constraint flow; updated in place as vertices move
*   maxdiff    - passed through to Moc_DynamicSelectQueue()
*   diff_cost  - out: cut*ipc_factor + totalv*redist_factor
*   diff_lbavg - out: savg() of the per-constraint imbalance of `me'
*   avgvwgt    - threshold used when retrying on the other side; also
*                passed to Moc_DynamicSelectQueue()
*
* Returns the number of vertex moves that were kept (nswaps).
**************************************************************************/
int BalanceMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me,
    int you, float *flows, float maxdiff, float *diff_cost, float *diff_lbavg,
    float avgvwgt)
{
  int h, i, ii, j, k;
  int nvtxs, ncon;
  int nqueues, minval, maxval, higain, vtx, edge, totalv;
  int from, to, qnum, index, nchanges, cut, tmp;
  int pass, nswaps, nmoves, multiplier;
  idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id;
  idxtype *hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes;
  float *nvwgt, lbvec[MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2], my_wgt[MAXNCON];
  float newgain, oldgain = 0.0;
  float lbavg, bestflow, mycost;
  float ipc_factor, redist_factor, ftmp;
  FPQueueType *queues;
  int mype;

  /* NOTE(review): mype is never read in this function -- confirm whether
     this call is still needed */
  MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  vsize = graph->vsize;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;

  ipc_factor = ctrl->ipc_factor;
  redist_factor = ctrl->redist_factor;

  /* One allocation of 7*nvtxs entries, carved into seven per-vertex arrays.
     Only hval is passed to GKfree() at the end. */
  hval = idxmalloc(nvtxs*7, "hval");
  id = hval + nvtxs;
  ed = hval + nvtxs*2;
  map = hval + nvtxs*3;
  rmap = hval + nvtxs*4;
  myqueue = hval + nvtxs*5;
  changes = hval + nvtxs*6;

  /* pwgts  = current weight of each side (entries [0,ncon) for `me',
              [ncon,2*ncon) for `you')
     tpwgts = current weight minus flows on `me', plus flows on `you' */
  sset(ncon*2, 0.0, pwgts);
  for (h=0; h<ncon; h++) {
    tpwgts[h] = -1.0 * flows[h];
    tpwgts[ncon+h] = flows[h];
  }

  for (i=0; i<nvtxs; i++) {
    if (where[i] == me) {
      for (h=0; h<ncon; h++) {
        tpwgts[h] += nvwgt[i*ncon+h];
        pwgts[h] += nvwgt[i*ncon+h];
      }
    }
    else {
      ASSERTS(where[i] == you);
      for (h=0; h<ncon; h++) {
        tpwgts[ncon+h] += nvwgt[i*ncon+h];
        pwgts[ncon+h] += nvwgt[i*ncon+h];
      }
    }
  }

  /* we don't want any tpwgts to be less than zero: a negative target is
     folded into the other side's target and then clamped to zero */
  for (h=0; h<ncon; h++) {
    if (tpwgts[h] < 0.0) {
      tpwgts[ncon+h] += tpwgts[h];
      tpwgts[h] = 0.0;
    }
    if (tpwgts[ncon+h] < 0.0) {
      tpwgts[h] += tpwgts[ncon+h];
      tpwgts[ncon+h] = 0.0;
    }
  }

  /*******************************/
  /* insert vertices into queues */
  /*******************************/
  /* minval/maxval bound the values that hval is offset by; presumably they
     are the smallest/largest results of Moc_HashVwgts() -- confirm against
     that function.  nqueues is the size of that range. */
  minval = maxval = 0;
  multiplier = 1;
  for (i=0; i<ncon; i++) {
    multiplier *= (i+1);
    maxval += i*multiplier;
    minval += (ncon-1-i)*multiplier;
  }

  nqueues = maxval-minval+1;
  nvpq = idxsmalloc(nqueues, 0, "nvpq");
  ptr = idxmalloc(nqueues+1, "ptr");
  inq = idxmalloc(nqueues*2, "inq");
  queues = (FPQueueType *)(GKmalloc(sizeof(FPQueueType)*nqueues*2, "queues"));

  /* hval[i] = bucket of vertex i; nvpq[b] = number of vertices in bucket b */
  for (i=0; i<nvtxs; i++)
    hval[i] = Moc_HashVwgts(ncon, nvwgt+i*ncon) - minval;

  for (i=0; i<nvtxs; i++)
    nvpq[hval[i]]++;

  /* Prefix sum of bucket sizes, then a counting-sort style placement:
     map[i] is the slot of vertex i and rmap[] is the inverse */
  ptr[0] = 0;
  for (i=0; i<nqueues; i++)
    ptr[i+1] = ptr[i] + nvpq[i];

  for (i=0; i<nvtxs; i++) {
    map[i] = ptr[hval[i]];
    rmap[ptr[hval[i]]++] = i;
  }

  /* The placement loop advanced every ptr[b] to the end of its bucket;
     shift right so that ptr[b] is the start of bucket b again.  After this
     map[i]-ptr[hval[i]] is the index of vertex i inside its bucket. */
  for (i=nqueues-1; i>0; i--)
    ptr[i] = ptr[i-1];
  ptr[0] = 0;

  /* initialize queues: two per non-empty bucket, one for each side */
  for (i=0; i<nqueues; i++)
    if (nvpq[i] > 0) {
      FPQueueInit(queues+i, nvpq[i]);
      FPQueueInit(queues+i+nqueues, nvpq[i]);
    }

  /* compute internal/external degrees */
  idxset(nvtxs, 0, id);
  idxset(nvtxs, 0, ed);
  for (j=0; j<nvtxs; j++)
    for (k=xadj[j]; k<xadj[j+1]; k++)
      if (where[adjncy[k]] == where[j])
        id[j] += adjwgt[k];
      else
        ed[j] += adjwgt[k];

  nswaps = 0;
  for (pass=0; pass<N_MOC_BAL_PASSES; pass++) {
    /* myqueue[v]: 0 = queued on the `me' side, 1 = queued on the `you'
       side, -1 = already moved in this pass.  inq[q] counts queued items. */
    idxset(nvtxs, -1, myqueue);
    idxset(nqueues*2, 0, inq);

    /* insert vertices into correct queues */
    for (j=0; j<nvtxs; j++) {
      index = (where[j] == me) ? 0 : nqueues;

      /* gain = ipc_factor*(ed-id), minus the redistribution term when the
         vertex currently sits at its home, plus it otherwise */
      newgain = ipc_factor*(float)(ed[j]-id[j]);
      if (home[j] == me || home[j] == you) {
        if (where[j] == home[j])
          newgain -= redist_factor*(float)vsize[j];
        else
          newgain += redist_factor*(float)vsize[j];
      }

      FPQueueInsert(queues+hval[j]+index, map[j]-ptr[hval[j]], newgain);
      myqueue[j] = (where[j] == me) ? 0 : 1;
      inq[hval[j]+index]++;
    }

    /* bestflow = largest |flows[h]| over all constraints
       (replaces the earlier: bestflow = sfavg(ncon, flows);) */
    for (j=0, h=0; h<ncon; h++)
      if (fabs(flows[h]) > fabs(flows[j]))
        j = h;
    bestflow = fabs(flows[j]);

    nchanges = nmoves = 0;
    for (ii=0; ii<nvtxs/2; ii++) {
      from = -1;
      Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from, &qnum, minval, avgvwgt, maxdiff);

      /* can't find a vertex in one subdomain, try the other; the retry is
         only made if some flow exceeds avgvwgt in the matching direction */
      if (from != -1 && qnum == -1) {
        from = (from == me) ? you : me;

        if (from == me) {
          for (j=0; j<ncon; j++)
            if (flows[j] > avgvwgt)
              break;
        }
        else {
          for (j=0; j<ncon; j++)
            if (flows[j] < -1.0*avgvwgt)
              break;
        }

        if (j != ncon)
          Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from, &qnum, minval, avgvwgt, maxdiff);
      }

      if (qnum == -1)
        break;

      to = (from == me) ? you : me;
      index = (from == me) ? 0 : nqueues;
      higain = FPQueueGetMax(queues+qnum+index);
      inq[qnum+index]--;
      ASSERTS(higain != -1);

      /*****************/
      /* make the swap */
      /*****************/
      /* higain is an index inside the bucket; rmap translates it back to a
         vertex number */
      vtx = rmap[higain+ptr[qnum]];
      myqueue[vtx] = -1;
      where[vtx] = to;
      nswaps++;
      nmoves++;

      /* update the flows */
      for (j=0; j<ncon; j++)
        flows[j] += (to == me) ? nvwgt[vtx*ncon+j] : -1.0*nvwgt[vtx*ncon+j];

      /* ftmp = largest |flows[h]| after the move
         (replaces the earlier: ftmp = sfavg(ncon, flows);) */
      for (j=0, h=0; h<ncon; h++)
        if (fabs(flows[h]) > fabs(flows[j]))
          j = h;
      ftmp = fabs(flows[j]);

      /* A new best flow clears the undo list; otherwise the move is
         recorded so it can be undone at the end of the pass */
      if (ftmp < bestflow) {
        bestflow = ftmp;
        nchanges = 0;
      }
      else {
        changes[nchanges++] = vtx;
      }

      /* tmp is first the SWAP scratch value, then the signed edge weight */
      SWAP(id[vtx], ed[vtx], tmp);
      for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
        edge = adjncy[j];

        /* must compute oldgain before changing id/ed */
        if (myqueue[edge] != -1) {
          oldgain = ipc_factor*(float)(ed[edge]-id[edge]);
          if (home[edge] == me || home[edge] == you) {
            if (where[edge] == home[edge])
              oldgain -= redist_factor*(float)vsize[edge];
            else
              oldgain += redist_factor*(float)vsize[edge];
          }
        }

        tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[edge], ed[edge], tmp);

        /* neighbours still in a queue get their key refreshed */
        if (myqueue[edge] != -1) {
          newgain = ipc_factor*(float)(ed[edge]-id[edge]);
          if (home[edge] == me || home[edge] == you) {
            if (where[edge] == home[edge])
              newgain -= redist_factor*(float)vsize[edge];
            else
              newgain += redist_factor*(float)vsize[edge];
          }

          FPQueueUpdate(queues+hval[edge]+(nqueues*myqueue[edge]), map[edge]-ptr[hval[edge]], oldgain, newgain);
        }
      }
    }

    /****************************/
    /* now go back to best flow */
    /****************************/
    /* Undo the moves recorded since the best flow: where[] and id/ed are
       restored.
       NOTE(review): flows[] is not adjusted back here -- confirm whether
       the caller relies on that. */
    nswaps -= nchanges;
    nmoves -= nchanges;
    for (i=0; i<nchanges; i++) {
      vtx = changes[i];
      from = where[vtx];
      where[vtx] = to = (from == me) ? you : me;

      SWAP(id[vtx], ed[vtx], tmp);
      for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
        edge = adjncy[j];
        tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[edge], ed[edge], tmp);
      }
    }

    /* empty the queues so the next pass can refill them */
    for (i=0; i<nqueues; i++) {
      if (nvpq[i] > 0) {
        FPQueueReset(queues+i);
        FPQueueReset(queues+i+nqueues);
      }
    }

    /* a pass that kept no move ends the refinement */
    if (nmoves == 0)
      break;
  }

  /***************************/
  /* compute 2-way imbalance */
  /***************************/
  /* lbvec[c] = |weight of `me' - target of `me'| / (average side weight) */
  sset(ncon, 0.0, my_wgt);
  for (i=0; i<nvtxs; i++)
    if (where[i] == me)
      for (h=0; h<ncon; h++)
        my_wgt[h] += nvwgt[i*ncon+h];

  for (i=0; i<ncon; i++) {
    ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
    if (ftmp != 0.0)
      lbvec[i] = fabs(my_wgt[i]-tpwgts[i]) / ftmp;
    else
      lbvec[i] = 0.0;
  }
  lbavg = savg(ncon, lbvec);
  *diff_lbavg = lbavg;

  /****************/
  /* compute cost */
  /****************/
  /* totalv = total vsize of vertices away from home; every cut edge is seen
     from both endpoints, hence the division by 2 */
  cut = totalv = 0;
  for (i=0; i<nvtxs; i++) {
    if (where[i] != home[i])
      totalv += vsize[i];

    for (j=xadj[i]; j<xadj[i+1]; j++)
      if (where[adjncy[j]] != where[i])
        cut += adjwgt[j];
  }
  cut /= 2;
  mycost = cut*ipc_factor + totalv*redist_factor;
  *diff_cost = mycost;

  /* free memory */
  for (i=0; i<nqueues; i++)
    if (nvpq[i] > 0) {
      FPQueueFree(queues+i);
      FPQueueFree(queues+i+nqueues);
    }

  GKfree((void **)&hval, (void **)&nvpq, (void **)&ptr, (void **)&inq, (void **)&queues, LTERM);

  return nswaps;
}