/*************************************************************************
* Computes the load imbalance of a partitioning, one value per constraint.
*
*   graph   - graph whose nvtxs, ncon and vwgt fields are read
*   nparts  - number of partitions
*   where   - where[v] is the partition that vertex v is assigned to
*   ubvec   - output; ubvec[c] receives
*             nparts * (heaviest partition weight) / (total weight)
*             for constraint c.  When the graph carries no vertex weights
*             only ubvec[0] is written, using vertex counts as weights.
**************************************************************************/
void ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec)
{
  int v, c, heaviest;
  int nvtxs = graph->nvtxs;
  int ncon = graph->ncon;
  idxtype *weights = graph->vwgt;
  idxtype *partwgt;

  partwgt = idxsmalloc(nparts, 0, "ComputePartitionInfo: kpwgts");

  if (weights != NULL) {
    /* Weighted case: handle each constraint independently */
    for (c=0; c<ncon; c++) {
      idxset(nparts, 0, partwgt);
      for (v=0; v<nvtxs; v++)
        partwgt[where[v]] += weights[v*ncon+c];

      heaviest = idxamax(nparts, partwgt);
      ubvec[c] = 1.0*nparts*partwgt[heaviest]/(1.0*idxsum(nparts, partwgt));
    }
  }
  else {
    /* Unweighted case: every vertex counts as one unit */
    for (v=0; v<nvtxs; v++)
      partwgt[where[v]]++;

    heaviest = idxamax(nparts, partwgt);
    ubvec[0] = 1.0*nparts*partwgt[heaviest]/(1.0*nvtxs);
  }

  free(partwgt);
}
/*************************************************************************
* Recursive multilevel driver used for ordering: the graph is either
* multisected directly (when it is small enough, or when the last
* coarsening step did not shrink it sufficiently), or it is coarsened,
* partitioned recursively, and the result is projected back.  Node
* refinement is applied afterwards on every level except a coarsest graph
* that was itself produced by coarsening.
**************************************************************************/
void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int reached_target, stalled;

  SetUp(ctrl, graph, wspace);
  graph->ncon = 1;

  IFSET(ctrl->dbglvl, DBG_PROGRESS,
        rprintf(ctrl, "[%6d %8d %5d %5d][%d][%d]\n",
                graph->gnvtxs, GlobalSESum(ctrl, graph->nedges),
                GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs),
                ctrl->CoarsenTo,
                GlobalSEMax(ctrl, graph->vwgt[idxamax(graph->nvtxs, graph->vwgt)])));

  /* Small enough to partition directly? */
  reached_target = (graph->gnvtxs < 1.3*ctrl->CoarsenTo);

  /* Did the previous coarsening step fail to reduce the graph enough? */
  stalled = (graph->finer != NULL &&
             graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION);

  if (reached_target || stalled) {
    /* Compute the initial npart-way multisection */
    InitMultisection(ctrl, graph, wspace);

    /* Refine here only if no coarsening took place at all; otherwise the
       refinement happens on the finer levels after projection */
    if (graph->finer != NULL)
      return;
  }
  else {
    /* Coarsen the graph, partition the coarser graph, project back */
    Mc_LocalMatch_HEM(ctrl, graph, wspace);
    Order_Partition(ctrl, graph->coarser, wspace);
    Moc_ProjectPartition(ctrl, graph, wspace);
  }

  ComputeNodePartitionParams(ctrl, graph, wspace);
  KWayNodeRefine(ctrl, graph, wspace, 2*NGR_PASSES, ORDER_UNBALANCE_FRACTION);
}
/*************************************************************************
* Computes data-movement statistics for adaptive refinement schemes.
*
* Every vertex contributes its size (graph->vsize[i], or 1 when vsize is
* NULL) to three per-partition tallies: the partition it starts in (its
* "home": ctrl->mype when ctrl->ps_relation == COUPLED, graph->home[i]
* otherwise), the partition it ends in (where[i]), and - if those two
* differ - the amount leaving its home.  The tallies are summed over
* ctrl->comm with MPI_Allreduce.
*
*   nmoved  - output; total size that leaves its home partition
*   maxout  - output; largest size leaving any single partition
*   maxin   - output; largest value of (end + left - start) over partitions
**************************************************************************/
void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
  int i, p, nvtxs, nparts, home;
  idxtype size;
  idxtype *where, *vsize;
  idxtype *lend, *gend, *lleft, *gleft, *lstart, *gstart;

  nvtxs = graph->nvtxs;
  where = graph->where;
  vsize = graph->vsize;
  nparts = ctrl->nparts;

  /* l* arrays hold local tallies, g* arrays the reduced global ones */
  lstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lstart");
  gstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gstart");
  lleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lleft");
  gleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gleft");
  lend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lend");
  gend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gend");

  for (i=0; i<nvtxs; i++) {
    size = (vsize == NULL ? 1 : vsize[i]);

    if (ctrl->ps_relation == COUPLED)
      home = ctrl->mype;
    else
      home = graph->home[i];

    lstart[home] += size;
    lend[where[i]] += size;
    if (where[i] != home)
      lleft[home] += size;
  }

  MPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

  *nmoved = idxsum(nparts, gleft);
  *maxout = gleft[idxamax(nparts, gleft)];

  /* lstart is no longer needed; reuse it for the per-partition inflow */
  for (p=0; p<nparts; p++)
    lstart[p] = gend[p]+gleft[p]-gstart[p];
  *maxin = lstart[idxamax(nparts, lstart)];

  GKfree((void **)&lstart, (void **)&gstart, (void **)&lleft, (void **)&gleft,
         (void **)&lend, (void **)&gend, LTERM);
}
/*************************************************************************
* Reads the element-node array of a mesh from a text file.
*
* File layout, as parsed below: a header with two integers (number of
* elements, element type) followed by esize node numbers per element,
* where esize is 3, 4, 8 or 4 for element types 1, 2, 3 and 4.  Node
* numbers are 1-based in the file and are stored 0-based.
*
*   filename - path of the mesh file
*   ne       - output; number of elements from the header
*   nn       - output; largest 0-based node number found, plus one
*   etype    - output; element type from the header
*
* Returns the array of esize*(*ne) node numbers obtained from idxmalloc;
* the caller owns it.
*
* On any input error a message is printed and the process exits (with
* status 0, as this routine has always done).  A header that announces a
* non-positive number of elements is rejected explicitly, because the
* node-count computation at the end needs at least one entry.
**************************************************************************/
idxtype *ReadMesh(char *filename, int *ne, int *nn, int *etype)
{
  int i, total, node, esize = 0;
  idxtype *elmnts;
  FILE *fpin;

  if ((fpin = fopen(filename, "r")) == NULL) {
    printf("Failed to open file %s\n", filename);
    exit(0);
  }

  if (fscanf(fpin, "%d %d", ne, etype) != 2) {
    printf("Header line of input file does not contain two numbers.\n");
    exit(0);
  }

  if (*ne <= 0) {
    printf("Header line of input file specifies an invalid number of elements: %d\n", *ne);
    exit(0);
  }

  switch (*etype) {
    case 1:
      esize = 3;
      break;
    case 2:
      esize = 4;
      break;
    case 3:
      esize = 8;
      break;
    case 4:
      esize = 4;
      break;
    default:
      /* NOTE(review): errexit is expected not to return - confirm */
      errexit("Unknown mesh-element type: %d\n", *etype);
  }

  total = esize*(*ne);
  elmnts = idxmalloc(total, "ReadMesh: elmnts");

  for (i=0; i<total; i++) {
    /* Read through an int so that %d matches its argument whatever the
       width of idxtype is */
    if (fscanf(fpin, "%d", &node) != 1) {
      printf("Missing node number %d for element %d\n", i%esize+1, i/esize);
      exit(0);
    }
    elmnts[i] = node-1;  /* convert to 0-based numbering */
  }

  fclose(fpin);

  *nn = elmnts[idxamax(total, elmnts)]+1;

  return elmnts;
}
/*************************************************************************
* Computes the balance of an element partitioning.
*
*   ne      - number of elements
*   nparts  - number of partitions
*   where   - where[e] is the partition that element e is assigned to
*
* Returns nparts * (size of the largest partition) / (sum of all
* partition sizes), with every element counting as one unit.
**************************************************************************/
float ComputeElementBalance(int ne, int nparts, idxtype *where)
{
  int e, largest;
  float result;
  idxtype *counts;

  counts = idxsmalloc(nparts, 0, "ComputeElementBalance: kpwgts");

  /* Tally how many elements landed in each partition */
  for (e=0; e<ne; e++)
    counts[where[e]] += 1;

  largest = idxamax(nparts, counts);
  result = 1.0*nparts*counts[largest]/(1.0*idxsum(nparts, counts));

  free(counts);

  return result;
}
/*************************************************************************
* Tries to reduce the number of subdomains that the most-connected
* subdomain is adjacent to, by moving groups of vertices between
* subdomains.
*
*   ctrl    - control structure; supplies the workspace (including
*             ctrl->wspace.pmat, used as an nparts x nparts matrix)
*   graph   - graph with where[] and pwgts[] already set up
*   nparts  - number of subdomains
*   tpwgts  - target weight fraction of each subdomain
*
* Outline of each round of the outer loop:
*   1. Pick 'me', the subdomain with the largest ndoms[] entry (ndoms and
*      pmat are filled by ComputeSubDomainGraph).  Stop when that maximum
*      is below 1.4 times the average.
*   2. Walk the subdomains connected to 'me' in the order produced by
*      ikeysort on their pmat entry, considering only those whose
*      connection weight is at most totalout/(2*ndoms[me]).
*   3. For such a subdomain 'other', collect the vertices of 'other' that
*      have a neighbour in 'me', and look for a 'target' subdomain that is
*      already connected to 'me', stays within 1.25 times its target
*      weight, and does not create undesirable new connections.
*   4. If a target exists, move the group there with MoveGroupMConn and
*      start a new round; if no move was possible for any 'other', stop.
**************************************************************************/
void EliminateSubDomainEdges(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts)
{
  int i, ii, j, k, me, other, nvtxs, total, max, avg, totalout, nind, ncand, ncand2, target, target2, nadd;
  int min, move, cpwgt, tvwgt;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, *pmat, *ndoms, *mypmat, *otherpmat, *ind;
  KeyValueType *cand, *cand2;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;
  adjwgt = graph->adjwgt;

  where = graph->where;
  pwgts = graph->pwgts;  /* We assume that this is properly initialized */

  maxpwgt = idxwspacemalloc(ctrl, nparts);
  ndoms = idxwspacemalloc(ctrl, nparts);
  otherpmat = idxwspacemalloc(ctrl, nparts);
  ind = idxwspacemalloc(ctrl, nvtxs);

  pmat = ctrl->wspace.pmat;

  cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand");
  cand2 = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand2");

  /* Compute the pmat matrix and ndoms */
  ComputeSubDomainGraph(graph, nparts, pmat, ndoms);

  /* Compute the maximum allowed weight for each domain */
  tvwgt = idxsum(nparts, pwgts);
  for (i=0; i<nparts; i++)
    maxpwgt[i] = 1.25*tpwgts[i]*tvwgt;

  /* Get into the loop eliminating subdomain connections */
  for (;;) {
    total = idxsum(nparts, ndoms);
    avg = total/nparts;
    max = ndoms[idxamax(nparts, ndoms)];

    /* Nothing to do once the worst subdomain is close enough to average */
    if (max < 1.4*avg)
      break;

    me = idxamax(nparts, ndoms);
    mypmat = pmat + me*nparts;   /* row of pmat that belongs to 'me' */
    totalout = idxsum(nparts, mypmat);

    /* Sort the connections according to their cut */
    for (ncand2=0, i=0; i<nparts; i++) {
      if (mypmat[i] > 0) {
        cand2[ncand2].key = mypmat[i];
        cand2[ncand2++].val = i;
      }
    }
    ikeysort(ncand2, cand2);

    move = 0;
    for (min=0; min<ncand2; min++) {
      /* Only connections that are light relative to the total are tried */
      if (cand2[min].key > totalout/(2*ndoms[me]))
        break;

      other = cand2[min].val;

      idxset(nparts, 0, otherpmat);

      /* Go and find the vertices in 'other' that are connected in 'me' */
      for (nind=0, i=0; i<nvtxs; i++) {
        if (where[i] == other) {
          for (j=xadj[i]; j<xadj[i+1]; j++) {
            if (where[adjncy[j]] == me) {
              ind[nind++] = i;
              break;
            }
          }
        }
      }

      /* Go and construct the otherpmat to see where these nind vertices are connected to */
      for (cpwgt=0, ii=0; ii<nind; ii++) {
        i = ind[ii];
        cpwgt += vwgt[i];   /* total weight of the group that would move */

        for (j=xadj[i]; j<xadj[i+1]; j++)
          otherpmat[where[adjncy[j]]] += adjwgt[j];
      }
      otherpmat[other] = 0;

      /* Keys are negated so that the sort visits heavier connections first
         (this presumes ikeysort orders keys increasingly - confirm) */
      for (ncand=0, i=0; i<nparts; i++) {
        if (otherpmat[i] > 0) {
          cand[ncand].key = -otherpmat[i];
          cand[ncand++].val = i;
        }
      }
      ikeysort(ncand, cand);

      /*
       * Go through and select the first domain that is common with 'me', and
       * does not increase the ndoms[target] higher than my ndoms, subject to the
       * maxpwgt constraint. Traversal is done from the mostly connected to the least.
       * 'target' is a domain that gains no new connections (nadd == 0);
       * 'target2' is the first fallback whose ndoms stays below ndoms[me].
       */
      target = target2 = -1;
      for (i=0; i<ncand; i++) {
        k = cand[i].val;

        if (mypmat[k] > 0) {
          if (pwgts[k] + cpwgt > maxpwgt[k])  /* Check if balance will go off */
            continue;

          /* Reject k if the move would connect it to a domain j that is
             itself already (nearly) as connected as 'me' */
          for (j=0; j<nparts; j++) {
            if (otherpmat[j] > 0 && ndoms[j] >= ndoms[me]-1 && pmat[nparts*j+k] == 0)
              break;
          }
          if (j == nparts) { /* No bad second level effects */
            /* Count the connections that are new for k */
            for (nadd=0, j=0; j<nparts; j++) {
              if (otherpmat[j] > 0 && pmat[nparts*k+j] == 0)
                nadd++;
            }

            if (target2 == -1 && ndoms[k]+nadd < ndoms[me]) {
              target2 = k;
            }
            if (nadd == 0) {
              target = k;
              break;
            }
          }
        }
      }
      if (target == -1 && target2 != -1)
        target = target2;

      if (target == -1) {
        /* Could not make the move; try the next 'other' */
        continue;
      }

      /* Update the partition weights */
      INC_DEC(pwgts[target], pwgts[other], cpwgt);

      MoveGroupMConn(ctrl, graph, ndoms, pmat, nparts, target, nind, ind);

      move = 1;
      break;
    }

    if (move == 0)
      break;
  }

  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nvtxs);

  /* Cast explicitly, as the other GKfree call sites in this file do */
  GKfree((void **)&cand, (void **)&cand2, LTERM);
}
/*************************************************************************
* Greedy k-way balancing pass that also keeps track of subdomain
* connectivity (pmat / ndoms, as filled by ComputeSubDomainGraph).
*
*   ctrl      - control structure; supplies workspace and debug level
*   graph     - graph with where, pwgts, rinfo and boundary info set up
*   nparts    - number of partitions
*   tpwgts    - target weight fraction of each partition
*   ubfactor  - allowed imbalance; a partition may weigh between
*               tpwgts*tvwgt/ubfactor and tpwgts*tvwgt*ubfactor
*   npasses   - maximum number of passes
*
* A pass is skipped (and the function returns) as soon as no partition
* exceeds its maximum weight.  Otherwise the boundary vertices are put in
* a priority queue keyed by ed-id and moved one at a time to an adjacent
* partition that is "valid" with respect to connectivity, updating the
* cut, partition weights, boundary, degree info, pmat and ndoms.
**************************************************************************/
void Greedy_KWayEdgeBalanceMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses)
{
  int i, ii, iii, j, jj, k, l, pass, nvtxs, nbnd, tvwgt, myndegrees, oldgain, gain, nmoves;
  int from, me, to, oldcut, vwgt, maxndoms, nadd;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *moved, *itpwgts;
  idxtype *phtable, *pmat, *pmatptr, *ndoms;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  PQueueType queue;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;

  where = graph->where;
  pwgts = graph->pwgts;

  pmat = ctrl->wspace.pmat;
  phtable = idxwspacemalloc(ctrl, nparts);  /* per-partition "valid target" flags */
  ndoms = idxwspacemalloc(ctrl, nparts);

  ComputeSubDomainGraph(graph, nparts, pmat, ndoms);

  /* Setup the weight intervals of the various subdomains */
  minwgt = idxwspacemalloc(ctrl, nparts);
  maxwgt = idxwspacemalloc(ctrl, nparts);
  itpwgts = idxwspacemalloc(ctrl, nparts);
  tvwgt = idxsum(nparts, pwgts);
  ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt));

  for (i=0; i<nparts; i++) {
    itpwgts[i] = tpwgts[i]*tvwgt;
    maxwgt[i] = tpwgts[i]*tvwgt*ubfactor;
    minwgt[i] = tpwgts[i]*tvwgt*(1.0/ubfactor);
  }

  perm = idxwspacemalloc(ctrl, nvtxs);
  moved = idxwspacemalloc(ctrl, nvtxs);

  PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]);

  IFSET(ctrl->dbglvl, DBG_REFINE,
     printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d [B]\n",
             pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0],
             1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd,
             graph->mincut));

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    /* Check to see if things are out of balance, given the tolerance */
    for (i=0; i<nparts; i++) {
      if (pwgts[i] > maxwgt[i])
        break;
    }
    if (i == nparts) /* Things are balanced. Return right away */
      break;

    PQueueReset(&queue);
    /* moved[] states used below: -1 = not in queue, 2 = in queue,
       1 = already extracted from the queue in this pass */
    idxset(nvtxs, -1, moved);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    /* Insert the boundary vertices in random order, keyed by ed-id */
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      PQueueInsert(&queue, i, graph->rinfo[i].ed - graph->rinfo[i].id);
      moved[i] = 2;
    }

    maxndoms = ndoms[idxamax(nparts, ndoms)];

    for (nmoves=0;;) {
      if ((i = PQueueGetMax(&queue)) == -1)
        break;
      moved[i] = 1;

      myrinfo = graph->rinfo+i;
      from = where[i];
      vwgt = graph->vwgt[i];

      if (pwgts[from]-vwgt < minwgt[from])
        continue;   /* This cannot be moved! */

      myedegrees = myrinfo->edegrees;
      myndegrees = myrinfo->ndegrees;

      /* Determine the valid domains: 'to' is valid unless moving i there
         would push ndoms[to], or the ndoms of another neighbouring domain
         that 'to' is not yet connected to, above maxndoms */
      for (j=0; j<myndegrees; j++) {
        to = myedegrees[j].pid;
        phtable[to] = 1;
        pmatptr = pmat + to*nparts;
        for (nadd=0, k=0; k<myndegrees; k++) {
          if (k == j)
            continue;

          l = myedegrees[k].pid;
          if (pmatptr[l] == 0) {
            if (ndoms[l] > maxndoms-1) {
              phtable[to] = 0;
              nadd = maxndoms;
              break;
            }
            nadd++;
          }
        }
        if (ndoms[to]+nadd > maxndoms)
          phtable[to] = 0;
      }

      /* First valid domain that either has room for i, or would end up
         relatively lighter than 'from' currently is */
      for (k=0; k<myndegrees; k++) {
        to = myedegrees[k].pid;
        if (!phtable[to])
          continue;
        if (pwgts[to]+vwgt <= maxwgt[to] || itpwgts[from]*(pwgts[to]+vwgt) <= itpwgts[to]*pwgts[from])
          break;
      }
      if (k == myndegrees)
        continue;  /* break out if you did not find a candidate */

      /* Among the remaining valid domains prefer the one that is lightest
         relative to its target weight */
      for (j=k+1; j<myndegrees; j++) {
        to = myedegrees[j].pid;
        if (!phtable[to])
          continue;
        if (itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid])
          k = j;
      }

      to = myedegrees[k].pid;

      /* Skip moves that worsen the cut when neither side needs balancing */
      if (pwgts[from] < maxwgt[from] && pwgts[to] > minwgt[to] && myedegrees[k].ed-myrinfo->id < 0)
        continue;

      /*=====================================================================
      * If we got here, we can now move the vertex from 'from' to 'to'
      *======================================================================*/
      graph->mincut -= myedegrees[k].ed-myrinfo->id;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

      /* Update pmat to reflect the move of 'i' */
      pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed);
      pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed);
      if (pmat[from*nparts+to] == 0) {
        ndoms[from]--;
        /* The maximum may have dropped; recompute it */
        if (ndoms[from]+1 == maxndoms)
          maxndoms = ndoms[idxamax(nparts, ndoms)];
      }
      if (pmat[to*nparts+from] == 0) {
        ndoms[to]--;
        if (ndoms[to]+1 == maxndoms)
          maxndoms = ndoms[idxamax(nparts, ndoms)];
      }

      /* Update where, weight, and ID/ED information of the vertex you moved */
      where[i] = to;
      INC_DEC(pwgts[to], pwgts[from], vwgt);
      myrinfo->ed += myrinfo->id-myedegrees[k].ed;
      SWAP(myrinfo->id, myedegrees[k].ed, j);
      if (myedegrees[k].ed == 0)
        myedegrees[k] = myedegrees[--myrinfo->ndegrees];
      else
        myedegrees[k].pid = from;

      if (myrinfo->ed == 0)
        BNDDelete(nbnd, bndind, bndptr, i);

      /* Update the degrees of adjacent vertices */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        ii = adjncy[j];
        me = where[ii];

        myrinfo = graph->rinfo+ii;
        /* Carve a degree array for ii out of the shared workspace on first use */
        if (myrinfo->edegrees == NULL) {
          myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
          ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
        }
        myedegrees = myrinfo->edegrees;

        ASSERT(CheckRInfo(myrinfo));

        oldgain = (myrinfo->ed-myrinfo->id);

        if (me == from) {
          INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

          if (myrinfo->ed > 0 && bndptr[ii] == -1)
            BNDInsert(nbnd, bndind, bndptr, ii);
        }
        else if (me == to) {
          INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

          if (myrinfo->ed == 0 && bndptr[ii] != -1)
            BNDDelete(nbnd, bndind, bndptr, ii);
        }

        /* Remove contribution from the .ed of 'from' */
        if (me != from) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == from) {
              if (myedegrees[k].ed == adjwgt[j])
                myedegrees[k] = myedegrees[--myrinfo->ndegrees];
              else
                myedegrees[k].ed -= adjwgt[j];
              break;
            }
          }
        }

        /* Add contribution to the .ed of 'to' */
        if (me != to) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == to) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = to;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }

        /* Update pmat to reflect the move of 'i' for domains other than 'from' and 'to' */
        if (me != from && me != to) {
          pmat[me*nparts+from] -= adjwgt[j];
          pmat[from*nparts+me] -= adjwgt[j];
          if (pmat[me*nparts+from] == 0) {
            ndoms[me]--;
            if (ndoms[me]+1 == maxndoms)
              maxndoms = ndoms[idxamax(nparts, ndoms)];
          }
          if (pmat[from*nparts+me] == 0) {
            ndoms[from]--;
            if (ndoms[from]+1 == maxndoms)
              maxndoms = ndoms[idxamax(nparts, ndoms)];
          }

          /* A zero entry means 'me' and 'to' become newly connected */
          if (pmat[me*nparts+to] == 0) {
            ndoms[me]++;
            if (ndoms[me] > maxndoms) {
              printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms);
              maxndoms = ndoms[me];
            }
          }
          if (pmat[to*nparts+me] == 0) {
            ndoms[to]++;
            if (ndoms[to] > maxndoms) {
              printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms);
              maxndoms = ndoms[to];
            }
          }
          pmat[me*nparts+to] += adjwgt[j];
          pmat[to*nparts+me] += adjwgt[j];
        }

        /* Update the queue */
        if (me == to || me == from) {
          gain = myrinfo->ed-myrinfo->id;
          if (moved[ii] == 2) {
            if (myrinfo->ed > 0)
              PQueueUpdate(&queue, ii, oldgain, gain);
            else {
              PQueueDelete(&queue, ii, oldgain);
              moved[ii] = -1;
            }
          }
          else if (moved[ii] == -1 && myrinfo->ed > 0) {
            PQueueInsert(&queue, ii, gain);
            moved[ii] = 2;
          }
        }

        ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
        ASSERT(CheckRInfo(myrinfo));
      }
      nmoves++;
    }

    graph->nbnd = nbnd;

    IFSET(ctrl->dbglvl, DBG_REFINE,
       printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %6d, %d\n",
               pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)],
               1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves, graph->mincut,idxsum(nparts, ndoms)));
  }

  PQueueFree(ctrl, &queue);

  /* Release the workspace: five nparts-sized and two nvtxs-sized arrays */
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}
/*************************************************************************
* Randomized k-way edge refinement that also keeps track of subdomain
* connectivity (pmat / ndoms, as filled by ComputeSubDomainGraph).
*
*   ctrl      - control structure; supplies workspace and debug level
*   graph     - graph with where, pwgts, rinfo and boundary info set up
*   nparts    - number of partitions
*   tpwgts    - target weight fraction of each partition
*   ubfactor  - allowed imbalance; a partition may weigh between
*               tpwgts*tvwgt/ubfactor and tpwgts*tvwgt*ubfactor
*   npasses   - maximum number of passes
*   ffactor   - multiplies the gain of a move to relax the maximum-weight
*               test for the destination partition
*
* Each pass visits the boundary vertices in random order and moves a
* vertex whose ed >= id to a valid adjacent partition when that does not
* worsen the cut.  The loop stops early when a pass leaves the cut
* unchanged.
**************************************************************************/
void Random_KWayEdgeRefineMConn(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses, int ffactor)
{
  int i, ii, iii, j, jj, k, l, pass, nvtxs, nmoves, nbnd, tvwgt, myndegrees;
  int from, me, to, oldcut, vwgt, gain;
  int maxndoms, nadd;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *itpwgts;
  idxtype *phtable, *pmat, *pmatptr, *ndoms;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndptr = graph->bndptr;
  bndind = graph->bndind;

  where = graph->where;
  pwgts = graph->pwgts;

  pmat = ctrl->wspace.pmat;
  phtable = idxwspacemalloc(ctrl, nparts);  /* per-partition "valid target" flags */
  ndoms = idxwspacemalloc(ctrl, nparts);

  ComputeSubDomainGraph(graph, nparts, pmat, ndoms);

  /* Setup the weight intervals of the various subdomains */
  minwgt = idxwspacemalloc(ctrl, nparts);
  maxwgt = idxwspacemalloc(ctrl, nparts);
  itpwgts = idxwspacemalloc(ctrl, nparts);
  tvwgt = idxsum(nparts, pwgts);
  ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt));

  for (i=0; i<nparts; i++) {
    itpwgts[i] = tpwgts[i]*tvwgt;
    maxwgt[i] = tpwgts[i]*tvwgt*ubfactor;
    minwgt[i] = tpwgts[i]*tvwgt*(1.0/ubfactor);
  }

  perm = idxwspacemalloc(ctrl, nvtxs);

  IFSET(ctrl->dbglvl, DBG_REFINE,
     printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d\n",
             pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0],
             1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd,
             graph->mincut));

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    maxndoms = ndoms[idxamax(nparts, ndoms)];

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    /* graph->nbnd keeps the boundary size from the start of the pass;
       nbnd is the live size, changed by BNDInsert/BNDDelete below */
    for (nmoves=iii=0; iii<graph->nbnd; iii++) {
      ii = perm[iii];
      if (ii >= nbnd)   /* this slot is no longer part of the boundary list */
        continue;
      i = bndind[ii];

      myrinfo = graph->rinfo+i;

      if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */
        from = where[i];
        vwgt = graph->vwgt[i];

        if (myrinfo->id > 0 && pwgts[from]-vwgt < minwgt[from])
          continue;   /* This cannot be moved! */

        myedegrees = myrinfo->edegrees;
        myndegrees = myrinfo->ndegrees;

        /* Determine the valid domains.  phtable[to] ends up as
           0 = not allowed, 1 = allowed, 2 = allowed and the move adds no
           new subdomain connection (nadd == 0) */
        for (j=0; j<myndegrees; j++) {
          to = myedegrees[j].pid;
          phtable[to] = 1;
          pmatptr = pmat + to*nparts;
          for (nadd=0, k=0; k<myndegrees; k++) {
            if (k == j)
              continue;

            l = myedegrees[k].pid;
            if (pmatptr[l] == 0) {
              if (ndoms[l] > maxndoms-1) {
                phtable[to] = 0;
                nadd = maxndoms;
                break;
              }
              nadd++;
            }
          }
          if (ndoms[to]+nadd > maxndoms)
            phtable[to] = 0;
          if (nadd == 0)
            phtable[to] = 2;
        }

        /* Find the first valid move */
        j = myrinfo->id;
        for (k=0; k<myndegrees; k++) {
          to = myedegrees[k].pid;
          if (!phtable[to])
            continue;
          gain = myedegrees[k].ed-j; /* j = myrinfo->id. Allow good nodes to move */
          if (pwgts[to]+vwgt <= maxwgt[to]+ffactor*gain && gain >= 0)
            break;
        }
        if (k == myndegrees)
          continue;  /* break out if you did not find a candidate */

        /* Look for a better destination: larger ed that still fits, or
           equal ed with a relatively lighter partition */
        for (j=k+1; j<myndegrees; j++) {
          to = myedegrees[j].pid;
          if (!phtable[to])
            continue;
          if ((myedegrees[j].ed > myedegrees[k].ed && pwgts[to]+vwgt <= maxwgt[to]) ||
              (myedegrees[j].ed == myedegrees[k].ed &&
               itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid]))
            k = j;
        }

        to = myedegrees[k].pid;

        /* j is used as a flag here: 1 = perform the move.  Positive gain
           always moves; zero gain moves only if it helps connectivity or
           balance */
        j = 0;
        if (myedegrees[k].ed-myrinfo->id > 0)
          j = 1;
        else if (myedegrees[k].ed-myrinfo->id == 0) {
          if (/*(iii&7) == 0 ||*/ phtable[myedegrees[k].pid] == 2 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from])
            j = 1;
        }
        if (j == 0)
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to'
        *======================================================================*/
        graph->mincut -= myedegrees[k].ed-myrinfo->id;

        IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

        /* Update pmat to reflect the move of 'i' */
        pmat[from*nparts+to] += (myrinfo->id-myedegrees[k].ed);
        pmat[to*nparts+from] += (myrinfo->id-myedegrees[k].ed);
        if (pmat[from*nparts+to] == 0) {
          ndoms[from]--;
          /* The maximum may have dropped; recompute it */
          if (ndoms[from]+1 == maxndoms)
            maxndoms = ndoms[idxamax(nparts, ndoms)];
        }
        if (pmat[to*nparts+from] == 0) {
          ndoms[to]--;
          if (ndoms[to]+1 == maxndoms)
            maxndoms = ndoms[idxamax(nparts, ndoms)];
        }

        /* Update where, weight, and ID/ED information of the vertex you moved */
        where[i] = to;
        INC_DEC(pwgts[to], pwgts[from], vwgt);
        myrinfo->ed += myrinfo->id-myedegrees[k].ed;
        SWAP(myrinfo->id, myedegrees[k].ed, j);
        if (myedegrees[k].ed == 0)
          myedegrees[k] = myedegrees[--myrinfo->ndegrees];
        else
          myedegrees[k].pid = from;

        /* In this routine a vertex stays on the boundary list while ed >= id */
        if (myrinfo->ed-myrinfo->id < 0)
          BNDDelete(nbnd, bndind, bndptr, i);

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = graph->rinfo+ii;
          /* Carve a degree array for ii out of the shared workspace on first use */
          if (myrinfo->edegrees == NULL) {
            myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
            ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
          }
          myedegrees = myrinfo->edegrees;

          ASSERT(CheckRInfo(myrinfo));

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
              BNDInsert(nbnd, bndind, bndptr, ii);
          }
          else if (me == to) {
            INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1)
              BNDDelete(nbnd, bndind, bndptr, ii);
          }

          /* Remove contribution from the .ed of 'from' */
          if (me != from) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == from) {
                if (myedegrees[k].ed == adjwgt[j])
                  myedegrees[k] = myedegrees[--myrinfo->ndegrees];
                else
                  myedegrees[k].ed -= adjwgt[j];
                break;
              }
            }
          }

          /* Add contribution to the .ed of 'to' */
          if (me != to) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == to) {
                myedegrees[k].ed += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              myedegrees[myrinfo->ndegrees].pid = to;
              myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
            }
          }

          /* Update pmat to reflect the move of 'i' for domains other than 'from' and 'to' */
          if (me != from && me != to) {
            pmat[me*nparts+from] -= adjwgt[j];
            pmat[from*nparts+me] -= adjwgt[j];
            if (pmat[me*nparts+from] == 0) {
              ndoms[me]--;
              if (ndoms[me]+1 == maxndoms)
                maxndoms = ndoms[idxamax(nparts, ndoms)];
            }
            if (pmat[from*nparts+me] == 0) {
              ndoms[from]--;
              if (ndoms[from]+1 == maxndoms)
                maxndoms = ndoms[idxamax(nparts, ndoms)];
            }

            /* A zero entry means 'me' and 'to' become newly connected */
            if (pmat[me*nparts+to] == 0) {
              ndoms[me]++;
              if (ndoms[me] > maxndoms) {
                printf("You just increased the maxndoms: %d %d\n", ndoms[me], maxndoms);
                maxndoms = ndoms[me];
              }
            }
            if (pmat[to*nparts+me] == 0) {
              ndoms[to]++;
              if (ndoms[to] > maxndoms) {
                printf("You just increased the maxndoms: %d %d\n", ndoms[to], maxndoms);
                maxndoms = ndoms[to];
              }
            }
            pmat[me*nparts+to] += adjwgt[j];
            pmat[to*nparts+me] += adjwgt[j];
          }

          ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
          ASSERT(CheckRInfo(myrinfo));
        }
        nmoves++;
      }
    }

    graph->nbnd = nbnd;

    IFSET(ctrl->dbglvl, DBG_REFINE,
       printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Nmoves: %5d, Cut: %5d, Vol: %5d, %d\n",
               pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)],
               1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, nmoves,
               graph->mincut, ComputeVolume(graph, where), idxsum(nparts, ndoms)));

    /* Stop as soon as a full pass did not change the cut */
    if (graph->mincut == oldcut)
      break;
  }

  /* Release the workspace: five nparts-sized arrays and one nvtxs-sized array */
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nvtxs);
}
/*************************************************************************
* Greedy k-way balancing for graphs with multiple (ncon) vertex-weight
* constraints.
*
*   ctrl     - control structure; supplies workspace and debug level
*   graph    - graph with where, npwgts, nvwgt, rinfo and boundary info
*   nparts   - number of partitions
*   ubvec    - per-constraint imbalance tolerance; the weight interval of
*              every partition for constraint j is
*              [1/(ubvec[j]*nparts), ubvec[j]/nparts]
*   npasses  - maximum number of passes
*
* Passes stop when MocIsHBalanced reports a balanced partitioning or when
* a pass performs no move.  Within a pass, boundary vertices are taken
* from a priority queue keyed by ed-id and moved to the adjacent
* partition chosen by the IsHBalanceBetterFT / IsHBalanceBetterTT tests.
**************************************************************************/
void MCGreedy_KWayEdgeBalanceHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, float *ubvec, int npasses)
{
  int i, ii, /*iii,*/ j, /*jj,*/ k, /*l,*/ pass, nvtxs, ncon, nbnd, myndegrees, oldgain, gain, nmoves;
  int from, me, to, oldcut;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *perm, *bndptr, *bndind, *moved;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  PQueueType queue;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, tvec[MAXNCON];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;

  where = graph->where;
  npwgts = graph->npwgts;   /* indexed as npwgts[partition*ncon + constraint] */

  /* Setup the weight intervals of the various subdomains */
  minwgt = fwspacemalloc(ctrl, ncon*nparts);
  maxwgt = fwspacemalloc(ctrl, ncon*nparts);

  for (i=0; i<nparts; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = ubvec[j]/nparts;
      minwgt[i*ncon+j] = 1.0/(ubvec[j]*nparts);
    }
  }

  perm = idxwspacemalloc(ctrl, nvtxs);
  moved = idxwspacemalloc(ctrl, nvtxs);

  PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]);

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ",
            npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)],
            graph->nvtxs, graph->nbnd, graph->mincut);
    ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
    for (i=0; i<ncon; i++)
      printf("%.3f ", tvec[i]);
    printf("[B]\n");
  }

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    /* Check to see if things are out of balance, given the tolerance */
    if (MocIsHBalanced(ncon, nparts, npwgts, ubvec))
      break;

    PQueueReset(&queue);
    /* moved[] states used below: -1 = not in queue, 2 = in queue,
       1 = already extracted from the queue in this pass */
    idxset(nvtxs, -1, moved);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    /* Insert the boundary vertices in random order, keyed by ed-id */
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      PQueueInsert(&queue, i, graph->rinfo[i].ed - graph->rinfo[i].id);
      moved[i] = 2;
    }

    nmoves = 0;
    for (;;) {
      if ((i = PQueueGetMax(&queue)) == -1)
        break;
      moved[i] = 1;

      myrinfo = graph->rinfo+i;
      from = where[i];
      nvwgt = graph->nvwgt+i*ncon;   /* the ncon weights of vertex i */

      /* NOTE(review): presumably tests npwgts[from] - nvwgt against
         minwgt[from] for every constraint - confirm against the helper */
      if (AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon))
        continue;   /* This cannot be moved! */

      myedegrees = myrinfo->edegrees;
      myndegrees = myrinfo->ndegrees;

      /* First adjacent partition accepted by the from->to balance test */
      for (k=0; k<myndegrees; k++) {
        to = myedegrees[k].pid;
        if (IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))
          break;
      }
      if (k == myndegrees)
        continue;  /* break out if you did not find a candidate */

      /* Replace it by any later candidate that the to-vs-to test prefers */
      for (j=k+1; j<myndegrees; j++) {
        to = myedegrees[j].pid;
        if (IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec))
          k = j;
      }

      to = myedegrees[k].pid;

      /* j counts the reasons for making the move; with none, skip it */
      j = 0;
      if (!AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, maxwgt+from*ncon))
        j++;
      if (myedegrees[k].ed-myrinfo->id >= 0)
        j++;
      if (!AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) &&
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon))
        j++;
      if (j == 0)
        continue;

/* DELETE
      if (myedegrees[k].ed-myrinfo->id < 0 &&
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, maxwgt+from*ncon) &&
          AreAllHVwgtsAbove(ncon, 1.0, npwgts+to*ncon, 0.0, nvwgt, minwgt+to*ncon) &&
          AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon))
        continue;
*/
      /*=====================================================================
      * If we got here, we can now move the vertex from 'from' to 'to'
      *======================================================================*/
      graph->mincut -= myedegrees[k].ed-myrinfo->id;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

      /* Update where, weight, and ID/ED information of the vertex you moved */
      saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
      where[i] = to;
      myrinfo->ed += myrinfo->id-myedegrees[k].ed;
      SWAP(myrinfo->id, myedegrees[k].ed, j);
      if (myedegrees[k].ed == 0)
        myedegrees[k] = myedegrees[--myrinfo->ndegrees];
      else
        myedegrees[k].pid = from;

      if (myrinfo->ed == 0)
        BNDDelete(nbnd, bndind, bndptr, i);

      /* Update the degrees of adjacent vertices */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        ii = adjncy[j];
        me = where[ii];

        myrinfo = graph->rinfo+ii;
        /* Carve a degree array for ii out of the shared workspace on first use */
        if (myrinfo->edegrees == NULL) {
          myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
          ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
        }
        myedegrees = myrinfo->edegrees;

        ASSERT(CheckRInfo(myrinfo));

        oldgain = (myrinfo->ed-myrinfo->id);

        if (me == from) {
          INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

          if (myrinfo->ed > 0 && bndptr[ii] == -1)
            BNDInsert(nbnd, bndind, bndptr, ii);
        }
        else if (me == to) {
          INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

          if (myrinfo->ed == 0 && bndptr[ii] != -1)
            BNDDelete(nbnd, bndind, bndptr, ii);
        }

        /* Remove contribution from the .ed of 'from' */
        if (me != from) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == from) {
              if (myedegrees[k].ed == adjwgt[j])
                myedegrees[k] = myedegrees[--myrinfo->ndegrees];
              else
                myedegrees[k].ed -= adjwgt[j];
              break;
            }
          }
        }

        /* Add contribution to the .ed of 'to' */
        if (me != to) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == to) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = to;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }

        /* Update the queue */
        if (me == to || me == from) {
          gain = myrinfo->ed-myrinfo->id;
          if (moved[ii] == 2) {
            if (myrinfo->ed > 0)
              PQueueUpdate(&queue, ii, oldgain, gain);
            else {
              PQueueDelete(&queue, ii, oldgain);
              moved[ii] = -1;
            }
          }
          else if (moved[ii] == -1 && myrinfo->ed > 0) {
            PQueueInsert(&queue, ii, gain);
            moved[ii] = 2;
          }
        }

        ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
        ASSERT(CheckRInfo(myrinfo));
      }
      nmoves++;
    }

    graph->nbnd = nbnd;

    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ",
              npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)],
              nbnd, nmoves, graph->mincut);
      ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
      for (i=0; i<ncon; i++)
        printf("%.3f ", tvec[i]);
      printf("\n");
    }

    /* A pass without moves cannot be improved by further passes */
    if (nmoves == 0)
      break;
  }

  PQueueFree(ctrl, &queue);

  /* Release the workspace: two float arrays and two nvtxs-sized index arrays */
  fwspacefree(ctrl, ncon*nparts);
  fwspacefree(ctrl, ncon*nparts);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}
/************************************************************************* * This function converts a mesh into a dual graph **************************************************************************/ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, int *numflag, int *ncommonnodes, idxtype **xadj, idxtype **adjncy, MPI_Comm *comm) { int i, j, jj, k, kk, m; int npes, mype, pe, count, mask, pass; int nelms, lnns, my_nns, node; int firstelm, firstnode, lnode, nrecv, nsend; int *scounts, *rcounts, *sdispl, *rdispl; idxtype *nodedist, *nmap, *auxarray; idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL; idxtype *sbuffer, *rbuffer, *htable; KeyValueType *nodelist, *recvbuffer; idxtype ind[200], wgt[200]; int gmaxnode, gminnode; CtrlType ctrl; SetUpCtrl(&ctrl, -1, 0, *comm); npes = ctrl.npes; mype = ctrl.mype; nelms = elmdist[mype+1]-elmdist[mype]; if (*numflag == 1) ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1); mask = (1<<11)-1; /*****************************/ /* Determine number of nodes */ /*****************************/ gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]); for (i=0; i<eptr[nelms]; i++) eind[i] -= gminnode; gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]); /**************************/ /* Check for input errors */ /**************************/ ASSERTS(nelms > 0); /* construct node distribution array */ nodedist = idxsmalloc(npes+1, 0, "nodedist"); for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) { k = j/(npes-i); nodedist[i+1] = nodedist[i]+k; j -= k; } my_nns = nodedist[mype+1]-nodedist[mype]; firstnode = nodedist[mype]; nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist"); auxarray = idxmalloc(eptr[nelms], "auxarray"); htable = idxsmalloc(amax(my_nns, mask+1), -1, "htable"); scounts = imalloc(4*npes+2, "scounts"); rcounts = scounts+npes; sdispl = scounts+2*npes; rdispl = scounts+3*npes+1; 
/*********************************************/ /* first find a local numbering of the nodes */ /*********************************************/ for (i=0; i<nelms; i++) { for (j=eptr[i]; j<eptr[i+1]; j++) { nodelist[j].key = eind[j]; nodelist[j].val = j; auxarray[j] = i; /* remember the local element ID that uses this node */ } } ikeysort(eptr[nelms], nodelist); for (count=1, i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) count++; } lnns = count; nmap = idxmalloc(lnns, "nmap"); /* renumber the nodes of the elements array */ count = 1; nmap[0] = nodelist[0].key; eind[nodelist[0].val] = 0; nodelist[0].val = auxarray[nodelist[0].val]; /* Store the local element ID */ for (i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) { nmap[count] = nodelist[i].key; count++; } eind[nodelist[i].val] = count-1; nodelist[i].val = auxarray[nodelist[i].val]; /* Store the local element ID */ } MPI_Barrier(*comm); /**********************************************************/ /* perform comms necessary to construct node-element list */ /**********************************************************/ iset(npes, 0, scounts); for (pe=i=0; i<eptr[nelms]; i++) { while (nodelist[i].key >= nodedist[pe+1]) pe++; scounts[pe] += 2; } ASSERTS(pe < npes); MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); ASSERTS(sdispl[npes] == eptr[nelms]*2); nrecv = rdispl[npes]/2; recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer"); MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, rcounts, rdispl, IDX_DATATYPE, *comm); /**************************************/ /* construct global node-element list */ /**************************************/ gnptr = idxsmalloc(my_nns+1, 0, "gnptr"); for (i=0; i<npes; i++) { for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) { lnode = 
recvbuffer[j].key-firstnode; ASSERTS(lnode >= 0 && lnode < my_nns) gnptr[lnode]++; } } MAKECSR(i, my_nns, gnptr); gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind"); for (pe=0; pe<npes; pe++) { firstelm = elmdist[pe]; for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm; } } SHIFTCSR(i, my_nns, gnptr); /*********************************************************/ /* send the node-element info to the relevant processors */ /*********************************************************/ iset(npes, 0, scounts); /* use a hash table to ensure that each node is sent to a proc only once */ for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { scounts[pe] += gnptr[lnode+1]-gnptr[lnode]; htable[lnode] = 1; } } /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); /* create the send buffer */ nsend = sdispl[npes]; sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend)); count = 0; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) { if (k == gnptr[lnode]) sbuffer[count++] = -1*(gnind[k]+1); else sbuffer[count++] = gnind[k]; } htable[lnode] = 1; } } ASSERTS(count == sdispl[pe+1]); /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); nrecv = rdispl[npes]; rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv)); MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, rcounts, rdispl, 
IDX_DATATYPE, *comm); k = -1; nptr = idxsmalloc(lnns+1, 0, "nptr"); nind = rbuffer; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]; j<rdispl[pe+1]; j++) { if (nind[j] < 0) { k++; nind[j] = (-1*nind[j])-1; } nptr[k]++; } } MAKECSR(i, lnns, nptr); ASSERTS(k+1 == lnns); ASSERTS(nptr[lnns] == nrecv) myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj"); idxset(mask+1, -1, htable); firstelm = elmdist[mype]; /* Two passes -- in first pass, simply find out the memory requirements */ for (pass=0; pass<2; pass++) { for (i=0; i<nelms; i++) { for (count=0, j=eptr[i]; j<eptr[i+1]; j++) { node = eind[j]; for (k=nptr[node]; k<nptr[node+1]; k++) { if ((kk=nind[k]) == firstelm+i) continue; m = htable[(kk&mask)]; if (m == -1) { ind[count] = kk; wgt[count] = 1; htable[(kk&mask)] = count++; } else { if (ind[m] == kk) { wgt[m]++; } else { for (jj=0; jj<count; jj++) { if (ind[jj] == kk) { wgt[jj]++; break; } } if (jj == count) { ind[count] = kk; wgt[count++] = 1; } } } } } for (j=0; j<count; j++) { htable[(ind[j]&mask)] = -1; if (wgt[j] >= *ncommonnodes) { if (pass == 0) myxadj[i]++; else myadjncy[myxadj[i]++] = ind[j]; } } } if (pass == 0) { MAKECSR(i, nelms, myxadj); myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy"); } else { SHIFTCSR(i, nelms, myxadj); } } /*****************************************/ /* correctly renumber the elements array */ /*****************************************/ for (i=0; i<eptr[nelms]; i++) eind[i] = nmap[eind[i]] + gminnode; if (*numflag == 1) ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0); /* do not free nodelist, recvbuffer, rbuffer */ GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, (void **)&gnind, (void **)&auxarray, LTERM); FreeCtrl(&ctrl); return; }
int main(int argc, char *argv[]) { int i, npart; idxtype *part; float ncut=0; GraphType graph; char filename[256],outputFile[256]; int wgtflag = 0, addSelfLoop=1, outputFileGiven=0, txtFormat=0 ; int randomInit = 0; idxtype minEdgeWeight = 0; Options opt; timer TOTALTmr, METISTmr, IOTmr; initOptions(&opt); if (argc < 2) { print_help(argv[0]); exit(0); } for (argv++; *argv != NULL; argv++){ if ((*argv)[0] == '-') { int temp; switch ((*argv)[1]) { case 'b': case 'B': opt.penalty_power=atof(*(++argv)); break; case 'i': case 'I': opt.gamma=atof(*(++argv)); break; case 'o': case 'O': strcpy(outputFile,*(++argv)); outputFileGiven=1; break; case 'D'://quality threshold. This is a post-processing step proposed in SR-MCL. If you dont want post-processing (this is what original MLR-MCL, R-MCL, MCL do, please set "-d 0" case 'd': opt.quality_threshold = atof(*(++argv)); break; case 'w': case 'W': opt.weighted_density = true; break; case 'c': case 'C': opt.coarsenTo= atoi(*(++argv)); break; default: printf("Invalid option %s\n", *argv); print_help(argv[0]); exit(0); } } else { strcpy(filename, *argv); } } if ( randomInit > 0 ) InitRandom(time(NULL)); else InitRandom(-1); cleartimer(TOTALTmr); cleartimer(METISTmr); cleartimer(IOTmr); starttimer(TOTALTmr); starttimer(IOTmr); ReadGraph(&graph, filename, &wgtflag, addSelfLoop, txtFormat); if ( opt.matchType == MATCH_UNSPECIFIED ) { // opt.matchType = (graph.nvtxs>50000) ? MATCH_POWERLAW_FC : // MATCH_SHEMN; opt.matchType = MATCH_SHEMN; } stoptimer(IOTmr); if (graph.nvtxs <= 0) { printf("Empty graph. 
Nothing to do.\n"); exit(0); } int noOfSingletons = 0; GraphType *noSingletonGraph ; idxtype* nodeMap = lookForSingletons(&graph, &noOfSingletons); if ( noOfSingletons > 0 ) { getSubgraph(&graph, nodeMap, graph.nvtxs-noOfSingletons, wgtflag, &noSingletonGraph); GKfree((void**)&(graph.xadj), (void**)&(graph.adjncy), LTERM); if ( wgtflag&1 > 0 ) GKfree( (void**)&(graph.adjwgt), LTERM); // free(graph.gdata); printf("Found %d singleton nodes in the", noOfSingletons); printf(" input graph. Removing them.\n"); } if ( !outputFileGiven ) { strcpy(outputFile, filename); sprintf(outputFile,"%s.c%d.i%1.1f.b%1.1f",outputFile,opt.coarsenTo,opt.gamma,opt.penalty_power); } printf("Input graph information ---------------------------------------------------\n"); printf(" Name: %s, #Vertices: %d, #Edges: %d\n", filename, graph.nvtxs, graph.nedges/2); printf("Output shall be placed in the file %s\n", outputFile); fflush(stdout); part = idxmalloc(graph.nvtxs, "main: part"); printf("------------------------------------------------\n"); printf("Clustering....\n"); fflush(stdout); starttimer(METISTmr); //YK: main algorithm starts here! if ( noOfSingletons > 0 ) { mlmcl(&(noSingletonGraph->nvtxs), noSingletonGraph->xadj, noSingletonGraph->adjncy, noSingletonGraph->vwgt,noSingletonGraph->adjwgt, &wgtflag, part, opt ); } else { mlmcl(&graph.nvtxs, graph.xadj, graph.adjncy,graph.vwgt, graph.adjwgt, &wgtflag, part, opt ); } stoptimer(METISTmr); printf("------------------------------------------------\n"); if ( noOfSingletons > 0 ) { npart=mapPartition(part,noSingletonGraph->nvtxs); ncut=ComputeNCut(noSingletonGraph, part,npart); // printf("In graph that does not include singletons,"); // printf("No. 
of Clusters: %d, N-Cut value: %.2f\n",npart,ncut); idxtype *clusterSizes = histogram(part, graph.nvtxs-noOfSingletons, npart); int maxSize = clusterSizes[idxamax(npart, clusterSizes)]; float avgClusterSize = (graph.nvtxs-noOfSingletons)*1.0/(npart); float balance = (maxSize*1.0) / ((graph.nvtxs-noOfSingletons)*1.0/npart); float stdDevn = stdDeviation(clusterSizes, npart); float avgNcut = ncut * 1.0/npart; float normStdDevn = stdDevn/avgClusterSize; // Warning: This computation only works if the singletons // have been placed in their own clusters. This works for // MLR-MCL, in other words, because it is guaranteed to // place singletons in their own clusters. printf("Output statistics for graph without singletons\n"); printf("Clusters: %d N-Cut: %.3f", npart, ncut); printf(" AvgN-Cut: %.3f Balance in cluster sizes: %.2f ",avgNcut, balance); printf("Std_Deviation in cluster sizes: %.2f ", stdDevn); printf("Coefficient_of_Variation: %.2f\n", normStdDevn); free( clusterSizes ); npart += noOfSingletons; // ncut += noOfSingletons; printf("Output statistics for original graph\n"); mapIndices(part, nodeMap, graph.nvtxs, npart-noOfSingletons); } else { npart=mapPartition(part,graph.nvtxs); ncut=ComputeNCut(&graph, part,npart); } idxtype* clusterSizes = histogram(part, graph.nvtxs, npart); int maxSize = clusterSizes[idxamax(npart, clusterSizes)]; float avgClusterSize = (graph.nvtxs)*1.0/(npart); float balance = (maxSize*1.0)/(graph.nvtxs*1.0/npart); float stdDevn = stdDeviation(clusterSizes, npart); float avgNcut = ncut * 1.0/npart; float normStdDevn = stdDevn/avgClusterSize; printf("Clusters: %d N-Cut: %.3f AvgN-Cut: %.3f", npart, ncut, avgNcut ); printf(" Balance in cluster sizes: %.2f Std.Deviation in cluster sizes: %.2f ", balance, stdDevn); printf("Coefficient_of_Variation: %.2f\n", normStdDevn); starttimer(IOTmr); my_WritePartition(outputFile, part, graph.nvtxs, opt.gamma); if ( noOfSingletons > 0 ) { free(nodeMap); nodeMap = NULL; } printf("\nOutput is written to 
file: %s\n", outputFile); stoptimer(IOTmr); stoptimer(TOTALTmr); printf("\nTiming Information --------------------------------------------------\n"); printf(" I/O: \t\t %7.3f\n", gettimer(IOTmr)); printf(" Partitioning: \t\t %7.3f (MLR-MCL time)\n", gettimer(METISTmr)); printf(" Total: \t\t %7.3f\n", gettimer(TOTALTmr)); printf("**********************************************************************\n"); GKfree((void**)&graph.xadj, (void**)&graph.adjncy, (void**)&graph.vwgt, (void**)&graph.adjwgt, (void**)&part, LTERM); }
/*************************************************************************
* This function performs k-way refinement
*
* For up to npasses passes, boundary vertices are pulled from a priority
* queue keyed by (external degree - internal degree) and greedily moved to
* a neighbouring subdomain when that does not increase the cut and keeps
* the subdomain weights inside [minwgt, maxwgt].  A pass that leaves
* graph->mincut unchanged ends the refinement.
*
* Parameters
*   ctrl     - control structure; supplies the workspace and debug level
*   graph    - graph being refined; where, pwgts, rinfo, bndptr, bndind,
*              nbnd and mincut are updated in place
*   nparts   - number of subdomains
*   tpwgts   - target weight fraction of every subdomain
*   ubfactor - allowed imbalance: maxwgt = target*ubfactor,
*              minwgt = target/ubfactor
*   npasses  - maximum number of refinement passes
*
* moved[] states used below: -1 = not in the queue, 2 = currently in the
* queue, 1 = already extracted from the queue in this pass.
**************************************************************************/
void Greedy_KWayEdgeRefine(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, float ubfactor, int npasses)
{
  int i, ii, iii, j, jj, k, l, pass, nvtxs, nbnd, tvwgt, myndegrees, oldgain, gain;
  int from, me, to, oldcut, vwgt;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *pwgts, *perm, *bndptr, *bndind, *minwgt, *maxwgt, *moved, *itpwgts;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  PQueueType queue;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;

  where = graph->where;
  pwgts = graph->pwgts;

  /* Setup the weight intervals of the various subdomains */
  minwgt = idxwspacemalloc(ctrl, nparts);
  maxwgt = idxwspacemalloc(ctrl, nparts);
  itpwgts = idxwspacemalloc(ctrl, nparts);
  tvwgt = idxsum(nparts, pwgts);
  ASSERT(tvwgt == idxsum(nvtxs, graph->vwgt));

  /* itpwgts holds the integer target weights; min/max bracket them */
  for (i=0; i<nparts; i++) {
    itpwgts[i] = tpwgts[i]*tvwgt;
    maxwgt[i] = tpwgts[i]*tvwgt*ubfactor;
    minwgt[i] = tpwgts[i]*tvwgt*(1.0/ubfactor);
  }

  perm = idxwspacemalloc(ctrl, nvtxs);
  moved = idxwspacemalloc(ctrl, nvtxs);

  /* The largest adjacency weight sum is handed to the queue as its gain bound */
  PQueueInit(ctrl, &queue, nvtxs, graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]);

  IFSET(ctrl->dbglvl, DBG_REFINE,
     printf("Partitions: [%6d %6d]-[%6d %6d], Balance: %5.3f, Nv-Nb[%6d %6d]. Cut: %6d\n",
             pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)], minwgt[0], maxwgt[0],
             1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nvtxs, graph->nbnd,
             graph->mincut));

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    PQueueReset(&queue);
    idxset(nvtxs, -1, moved);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    /* Seed the queue with the current boundary vertices in random order */
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      PQueueInsert(&queue, i, graph->rinfo[i].ed - graph->rinfo[i].id);
      moved[i] = 2;
    }

    /* iii counts extracted vertices; it only feeds the (iii&7) test below */
    for (iii=0;;iii++) {
      if ((i = PQueueGetMax(&queue)) == -1)
        break;
      moved[i] = 1;

      myrinfo = graph->rinfo+i;
      from = where[i];
      vwgt = graph->vwgt[i];

      if (pwgts[from]-vwgt < minwgt[from])
        continue;   /* This cannot be moved! */

      myedegrees = myrinfo->edegrees;
      myndegrees = myrinfo->ndegrees;

      /* Find the first neighbouring subdomain with non-negative gain that
         can absorb the vertex */
      j = myrinfo->id;
      for (k=0; k<myndegrees; k++) {
        to = myedegrees[k].pid;
        gain = myedegrees[k].ed-j; /* j = myrinfo->id. Allow good nodes to move */
        if (pwgts[to]+vwgt <= maxwgt[to]+gain && gain >= 0)
          break;
      }
      if (k == myndegrees)
        continue;  /* break out if you did not find a candidate */

      /* Prefer a later candidate with a larger external degree, or with an
         equal one whose subdomain is lighter relative to its target */
      for (j=k+1; j<myndegrees; j++) {
        to = myedegrees[j].pid;
        if ((myedegrees[j].ed > myedegrees[k].ed && pwgts[to]+vwgt <= maxwgt[to]) ||
            (myedegrees[j].ed == myedegrees[k].ed &&
             itpwgts[myedegrees[k].pid]*pwgts[to] < itpwgts[to]*pwgts[myedegrees[k].pid]))
          k = j;
      }

      to = myedegrees[k].pid;

      /* j becomes the go/no-go flag.  A positive gain always moves; a zero
         gain moves on every 8th extraction, when 'from' is at or above its
         maximum weight, or when the move improves the relative balance. */
      j = 0;
      if (myedegrees[k].ed-myrinfo->id > 0)
        j = 1;
      else if (myedegrees[k].ed-myrinfo->id == 0) {
        if ((iii&7) == 0 || pwgts[from] >= maxwgt[from] || itpwgts[from]*(pwgts[to]+vwgt) < itpwgts[to]*pwgts[from])
          j = 1;
      }
      if (j == 0)
        continue;

      /*=====================================================================
      * If we got here, we can now move the vertex from 'from' to 'to'
      *======================================================================*/
      graph->mincut -= myedegrees[k].ed-myrinfo->id;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

      /* Update where, weight, and ID/ED information of the vertex you moved */
      where[i] = to;
      INC_DEC(pwgts[to], pwgts[from], vwgt);
      myrinfo->ed += myrinfo->id-myedegrees[k].ed;
      /* j is passed as SWAP's third argument (presumably its scratch variable) */
      SWAP(myrinfo->id, myedegrees[k].ed, j);
      /* Entry k now describes 'from'; drop it if no edges remain there */
      if (myedegrees[k].ed == 0)
        myedegrees[k] = myedegrees[--myrinfo->ndegrees];
      else
        myedegrees[k].pid = from;

      if (myrinfo->ed < myrinfo->id)
        BNDDelete(nbnd, bndind, bndptr, i);

      /* Update the degrees of adjacent vertices */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        ii = adjncy[j];
        me = where[ii];

        myrinfo = graph->rinfo+ii;
        /* Hand out a slice of the shared edegrees workspace on first use */
        if (myrinfo->edegrees == NULL) {
          myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
          ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
        }
        myedegrees = myrinfo->edegrees;

        ASSERT(CheckRInfo(myrinfo));

        /* Remember the old key so the queue entry can be located */
        oldgain = (myrinfo->ed-myrinfo->id);

        if (me == from) {
          /* edge (i, ii) changed from internal to external */
          INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

          if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
            BNDInsert(nbnd, bndind, bndptr, ii);
        }
        else if (me == to) {
          /* edge (i, ii) changed from external to internal */
          INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

          if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1)
            BNDDelete(nbnd, bndind, bndptr, ii);
        }

        /* Remove contribution from the .ed of 'from' */
        if (me != from) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == from) {
              if (myedegrees[k].ed == adjwgt[j])
                myedegrees[k] = myedegrees[--myrinfo->ndegrees];
              else
                myedegrees[k].ed -= adjwgt[j];
              break;
            }
          }
        }

        /* Add contribution to the .ed of 'to' */
        if (me != to) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == to) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          /* 'to' was not yet a neighbouring subdomain of ii: append it */
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = to;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }

        /* Update the queue */
        if (me == to || me == from) {
          gain = myrinfo->ed-myrinfo->id;
          if (moved[ii] == 2) {
            if (gain >= 0)
              PQueueUpdate(&queue, ii, oldgain, gain);
            else {
              PQueueDelete(&queue, ii, oldgain);
              moved[ii] = -1;
            }
          }
          else if (moved[ii] == -1 && gain >= 0) {
            PQueueInsert(&queue, ii, gain);
            moved[ii] = 2;
          }
        }

        ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
        ASSERT(CheckRInfo(myrinfo));

      }
    }

    graph->nbnd = nbnd;

    IFSET(ctrl->dbglvl, DBG_REFINE,
       printf("\t[%6d %6d], Balance: %5.3f, Nb: %6d. Cut: %6d\n",
               pwgts[idxamin(nparts, pwgts)], pwgts[idxamax(nparts, pwgts)],
               1.0*nparts*pwgts[idxamax(nparts, pwgts)]/tvwgt, graph->nbnd, graph->mincut));

    /* No change in the cut during this pass: stop refining */
    if (graph->mincut == oldcut)
      break;
  }

  PQueueFree(ctrl, &queue);

  /* One free per idxwspacemalloc above: three of nparts, two of nvtxs */
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}
/*************************************************************************
* This function reads a mesh from a file
*
* The last rank (npes-1) opens the file, parses the header line
* "<#elements> <etype>", computes an even element distribution and then
* reads every rank's elements in turn, sending them out with MPI_Send.
* All ranks leave with mesh->elmdist, gnelms, nelms, etype, elements and
* gnns filled in; node ids are shifted so the globally smallest one is 0.
*
* Parameters
*   mesh     - OUT: mesh structure to fill (elmdist and elements are
*              allocated here)
*   filename - path of the mesh file; only opened on rank npes-1
*   comm     - communicator whose ranks share the mesh
*
* If the file cannot be opened or its header is invalid, every rank calls
* MPI_Finalize() and exits with status 0.
**************************************************************************/
void ParallelReadMesh(MeshType *mesh, char *filename, MPI_Comm comm)
{
  int i, j, k, pe;
  int npes, mype, ier;
  int gnelms = 0, nelms, your_nelms, etype = 0, maxnelms;
  int maxnode, gmaxnode, minnode, gminnode;
  idxtype *elmdist, *elements;
  idxtype *your_elements;
  MPI_Status stat;
  char *line = NULL, *oldstr, *newstr;
  FILE *fpin = NULL;
  /* number of node ids per element for each etype; index 0 is not a valid
     type (presumably 1=triangle, 2=tetrahedron, 3=hexahedron,
     4=quadrilateral -- confirm against the mesh format documentation) */
  int esize, esizes[5] = {-1, 3, 4, 8, 4};
  const int netypes = (int)(sizeof(esizes)/sizeof(esizes[0]));

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  elmdist = mesh->elmdist = idxsmalloc(npes+1, 0, "ReadGraph: elmdist");

  if (mype == npes-1) {
    ier = 0;
    fpin = fopen(filename, "r");

    if (fpin == NULL){
      printf("COULD NOT OPEN FILE '%s' FOR SOME REASON!\n", filename);
      ier++;
    }
    else {
      /* Parse and validate the header before the status broadcast so that a
         bad header is reported to every rank through the same path as a
         missing file. */
      line = (char *)GKmalloc(sizeof(char)*(MAXLINE+1), "line");

      if (fgets(line, MAXLINE, fpin) == NULL ||
          sscanf(line, "%d %d", &gnelms, &etype) != 2 ||
          gnelms <= 0 || etype < 1 || etype >= netypes) {
        printf("INVALID HEADER IN FILE '%s'!\n", filename);
        ier++;
      }
    }

    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      if (fpin != NULL)   /* fclose(NULL) is undefined */
        fclose(fpin);
      free(line);
      MPI_Finalize();
      exit(0);
    }

    /* Construct elmdist and send it to all the processors */
    elmdist[0] = 0;
    for (i=0,j=gnelms; i<npes; i++) {
      k = j/(npes-i);
      elmdist[i+1] = elmdist[i]+k;
      j -= k;
    }

    MPI_Bcast((void *)elmdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }
  else {
    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    MPI_Bcast((void *)elmdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }

  MPI_Bcast((void *)(&etype), 1, MPI_INT, npes-1, comm);

  gnelms = mesh->gnelms = elmdist[npes];
  nelms = mesh->nelms = elmdist[mype+1]-elmdist[mype];
  mesh->etype = etype;
  esize = esizes[etype];   /* etype was range-checked by the reading rank */

  elements = mesh->elements = idxmalloc(nelms*esize, "ParallelReadMesh: elements");

  if (mype == npes-1) {
    /* the staging buffer must hold the largest per-rank share */
    maxnelms = 0;
    for (i=0; i<npes; i++) {
      maxnelms = (maxnelms > elmdist[i+1]-elmdist[i]) ?
      maxnelms : elmdist[i+1]-elmdist[i];
    }

    your_elements = idxmalloc(maxnelms*esize, "your_elements");

    for (pe=0; pe<npes; pe++) {
      your_nelms = elmdist[pe+1]-elmdist[pe];
      for (i=0; i<your_nelms; i++) {
        /* NOTE(review): a short file is not detected here -- fgets() would
           fail and the previous row would be parsed again. */
        fgets(line, MAXLINE, fpin);
        oldstr = line;
        newstr = NULL;

        /*************************************/
        /* could get element weights here too */
        /*************************************/

        for (j=0; j<esize; j++) {
          your_elements[i*esize+j] = (int)strtol(oldstr, &newstr, 10);
          oldstr = newstr;
        }
      }

      if (pe < npes-1) {
        MPI_Send((void *)your_elements, your_nelms*esize, IDX_DATATYPE, pe, 0, comm);
      }
      else {
        /* the reader's own share is copied, not sent */
        for (i=0; i<your_nelms*esize; i++)
          elements[i] = your_elements[i];
      }
    }
    fclose(fpin);
    free(your_elements);
    free(line);   /* was leaked before */
  }
  else {
    MPI_Recv((void *)elements, nelms*esize, IDX_DATATYPE, npes-1, 0, comm, &stat);
  }

  /*********************************/
  /* now check for number of nodes */
  /*********************************/
  minnode = elements[idxamin(nelms*esize, elements)];
  MPI_Allreduce((void *)&minnode, (void *)&gminnode, 1, MPI_INT, MPI_MIN, comm);
  for (i=0; i<nelms*esize; i++)
    elements[i] -= gminnode;

  maxnode = elements[idxamax(nelms*esize, elements)];
  MPI_Allreduce((void *)&maxnode, (void *)&gmaxnode, 1, MPI_INT, MPI_MAX, comm);
  mesh->gnns = gmaxnode+1;

  if (mype==0) printf("Nelements: %d, Nnodes: %d, EType: %d\n", gnelms, mesh->gnns, etype);
}
/*************************************************************************
* This function computes cuts and balance information
*
* Prints, for the partition described by where[]:
*   - the edge cut and communication volume,
*   - the load balance of every constraint,
*   - min/max/avg/balance of the number of adjacent subdomains, of the cut
*     weight towards them, and of the number of interface vertices,
* and then calls IsConnectedSubdomain() for every subdomain.
*
* Parameters
*   graph  - graph to analyse.  When vwgt or adjwgt is NULL, unit weights
*            are installed for the duration of the call and removed again.
*   nparts - number of subdomains
*   where  - where[i] is the subdomain of vertex i
**************************************************************************/
void ComputePartitionInfo(GraphType *graph, int nparts, idxtype *where)
{
  int i, j, nvtxs, ncon;
  int ownvwgt = 0, ownadjwgt = 0;   /* set when this call allocated the array */
  idxtype *xadj, *adjncy, *vwgt, *adjwgt, *kpwgts, *tmpptr;
  idxtype *padjncy, *padjwgt, *padjcut;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;
  adjwgt = graph->adjwgt;

  if (vwgt == NULL) {
    /* vwgt is indexed as vwgt[i*ncon+j] below, so the unit-weight stand-in
       needs ncon entries per vertex, not one */
    vwgt = graph->vwgt = idxsmalloc(nvtxs*(ncon > 1 ? ncon : 1), 1, "vwgt");
    ownvwgt = 1;
  }
  if (adjwgt == NULL) {
    adjwgt = graph->adjwgt = idxsmalloc(xadj[nvtxs], 1, "adjwgt");
    ownadjwgt = 1;
  }

  printf("%d-way Cut: %5d, Vol: %5d, ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where));

  /* Compute balance information */
  kpwgts = idxsmalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts");

  for (i=0; i<nvtxs; i++) {
    for (j=0; j<ncon; j++)
      kpwgts[where[i]*ncon+j] += vwgt[i*ncon+j];
  }

  /* Each pair is (achieved balance) out of (balance of the heaviest
     single vertex against the average subdomain weight) */
  if (ncon == 1) {
    printf("\tBalance: %5.3f out of %5.3f\n",
            1.0*nparts*kpwgts[idxamax(nparts, kpwgts)]/(1.0*idxsum(nparts, kpwgts)),
            1.0*nparts*vwgt[idxamax(nvtxs, vwgt)]/(1.0*idxsum(nparts, kpwgts)));
  }
  else {
    printf("\tBalance:");
    for (j=0; j<ncon; j++)
      printf(" (%5.3f out of %5.3f)",
            1.0*nparts*kpwgts[ncon*idxamax_strd(nparts, kpwgts+j, ncon)+j]/(1.0*idxsum_strd(nparts, kpwgts+j, ncon)),
            1.0*nparts*vwgt[ncon*idxamax_strd(nvtxs, vwgt+j, ncon)+j]/(1.0*idxsum_strd(nparts, kpwgts+j, ncon)));
    printf("\n");
  }

  /* Compute p-adjncy information: nparts x nparts matrices indexed as
     [from*nparts+to] */
  padjncy = idxsmalloc(nparts*nparts, 0, "ComputePartitionInfo: padjncy");
  padjwgt = idxsmalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt");
  padjcut = idxsmalloc(nparts*nparts, 0, "ComputePartitionInfo: padjcut");

  /* kpwgts is reused as a per-vertex "subdomain already counted" marker */
  idxset(nparts, 0, kpwgts);
  for (i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (where[i] != where[adjncy[j]]) {
        padjncy[where[i]*nparts+where[adjncy[j]]] = 1;
        padjcut[where[i]*nparts+where[adjncy[j]]] += adjwgt[j];
        /* count vertex i once per neighbouring subdomain */
        if (kpwgts[where[adjncy[j]]] == 0) {
          padjwgt[where[i]*nparts+where[adjncy[j]]]++;
          kpwgts[where[adjncy[j]]] = 1;
        }
      }
    }
    /* clear the markers touched by this vertex */
    for (j=xadj[i]; j<xadj[i+1]; j++)
      kpwgts[where[adjncy[j]]] = 0;
  }

  for (i=0; i<nparts; i++)
    kpwgts[i] = idxsum(nparts, padjncy+i*nparts);
  printf("Min/Max/Avg/Bal # of adjacent subdomains: %5d %5d %5.2f %7.3f\n",
    kpwgts[idxamin(nparts, kpwgts)], kpwgts[idxamax(nparts, kpwgts)],
    1.0*idxsum(nparts, kpwgts)/(1.0*nparts),
    1.0*nparts*kpwgts[idxamax(nparts, kpwgts)]/(1.0*idxsum(nparts, kpwgts)));

  for (i=0; i<nparts; i++)
    kpwgts[i] = idxsum(nparts, padjcut+i*nparts);
  printf("Min/Max/Avg/Bal # of adjacent subdomain cuts: %5d %5d %5d %7.3f\n",
    kpwgts[idxamin(nparts, kpwgts)], kpwgts[idxamax(nparts, kpwgts)],
    idxsum(nparts, kpwgts)/nparts,
    1.0*nparts*kpwgts[idxamax(nparts, kpwgts)]/(1.0*idxsum(nparts, kpwgts)));

  for (i=0; i<nparts; i++)
    kpwgts[i] = idxsum(nparts, padjwgt+i*nparts);
  printf("Min/Max/Avg/Bal/Frac # of interface nodes: %5d %5d %5d %7.3f %7.3f\n",
    kpwgts[idxamin(nparts, kpwgts)], kpwgts[idxamax(nparts, kpwgts)],
    idxsum(nparts, kpwgts)/nparts,
    1.0*nparts*kpwgts[idxamax(nparts, kpwgts)]/(1.0*idxsum(nparts, kpwgts)),
    1.0*idxsum(nparts, kpwgts)/(1.0*nvtxs));

  /* IsConnectedSubdomain reads the partition from graph->where, so point
     it at the caller's vector for the duration of the check */
  tmpptr = graph->where;
  graph->where = where;
  for (i=0; i<nparts; i++)
    IsConnectedSubdomain(NULL, graph, i, 1);
  graph->where = tmpptr;

  /* Remove the temporary unit weights installed above */
  if (ownvwgt) {
    free(vwgt);
    graph->vwgt = NULL;
  }
  if (ownadjwgt) {
    free(adjwgt);
    graph->adjwgt = NULL;
  }

  GKfree((void **)&kpwgts, (void **)&padjncy, (void **)&padjwgt, (void **)&padjcut, LTERM);
}
/*************************************************************************
* This function performs an edge-based FM refinement
*
* Each pass moves boundary vertices one at a time between the two parts,
* always taking the highest-gain vertex of the chosen side, records the
* move sequence, and afterwards rolls back every move made after the best
* cut seen.  Refinement stops after npasses passes, or as soon as a pass
* finds no improving prefix or leaves the cut unchanged.
*
* Parameters
*   ctrl    - control structure; supplies the workspace and debug level
*   graph   - bisected graph; where, id, ed, pwgts, bndptr, bndind, nbnd
*             and mincut are updated in place
*   tpwgts  - target weights of the two parts
*   npasses - maximum number of passes
*
* moved[v] is -1 while v has not been moved in the current pass and holds
* the index of the move otherwise; swaps[] lists the moved vertices in order.
**************************************************************************/
void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasses)
{
  int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp;
  idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
  idxtype *moved, *swaps, *perm;
  PQueueType parts[2];
  int higain, oldgain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  vwgt = graph->vwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  pwgts = graph->pwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);

  /* limit: how many moves past the best cut are tried before giving up
     (1% of the vertices, clamped to [15, 100]) */
  limit = (int) amin(amax(0.01*nvtxs, 15), 100);
  /* avgvwgt: slack added to the original imbalance when accepting a better cut */
  avgvwgt = amin((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs);

  /* The largest adjacency weight sum is handed to both queues as their gain bound */
  tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)];
  PQueueInit(ctrl, &parts[0], nvtxs, tmp);
  PQueueInit(ctrl, &parts[1], nvtxs, tmp);

  IFSET(ctrl->dbglvl, DBG_REFINE,
     printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d\n",
             pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));

  origdiff = abs(tpwgts[0]-pwgts[0]);
  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    PQueueReset(&parts[0]);
    PQueueReset(&parts[1]);

    /* mincutorder is the index of the move that produced the best cut;
       -1 means no move has improved on the starting state */
    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    mindiff = abs(tpwgts[0]-pwgts[0]);

    ASSERT(ComputeCut(graph, where) == graph->mincut);
    ASSERT(CheckBnd(graph));

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->nbnd;
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = perm[ii];
      ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);
      ASSERT(bndptr[bndind[i]] != -1);
      PQueueInsert(&parts[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]);
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      /* Move out of the part whose shortfall against its target is smaller */
      from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1);
      to = (from+1)%2;

      if ((higain = PQueueGetMax(&parts[from])) == -1)
        break;
      ASSERT(bndptr[higain] != -1);

      /* Tentatively apply the move to the cut and the part weights */
      newcut -= (ed[higain]-id[higain]);
      INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);

      /* New best: a strictly smaller cut within the balance tolerance, or
         the same cut with a strictly better balance */
      if ((newcut < mincut && abs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) ||
          (newcut == mincut && abs(tpwgts[0]-pwgts[0]) < mindiff)) {
        mincut  = newcut;
        mindiff = abs(tpwgts[0]-pwgts[0]);
        mincutorder = nswaps;
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        INC_DEC(pwgts[from], pwgts[to], vwgt[higain]);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO,
        printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1]));

      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      /* The moved vertex's internal and external degrees trade places */
      SWAP(id[higain], ed[higain], tmp);
      /* The xadj test is false for a vertex without edges, which is
         therefore left on the boundary */
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind, bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        /* The edge becomes internal for neighbours in 'to', external otherwise */
        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              PQueueDelete(&parts[where[k]], k, oldgain);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              PQueueUpdate(&parts[where[k]], k, oldgain, ed[k]-id[k]);
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)
              PQueueInsert(&parts[where[k]], k, ed[k]-id[k]);
          }
        }
      }

    }


    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    /* Undo, newest first, every move made after the best one */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind, bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind, bndptr, higain);

      INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Only the boundary lists need fixing here; the queues are reset
           at the start of the next pass */
        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

    IFSET(ctrl->dbglvl, DBG_REFINE,
      printf("\tMinimum cut: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    /* Stop when the pass found no improving prefix or left the cut unchanged */
    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  PQueueFree(ctrl, &parts[0]);
  PQueueFree(ctrl, &parts[1]);

  /* One free per idxwspacemalloc above */
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}
/*************************************************************************
* Balances a bisection by moving boundary vertices out of the part that
* is above its target weight and into the part that is below it.
*
* Candidates are the boundary vertices of the overweight part whose weight
* does not exceed the initial imbalance.  They are taken in order of
* decreasing gain (ed - id) until the queue runs dry or the next move
* would push the receiving part above its target.
*
* Parameters
*   ctrl   - control structure; supplies the workspace and debug level
*   graph  - bisected graph; where, id, ed, pwgts, bndptr, bndind, nbnd
*            and mincut are updated in place
*   tpwgts - target weights of the two parts
**************************************************************************/
void Bnd2WayBalance(CtrlType *ctrl, GraphType *graph, int *tpwgts)
{
  int i, ii, higain, from, to;
  int nvtxs, nbnd, nmoves, mincut, mindiff;
  int e, efirst, elast, nbr, delta, prevgain, scratch, movable;
  idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
  idxtype *moved, *perm;
  PQueueType queue;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;
  adjwgt = graph->adjwgt;
  where  = graph->where;
  pwgts  = graph->pwgts;
  id     = graph->id;
  ed     = graph->ed;
  bndind = graph->bndind;
  bndptr = graph->bndptr;

  moved = idxwspacemalloc(ctrl, nvtxs);
  perm  = idxwspacemalloc(ctrl, nvtxs);

  /* Vertices leave the part that is not below its target */
  mindiff = abs(tpwgts[0]-pwgts[0]);
  if (pwgts[0] < tpwgts[0]) {
    from = 1;
    to   = 0;
  }
  else {
    from = 0;
    to   = 1;
  }

  IFSET(ctrl->dbglvl, DBG_REFINE,
     printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d [B]\n",
             pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));

  scratch = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)];
  PQueueInit(ctrl, &queue, nvtxs, scratch);

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Queue the boundary vertices of 'from' that are light enough to move */
  nbnd = graph->nbnd;
  RandomPermute(nbnd, perm, 1);
  for (ii=0; ii<nbnd; ii++) {
    i = perm[ii];
    ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);
    ASSERT(bndptr[bndind[i]] != -1);
    if (where[bndind[i]] != from || vwgt[bndind[i]] > mindiff)
      continue;
    PQueueInsert(&queue, bndind[i], ed[bndind[i]]-id[bndind[i]]);
  }

  mincut = graph->mincut;
  nmoves = 0;
  while (nmoves < nvtxs) {
    higain = PQueueGetMax(&queue);
    if (higain == -1)
      break;
    ASSERT(bndptr[higain] != -1);

    /* Stop before the receiving part would exceed its target */
    if (pwgts[to]+vwgt[higain] > tpwgts[to])
      break;

    mincut -= (ed[higain]-id[higain]);
    INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);

    where[higain] = to;
    moved[higain] = nmoves;

    IFSET(ctrl->dbglvl, DBG_MOVEINFO,
      printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], mincut, pwgts[0], pwgts[1]));

    /* The moved vertex's internal and external degrees trade places */
    SWAP(id[higain], ed[higain], scratch);

    efirst = xadj[higain];
    elast  = xadj[higain+1];
    if (ed[higain] == 0 && efirst < elast)
      BNDDelete(nbnd, bndind, bndptr, higain);

    /* Propagate the move to every neighbour */
    for (e=efirst; e<elast; e++) {
      nbr = adjncy[e];
      prevgain = ed[nbr]-id[nbr];

      if (to == where[nbr])
        delta = adjwgt[e];
      else
        delta = -adjwgt[e];
      INC_DEC(id[nbr], ed[nbr], delta);

      /* Only unmoved, light-enough vertices of 'from' ever live in the queue */
      movable = (moved[nbr] == -1 && where[nbr] == from && vwgt[nbr] <= mindiff);

      if (bndptr[nbr] == -1) {
        if (ed[nbr] > 0) {
          /* joins the boundary */
          BNDInsert(nbnd, bndind, bndptr, nbr);
          if (movable)
            PQueueInsert(&queue, nbr, ed[nbr]-id[nbr]);
        }
      }
      else if (ed[nbr] == 0) {
        /* leaves the boundary */
        BNDDelete(nbnd, bndind, bndptr, nbr);
        if (movable)
          PQueueDelete(&queue, nbr, prevgain);
      }
      else if (movable) {
        /* stays on the boundary with a new gain */
        PQueueUpdate(&queue, nbr, prevgain, ed[nbr]-id[nbr]);
      }
    }

    nmoves++;
  }

  IFSET(ctrl->dbglvl, DBG_REFINE,
    printf("\tMinimum cut: %6d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, pwgts[0], pwgts[1], nbnd));

  graph->mincut = mincut;
  graph->nbnd = nbnd;

  PQueueFree(ctrl, &queue);

  /* One free per idxwspacemalloc above */
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}
/*************************************************************************
* This function computes the partition-to-processor assignment whose
* objective is the minimization of the total volume of data that needs
* to move.
*
* ctrl     - control structure; mype, nparts, comm, tpwgts and dbglvl are read
* lpwgts   - nparts weights indexed by partition (read only); presumably
*            the weight of each partition currently held by this
*            processor -- confirm against the caller
* map      - output, nparts entries: map[j] is the processor assigned to
*            partition j.  Reset to the identity when the remapping fails
*            or yields no global savings.
* wspace   - workspace; wspace->pv3 is used as scratch for the inverse map
* npasses  - maximum number of bidding rounds
* ncon     - number of constraints, forwarded to SimilarTpwgts()
**************************************************************************/
void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map,
         WorkSpaceType *wspace, int npasses, int ncon)
{
  int i, ii, j, k, nparts, mype;
  int pass, maxipwgt, nmapped, oldwgt, newwgt, done;
  idxtype *rowmap, *mylpwgts;
  KeyValueType *recv, send;
  int nsaved, gnsaved;

  mype   = ctrl->mype;
  nparts = ctrl->nparts;

  /* NOTE(review): recv has room for nparts entries while MPI_Allgather
     delivers one entry per rank of ctrl->comm; this presumably relies on
     the communicator having exactly nparts ranks -- confirm. */
  recv     = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv");
  mylpwgts = idxmalloc(nparts, "mylpwgts");

  done = nmapped = 0;
  idxset(nparts, -1, map);                       /* map[partition] = processor */
  rowmap = idxset(nparts, -1, wspace->pv3);      /* rowmap[processor] = partition */
  idxcopy(nparts, lpwgts, mylpwgts);             /* working copy, zeroed as partitions get taken */

  for (pass=0; pass<npasses; pass++) {
    /* Bid for the heaviest partition still available, unless this
       processor has already been assigned one */
    maxipwgt = idxamax(nparts, mylpwgts);

    if (mylpwgts[maxipwgt] > 0 && !done) {
      /* The weight is negated so that the sort places large weights
         first (assuming ikeysort orders by increasing key) */
      send.key = -mylpwgts[maxipwgt];
      send.val = mype*nparts+maxipwgt;           /* encodes (processor, partition) */
    }
    else {
      send.key = 0;
      send.val = -1;                             /* no bid */
    }

    /* each processor sends its selection */
    MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm);

    ikeysort(nparts, recv);
    if (recv[0].key == 0)
      break;                                     /* nobody placed a bid */

    /* now make as many assignments as possible */
    for (ii=0; ii<nparts; ii++) {
      i = recv[ii].val;

      if (i == -1)
        continue;

      j = i % nparts;                            /* partition being requested */
      k = i / nparts;                            /* processor requesting it */
      if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) {
        map[j] = k;
        rowmap[k] = j;
        nmapped++;
        mylpwgts[j] = 0;                         /* partition j is no longer available */
        if (mype == k)
          done = 1;
      }

      if (nmapped == nparts)
        break;
    }

    if (nmapped == nparts)
      break;
  }

  /* Map unmapped partitions to the free processors, scanning both in
     increasing order */
  if (nmapped < nparts) {
    for (i=j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (; i<nparts; i++) {
          if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;
            nmapped++;
            break;
          }
        }
      }
    }
  }

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts) {
    for (i=0; i<nparts; i++)
      map[i] = i;
    /* The message used to end in "%0\n", an incomplete conversion
       specification with no argument; print the literal zero instead. */
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: 0\n"));
  }
  else {
    /* check for a savings */
    oldwgt  = lpwgts[mype];
    newwgt  = lpwgts[rowmap[mype]];
    nsaved  = newwgt - oldwgt;
    gnsaved = GlobalSESum(ctrl, nsaved);

    /* undo everything if we don't see a savings */
    if (gnsaved <= 0) {
      for (i=0; i<nparts; i++)
        map[i] = i;
    }
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved)));
  }

  GKfree((void **)&recv, (void **)&mylpwgts, LTERM);
}