/*************************************************************************
* Computes a heavy-edge (HEM) style matching and builds the coarse graph.
*
* The "weight" of an edge (v,nbr) is 1/ARATIO2 evaluated on the pair's
* combined vsurf/adjwgtsum terms (minus twice the shared edge weight) and
* the pair's combined vvol.  Every still-unmatched vertex is paired with
* the eligible neighbour that maximises this weight, or with itself when
* no neighbour is eligible.
*
*   ctrl  - supplies dim and the maxsize bound on a merged vertex weight
*   graph - graph to coarsen; graph->cmap is allocated here and the
*           result is handed to CreateCoarseGraph
**************************************************************************/
void Match_HEM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, e, nbr, nvtxs, cnvtxs, mate, dim;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm, *tperm;
  realtype score, bestscore;
  realtype *vvol, *vsurf, *adjwgt, *adjwgtsum;

  dim   = ctrl->dim;
  nvtxs = graph->nvtxs;

  xadj      = graph->xadj;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  vwgt      = graph->vwgt;
  vvol      = graph->vvol;
  vsurf     = graph->vsurf;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");
  tperm = idxmalloc(nvtxs, "tperm");

  /* Start from a random order, then bucket-sort it on vwgt; the visiting
     order is meant to give preference to small vertices */
  RandomPermute(nvtxs, tperm, 1);
  BucketSortKeysInc(nvtxs, vwgt[iamax(nvtxs, vwgt)], vwgt, tperm, perm);

  cnvtxs = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (match[v] != UNMATCHED)
      continue;

    /* Default: the vertex is matched with itself */
    mate      = v;
    bestscore = 0.0;

    for (e=xadj[v]; e<xadj[v+1]; e++) {
      nbr = adjncy[e];

      score = 1.0/ARATIO2(dim, vsurf[v]+vsurf[nbr]+adjwgtsum[v]+adjwgtsum[nbr]-
                  2.0*adjwgt[e], vvol[v]+vvol[nbr]);

      /* Skip neighbours already taken or too heavy to merge with v */
      if (match[nbr] != UNMATCHED || vwgt[v]+vwgt[nbr] > ctrl->maxsize)
        continue;

      if (score > bestscore) {
        bestscore = score;
        mate      = nbr;
      }
    }

    cmap[v] = cmap[mate] = cnvtxs++;
    match[v]    = mate;
    match[mate] = v;
  }

  CreateCoarseGraph(graph, cnvtxs, match, perm);

  IMfree((void**)&tperm, &perm, &match, LTERM);
}
/*********************************************************************************** * This function creates the fused-element-graph and returns the partition ************************************************************************************/ void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol, realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part, MPI_Comm *comm) { int npes, mype, nvtxs; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nvtxs = vtxdist[mype+1]-vtxdist[mype]; /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */ SetUpCtrl(&ctrl, *nparts, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts)); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->where = part; PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); CreateFusedElementGraph(&ctrl, graph, &wspace, numflag); idxcopy(nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); if (((*wgtflag)&2) == 0) IMfree((void**)&graph->vwgt, LTERM); IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy, &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr, &graph->imap, &graph->sendind, &graph, LTERM); FreeWSpace(&wspace); FreeCtrl(&ctrl); }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); if (npes == 1) { /* Take care the npes = 1 case */ idxset(vtxdist[1], 0, part); *edgecut = 0; return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); SetUpCtrl(&ctrl, npes, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize"); PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype)); AdaptiveUndirected_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype)); ReMapGraph(&ctrl, graph, 0, &wspace); idxcopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; IMfree((void**)&graph->vsize, LTERM); FreeInitialGraphAndRemap(graph, *wgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }
/*************************************************************************
* Computes a random matching (RM) and builds the coarse graph.
*
* Vertices are visited in a random order; each unmatched vertex is paired
* with the first unmatched neighbour in its adjacency list whose combined
* vertex weight does not exceed ctrl->maxsize, or with itself when there
* is no such neighbour.
*
*   ctrl  - supplies the maxsize bound on a merged vertex weight
*   graph - graph to coarsen; graph->cmap is allocated here and the
*           result is handed to CreateCoarseGraph
**************************************************************************/
void Match_RM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, e, nbr, nvtxs, cnvtxs, mate;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "graph->cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");

  RandomPermute(nvtxs, perm, 1);

  cnvtxs = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (match[v] != UNMATCHED)
      continue;

    /* Take the first eligible neighbour; fall back to a self-match */
    mate = v;
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      nbr = adjncy[e];
      if (match[nbr] != UNMATCHED)
        continue;
      if (vwgt[v]+vwgt[nbr] <= ctrl->maxsize) {
        mate = nbr;
        break;
      }
    }

    cmap[v] = cmap[mate] = cnvtxs++;
    match[v]    = mate;
    match[mate] = v;
  }

  CreateCoarseGraph(graph, cnvtxs, match, perm);

  IMfree((void**)&match, &perm, LTERM);
}
/***********************************************************************************
* Test driver for ParMGridGen.  It reads a test graph from filename, prints the
* run parameters, times a single ParMGridGen call between two barriers, prints
* the elapsed time and writes the resulting partition.
*
*   filename         - graph file to read; also passed to WriteParallelPartition
*   options          - option array passed to ParMGridGen (OPTION_DIM,
*                      OPTION_CTYPE and OPTION_RTYPE are echoed in the banner)
*   minsize, maxsize - size bounds passed to ParMGridGen
*   comm             - MPI communicator
************************************************************************************/
void TestParMGridGen(char *filename, int *options, int minsize, int maxsize, MPI_Comm comm)
{
  int i, nparts, npes, mype;
  MGridGraphType graph;
  idxtype *part;
  double tmr;

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  MGridReadTestGraph(&graph, filename, comm);

  part = idxmalloc(graph.nvtxs, "TestParMGridGen: part");

  /*======================================================================
  / ParMETIS_AspectRatio
  /=======================================================================*/
  if (mype==0)
    printf("------------------------ PARAMETERS --------------------------------------\n");

  /* Each process prints its own summary line when i reaches its rank */
  for (i=0; i<npes; i++)
    if (mype == i)
      printf("%s, Dim=%d [%2d %2d] CType=%d RType=%d Nvtxs=%d Nedges=%d\n", filename,
          options[OPTION_DIM], minsize, maxsize, options[OPTION_CTYPE],
          options[OPTION_RTYPE], graph.nvtxs, graph.nedges);

  /* Time the ParMGridGen call, with a barrier on each side */
  cleartimer(tmr);
  MPI_Barrier(comm);
  starttimer(tmr);
  ParMGridGen(graph.vtxdist, graph.xadj, graph.vvol, graph.vsurf, graph.adjncy,
              graph.adjwgt, &nparts, minsize, maxsize, options, part, &comm);
  MPI_Barrier(comm);
  stoptimer(tmr);

  printf("Total Time = %lf\n", gettimer(tmr));

  WriteParallelPartition(filename, part, graph.vtxdist, nparts, mype, npes);

  /* Cast the first argument like every other IMfree call in this file */
  IMfree((void**)&graph.vtxdist, &graph.xadj, &graph.vvol, &graph.vsurf, &graph.vwgt,
         &graph.adjncy, &graph.adjwgt, &part, LTERM);
}
/*************************************************************************
* Computes a matching from a globally sorted list of candidate edges and
* builds the coarse graph.
*
* Every edge (v,u) with u <= v whose combined vertex weight fits within
* ctrl->maxsize becomes a candidate keyed by ARATIO2 of the merged pair.
* After ifkeysort, candidates are accepted in sorted order while both
* endpoints are free, stopping once more than a quarter of nvtxs coarse
* vertices have been created.  Remaining vertices are matched with
* themselves.  perm is rebuilt along the way: the first endpoint of each
* pair is stored from the front, the second from the back, and the
* self-matched vertices after the front entries.
*
*   ctrl  - supplies dim and the maxsize bound on a merged vertex weight
*   graph - graph to coarsen; graph->cmap is allocated here and the
*           result is handed to CreateCoarseGraph
**************************************************************************/
void Match_HEM_True(CtrlType *ctrl, GraphType *graph)
{
  int v, u, e, pos, dim, nvtxs, cnvtxs, ncand;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm;
  realtype *vvol, *vsurf, *adjwgt, *adjwgtsum;
  FKeyValueType *cand;

  dim   = ctrl->dim;
  nvtxs = graph->nvtxs;

  xadj      = graph->xadj;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  vwgt      = graph->vwgt;
  vvol      = graph->vvol;
  vsurf     = graph->vsurf;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");
  RandomPermute(nvtxs, perm, 1);

  /* Room for half of the adjacency entries */
  cand = (FKeyValueType *)IMmalloc((xadj[nvtxs]/2)*sizeof(FKeyValueType), "cand");

  /* Collect the candidate pairs together with their aspect ratio */
  ncand = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      u = adjncy[e];
      if (u <= v && vwgt[v] + vwgt[u] <= ctrl->maxsize) {
        cand[ncand].val1 = v;
        cand[ncand].val2 = u;
        cand[ncand].key  = ARATIO2(dim, vsurf[v]+vsurf[u]+adjwgtsum[v]+adjwgtsum[u]
                               -2.0*adjwgt[e], vvol[v]+vvol[u]);
        ncand++;
      }
    }
  }

  ifkeysort(ncand, cand);

  /* Accept candidates in sorted order; perm is rebuilt from scratch */
  idxset(nvtxs, -1, perm);
  cnvtxs = 0;
  for (pos=0; pos<ncand && cnvtxs <= .25*nvtxs; pos++) {
    v = cand[pos].val1;
    u = cand[pos].val2;

    if (match[v] != UNMATCHED || match[u] != UNMATCHED)
      continue;

    perm[cnvtxs]         = v;
    perm[nvtxs-cnvtxs-1] = u;
    cmap[v] = cmap[u] = cnvtxs++;
    match[v] = u;
    match[u] = v;
  }

  /* Whatever is still free is matched with itself */
  for (v=0; v<nvtxs; v++) {
    if (match[v] != UNMATCHED)
      continue;

    perm[cnvtxs] = v;
    cmap[v]  = cnvtxs++;
    match[v] = v;
  }

  CreateCoarseGraph(graph, cnvtxs, match, perm);

  IMfree((void**)&cand, &perm, &match, LTERM);
}
/*************************************************************************
* Computes a restricted heavy-edge (HEM) style matching and builds the
* coarse graph.
*
* Only neighbours lying in the same partition (equal where[] value) are
* considered, and real pairs stop being formed once nmatched reaches 30%
* of nvtxs; from then on every remaining vertex is matched with itself.
* The "weight" of an edge is 1/ARATIO2 of the merged pair, and the
* eligible neighbour with the largest weight is chosen.
*
*   ctrl  - supplies dim and the maxsize bound on a merged vertex weight
*   graph - graph to coarsen (graph->where must be set); graph->cmap is
*           allocated here and the result is handed to CreateCoarseGraph
**************************************************************************/
void Match_HEM_Slow_Restricted(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, e, nbr, dim, nvtxs, cnvtxs, mate, nmatched;
  idxtype *xadj, *vwgt, *adjncy, *where;
  idxtype *match, *cmap, *perm;
  realtype score, bestscore;
  realtype *vvol, *vsurf, *adjwgt, *adjwgtsum;

  dim   = ctrl->dim;
  nvtxs = graph->nvtxs;

  xadj      = graph->xadj;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  vwgt      = graph->vwgt;
  vvol      = graph->vvol;
  vsurf     = graph->vsurf;
  where     = graph->where;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");
  RandomPermute(nvtxs, perm, 1);

  cnvtxs   = 0;
  nmatched = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (match[v] != UNMATCHED)
      continue;

    /* Default: the vertex is matched with itself */
    mate      = v;
    bestscore = 0.0;

    /* Search for a partner only while the pair budget is not used up */
    if (nmatched < .3*nvtxs) {
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];

        /* Restricted matching: never pair across partitions */
        if (where[v] != where[nbr])
          continue;

        score = 1.0/ARATIO2(dim, vsurf[v]+vsurf[nbr]+adjwgtsum[v]+adjwgtsum[nbr]
                    -2.0*adjwgt[e], vvol[v]+vvol[nbr]);

        if (match[nbr] != UNMATCHED || vwgt[v]+vwgt[nbr] > ctrl->maxsize)
          continue;

        if (score > bestscore) {
          bestscore = score;
          mate      = nbr;
        }
      }
    }

    if (mate != v)
      nmatched++;

    cmap[v] = cmap[mate] = cnvtxs++;
    match[v]    = mate;
    match[mate] = v;
  }

  CreateCoarseGraph(graph, cnvtxs, match, perm);

  IMfree((void**)&perm, &match, LTERM);
}
/*************************************************************************
* Builds the fused-element graph (one fused vertex per part of the current
* partition), repartitions it with ParMETIS_RepartLDiffusion and projects
* the result back onto graph->where.
*
*   ctrl    - control structure; npes, mype, nparts and comm are read
*   graph   - distributed graph; graph->where holds the current partition
*             on entry and the projected partition on return
*   wspace  - workspace; wspace->indices and wspace->core are used as
*             scratch storage
*   numflag - forwarded unchanged to ParMETIS_RepartLDiffusion
**************************************************************************/
void CreateFusedElementGraph(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int *numflag)
{
  int i, j, k, l;
  int *nparts, ipart, mypart, newpart;
  int wgtflag, edgecut, npes, mype, nvtxs, nedges, counter;
  int gnfvtxs, nfvtxs, firstfvtx;
  /* Zero-initialised so that no slot handed to ParMETIS_RepartLDiffusion
     (which forwards the array to SetUpCtrl) is indeterminate; only slots
     0 and 3 are set explicitly below */
  int foptions[10] = {0};
  idxtype *fptr, *find;
  idxtype *part, *fpart, *spart, *rpart;
  idxtype *vtxdist, *xadj, *adjncy;
  idxtype *fvtxdist, *fxadj, *fadjncy;
  idxtype *map;
  realtype *fadjwgt;
  MPI_Comm *comm;

  npes = ctrl->npes;
  mype = ctrl->mype;
  nparts = &(ctrl->nparts);
  comm = &(ctrl->comm);

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  adjncy = graph->adjncy;

  nvtxs = vtxdist[mype+1]-vtxdist[mype];
  nedges = xadj[nvtxs];

  SetUp(ctrl, graph, wspace);

  /* Communicate number of parts found; after MAKECSR fvtxdist is the
     distribution of the fused vertices over the processes */
  fvtxdist = idxmalloc(npes+1, "FusedElementGraph: fvtxdist");
  MPI_Allgather((void *)nparts, 1, MPI_INT, (void *)fvtxdist, 1, MPI_INT, *comm);

  MAKECSR(i, npes, fvtxdist);
  firstfvtx = fvtxdist[mype];
  nfvtxs = fvtxdist[mype+1]-fvtxdist[mype];
  gnfvtxs = fvtxdist[npes];

  ASSERT(ctrl, nfvtxs == *nparts);

  /* part = local partition followed by the partition of the nrecv
     interface vertices received from other processes */
  part = idxmalloc(nvtxs+graph->nrecv, "FusedElementGraph: part");
  idxcopy(nvtxs, graph->where, part);
  spart = wspace->indices;
  rpart = part + nvtxs;

  CommInterfaceData(ctrl, graph, part, spart, rpart);

  /* Create a part-to-vertex mapping.  map (gnfvtxs entries), fptr
     (*nparts+1 entries) and find (nvtxs entries) are carved out of
     wspace->core when they all fit; otherwise map is heap-allocated and
     only fptr/find live in the workspace. */
  if (gnfvtxs + nvtxs + (*nparts+1) <= wspace->maxcore) {
    map = wspace->core;
    idxset(gnfvtxs, -1, map);
    fptr = map + gnfvtxs;
  }
  else {
    map = idxsmalloc(gnfvtxs, -1, "FusedElementGraph: map");
    fptr = wspace->core;
  }
  idxset((*nparts+1), 0, fptr);
  find = fptr + (*nparts+1);

  /* Count the vertices of every local part, turn the counts into CSR
     offsets and scatter the vertex ids into find */
  for (i=0; i<nvtxs; i++)
    fptr[part[i]-firstfvtx]++;

  MAKECSR(i, *nparts, fptr);
  for (i=0; i<nvtxs; i++) {
    ipart = part[i] - firstfvtx;
    find[fptr[ipart]] = i;
    fptr[ipart]++;
  }

  /* The scatter advanced every offset by one slot; shift them back */
  for (ipart=*nparts; ipart>0; ipart--)
    fptr[ipart] = fptr[ipart-1];
  fptr[0] = 0;

  /* Create the fused graph for the local edges */
  fxadj = idxsmalloc(nfvtxs+1, 0, "FusedElementGraph: fxadj");
  fadjncy = idxmalloc(nedges, "FusedElementGraph: fadjncy");
  fadjwgt = realsmalloc(nedges, 0, "FusedElementGraph: fadjwgt");

  fxadj[0] = 0;
  for (ipart=0; ipart<*nparts; ipart++) {
    counter = 0;
    mypart = ipart + firstfvtx;
    for (l=fptr[ipart]; l<fptr[ipart+1]; l++) {
      i = find[l];
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = adjncy[j];
        newpart = part[k];
        if (newpart != mypart && map[newpart] == -1) {   /* edge is not created yet */
          map[newpart] = fxadj[ipart]+counter;
          fadjncy[fxadj[ipart]+counter] = newpart;
          fadjwgt[map[newpart]] = 1;    /* alternatively = adjwgt[k] */
          counter++;
        }
        else if (newpart != mypart && map[newpart] != -1)  /* edge is already there */
          fadjwgt[map[newpart]]++;
      }
    }
    fxadj[ipart+1] = fxadj[ipart] + counter;

    /* Reset the map entries touched by this fused vertex */
    for (i=fxadj[ipart]; i<fxadj[ipart+1]; i++)
      map[fadjncy[i]] = -1;
  }

  /* Now change the weights of the interface edges */
  ChangeWeights(nfvtxs, fvtxdist, fxadj, fadjncy, fadjwgt, *comm);

  /* Repartition the graph using fused elements */
  foptions[0] = 1;
  foptions[3] = 0;

  fpart = map;  /* it is OK since nfvtxs < gnfvtxs */

  wgtflag = 1;
  ParMETIS_RepartLDiffusion(fvtxdist, fxadj, fadjncy, NULL, fadjwgt, &wgtflag,
                            numflag, foptions, &edgecut, fpart, comm);

  /* Project the partitioning back to the original graph */
  for (ipart=0; ipart<nfvtxs; ipart++) {
    ASSERTP(ctrl, fpart[ipart] >= 0 && fpart[ipart] < npes, (ctrl, "%d %d %d\n", ipart , fpart[ipart], npes) );
    for (i=fptr[ipart]; i<fptr[ipart+1]; i++)
      graph->where[find[i]]=fpart[ipart];
  }

  /* map was heap-allocated only when the workspace was too small */
  if (gnfvtxs + nvtxs + (*nparts+1) > wspace->maxcore)
    IMfree((void**)&map, LTERM);
  IMfree((void**)&fvtxdist, &fxadj, &fadjncy, &fadjwgt, &part, LTERM);
}