/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ int MlevelRecursiveBisection(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, floattype *tpwgts, floattype ubfactor, int fpart) { int i, j, nvtxs, cut, tvwgt, tpwgts2[2]; idxtype *label, *where; GraphType lgraph, rgraph; floattype wsum; nvtxs = graph->nvtxs; if (nvtxs == 0) { printf("\t***Cannot bisect a graph with 0 vertices!\n\t***You are trying to partition a graph into too many parts!\n"); return 0; } /* Determine the weights of the partitions */ tvwgt = idxsum(nvtxs, graph->vwgt); tpwgts2[0] = tvwgt*ssum(nparts/2, tpwgts); tpwgts2[1] = tvwgt-tpwgts2[0]; MlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); cut = graph->mincut; /* printf("%5d %5d %5d [%5d %f]\n", tpwgts2[0], tpwgts2[1], cut, tvwgt, ssum(nparts/2, tpwgts));*/ label = graph->label; where = graph->where; for (i=0; i<nvtxs; i++) part[label[i]] = where[i] + fpart; if (nparts > 2) { SplitGraphPart(ctrl, graph, &lgraph, &rgraph); /* printf("%d %d\n", lgraph.nvtxs, rgraph.nvtxs); */ } /* Free the memory of the top level graph */ GKfree(&graph->gdata, &graph->rdata, &graph->label, LTERM); /* Scale the fractions in the tpwgts according to the true weight */ wsum = ssum(nparts/2, tpwgts); sscale(nparts/2, 1.0/wsum, tpwgts); sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2); /* for (i=0; i<nparts; i++) printf("%5.3f ", tpwgts[i]); printf("[%5.3f]\n", wsum); */ /* Do the recursive call */ if (nparts > 3) { cut += MlevelRecursiveBisection(ctrl, &lgraph, nparts/2, part, tpwgts, ubfactor, fpart); cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2); } else if (nparts == 3) { cut += MlevelRecursiveBisection(ctrl, &rgraph, nparts-nparts/2, part, tpwgts+nparts/2, ubfactor, fpart+nparts/2); GKfree(&lgraph.gdata, &lgraph.label, LTERM); } return cut; }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ void MlevelNestedDissectionCC(CtrlType *ctrl, GraphType *graph, idxtype *order, float ubfactor, int lastvtx) { int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2], nsgraphs, ncmps, rnvtxs; idxtype *label, *bndind; idxtype *cptr, *cind; GraphType *sgraphs; nvtxs = graph->nvtxs; /* Determine the weights of the partitions */ tvwgt = idxsum(nvtxs, graph->vwgt); tpwgts2[0] = tvwgt/2; tpwgts2[1] = tvwgt-tpwgts2[0]; MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor); IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); /* Order the nodes in the separator */ nbnd = graph->nbnd; bndind = graph->bndind; label = graph->label; for (i=0; i<nbnd; i++) order[label[bndind[i]]] = --lastvtx; cptr = idxmalloc(nvtxs, "MlevelNestedDissectionCC: cptr"); cind = idxmalloc(nvtxs, "MlevelNestedDissectionCC: cind"); ncmps = FindComponents(ctrl, graph, cptr, cind); /* if (ncmps > 2) printf("[%5d] has %3d components\n", nvtxs, ncmps); */ sgraphs = (GraphType *)GKmalloc(ncmps*sizeof(GraphType), "MlevelNestedDissectionCC: sgraphs"); nsgraphs = SplitGraphOrderCC(ctrl, graph, sgraphs, ncmps, cptr, cind); GKfree(&cptr, &cind, LTERM); /* Free the memory of the top level graph */ GKfree(&graph->gdata, &graph->rdata, &graph->label, LTERM); /* Go and process the subgraphs */ for (rnvtxs=i=0; i<nsgraphs; i++) { if (sgraphs[i].adjwgt == NULL) { MMDOrder(ctrl, sgraphs+i, order, lastvtx-rnvtxs); GKfree(&sgraphs[i].gdata, &sgraphs[i].label, LTERM); } else { MlevelNestedDissectionCC(ctrl, sgraphs+i, order, ubfactor, lastvtx-rnvtxs); } rnvtxs += sgraphs[i].nvtxs; } free(sgraphs); }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ void MlevelNestedDissection(CtrlType *ctrl, GraphType *graph, idxtype *order, float ubfactor, int lastvtx) { int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2]; idxtype *label, *bndind; GraphType lgraph, rgraph; nvtxs = graph->nvtxs; /* Determine the weights of the partitions */ tvwgt = idxsum(nvtxs, graph->vwgt); tpwgts2[0] = tvwgt/2; tpwgts2[1] = tvwgt-tpwgts2[0]; switch (ctrl->optype) { case OP_OEMETIS: MlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SepTmr)); ConstructMinCoverSeparator(ctrl, graph, ubfactor); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SepTmr)); break; case OP_ONMETIS: MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor); IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2])); break; } /* Order the nodes in the separator */ nbnd = graph->nbnd; bndind = graph->bndind; label = graph->label; for (i=0; i<nbnd; i++) order[label[bndind[i]]] = --lastvtx; SplitGraphOrder(ctrl, graph, &lgraph, &rgraph); /* Free the memory of the top level graph */ GKfree(&graph->gdata, &graph->rdata, &graph->label, LTERM); if (rgraph.nvtxs > MMDSWITCH) MlevelNestedDissection(ctrl, &rgraph, order, ubfactor, lastvtx); else { MMDOrder(ctrl, &rgraph, order, lastvtx); GKfree(&rgraph.gdata, &rgraph.rdata, &rgraph.label, LTERM); } if (lgraph.nvtxs > MMDSWITCH) MlevelNestedDissection(ctrl, &lgraph, order, ubfactor, lastvtx-rgraph.nvtxs); else { MMDOrder(ctrl, &lgraph, order, lastvtx-rgraph.nvtxs); GKfree(&lgraph.gdata, &lgraph.rdata, &lgraph.label, LTERM); } }
/************************************************************************* * This function deallocates any memory stored in a graph **************************************************************************/ void FreeInitialGraphAndRemap(GraphType *graph, int wgtflag, int freevsize) { int i, nedges; idxtype *adjncy, *imap; nedges = graph->nedges; adjncy = graph->adjncy; imap = graph->imap; if (imap != NULL) { for (i=0; i<nedges; i++) adjncy[i] = imap[adjncy[i]]; /* Apply local to global transformation */ } /* Free Metis's things */ GKfree((void **)&graph->match, (void **)&graph->cmap, (void **)&graph->lperm, (void **)&graph->where, (void **)&graph->label, (void **)&graph->rinfo, (void **)&graph->nrinfo, (void **)&graph->nvwgt, (void **)&graph->lpwgts, (void **)&graph->gpwgts, (void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->sepind, (void **)&graph->peind, (void **)&graph->sendptr, (void **)&graph->sendind, (void **)&graph->recvptr, (void **)&graph->recvind, (void **)&graph->imap, (void **)&graph->rlens, (void **)&graph->slens, (void **)&graph->rcand, (void **)&graph->pexadj, (void **)&graph->peadjncy, (void **)&graph->peadjloc, LTERM); if (freevsize) GKfree((void **)&graph->vsize, LTERM); if ((wgtflag&2) == 0) GKfree((void **)&graph->vwgt, LTERM); if ((wgtflag&1) == 0) GKfree((void **)&graph->adjwgt, LTERM); free(graph); }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ int MCMlevelRecursiveBisection2(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts, idxtype *part, float ubfactor, int fpart) { int i, nvtxs, cut; float wsum, tpwgts2[2]; idxtype *label, *where; GraphType lgraph, rgraph; nvtxs = graph->nvtxs; if (nvtxs == 0) return 0; /* Determine the weights of the partitions */ tpwgts2[0] = ssum(nparts/2, tpwgts); tpwgts2[1] = 1.0-tpwgts2[0]; MCMlevelEdgeBisection(ctrl, graph, tpwgts2, ubfactor); cut = graph->mincut; label = graph->label; where = graph->where; for (i=0; i<nvtxs; i++) part[label[i]] = where[i] + fpart; if (nparts > 2) SplitGraphPart(ctrl, graph, &lgraph, &rgraph); /* Free the memory of the top level graph */ GKfree((void**)&graph->gdata, &graph->nvwgt, &graph->rdata, &graph->label, &graph->npwgts, LTERM); /* Scale the fractions in the tpwgts according to the true weight */ wsum = ssum(nparts/2, tpwgts); sscale(nparts/2, 1.0/wsum, tpwgts); sscale(nparts-nparts/2, 1.0/(1.0-wsum), tpwgts+nparts/2); /* Do the recursive call */ if (nparts > 3) { cut += MCMlevelRecursiveBisection2(ctrl, &lgraph, nparts/2, tpwgts, part, ubfactor, fpart); cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2); } else if (nparts == 3) { cut += MCMlevelRecursiveBisection2(ctrl, &rgraph, nparts-nparts/2, tpwgts+nparts/2, part, ubfactor, fpart+nparts/2); GKfree((void**)&lgraph.gdata, &lgraph.nvwgt, &lgraph.label, LTERM); } return cut; }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ int MlevelKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *tpwgts, float ubfactor) { int i, j, nvtxs, tvwgt, tpwgts2[2]; GraphType *cgraph; int wgtflag=3, numflag=0, options[10], edgecut; cgraph = Coarsen2Way(ctrl, graph); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); AllocateKWayPartitionMemory(ctrl, cgraph, nparts); options[0] = 1; options[OPTION_CTYPE] = MATCH_SHEMKWAY; options[OPTION_ITYPE] = IPART_GGPKL; options[OPTION_RTYPE] = RTYPE_FM; options[OPTION_DBGLVL] = 0; METIS_WPartGraphRecursive(&cgraph->nvtxs, cgraph->xadj, cgraph->adjncy, cgraph->vwgt, cgraph->adjwgt, &wgtflag, &numflag, &nparts, tpwgts, options, &edgecut, cgraph->where); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut)); IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); RefineKWay(ctrl, graph, cgraph, nparts, tpwgts, ubfactor); idxcopy(graph->nvtxs, graph->where, part); GKfree(&graph->gdata, &graph->rdata, LTERM); return graph->mincut; }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm) { int i, npes, mype, nvtxs, firstvtx, dbglvl; idxtype *xadj, *adjncy; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; int zeroflg = 0; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); if (npes == 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); return; } /* Setup a fake graph to allow the rest of the code to work unchanged */ dbglvl = 0; nvtxs = vtxdist[mype+1]-vtxdist[mype]; firstvtx = vtxdist[mype]; xadj = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj"); adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy"); for (i=0; i<nvtxs; i++) { xadj[i] = i; adjncy[i] = firstvtx + (i+1)%nvtxs; } xadj[nvtxs] = nvtxs; /* Proceed with the rest of the code */ SetUpCtrl(&ctrl, npes, dbglvl, *comm); ctrl.seed = mype; ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial geometric partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace); idxcopy(graph->nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); GKfree((void **)&xadj, (void **)&adjncy, LTERM); }
/************************************************************************* * This function computes movement statistics for adaptive refinement * schemes **************************************************************************/ void ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout) { int i, j, nvtxs; idxtype *vwgt, *where; idxtype *lpvtxs, *gpvtxs; nvtxs = graph->nvtxs; vwgt = graph->vwgt; where = graph->where; lpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: lpvtxs"); gpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs"); for (j=i=0; i<nvtxs; i++) { lpvtxs[where[i]]++; if (where[i] != ctrl->mype) j++; } /* PrintVector(ctrl, ctrl->npes, 0, lpvtxs, "Lpvtxs: "); */ MPI_Allreduce((void *)lpvtxs, (void *)gpvtxs, ctrl->nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); *nmoved = GlobalSESum(ctrl, j); *maxout = GlobalSEMax(ctrl, j); *maxin = GlobalSEMax(ctrl, gpvtxs[ctrl->mype]-(nvtxs-j)); GKfree((void **)&lpvtxs, (void **)&gpvtxs, LTERM); }
void AllocateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int nparts, nvtxs; idxtype *vwgt; NRInfoType *rinfo, *myrinfo; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); nvtxs = graph->nvtxs; nparts = ctrl->nparts; graph->nrinfo = (NRInfoType *)GKmalloc(sizeof(NRInfoType)*nvtxs, "AllocateNodePartitionParams: rinfo"); graph->lpwgts = idxmalloc(2*nparts, "AllocateNodePartitionParams: lpwgts"); graph->gpwgts = idxmalloc(2*nparts, "AllocateNodePartitionParams: gpwgts"); graph->sepind = idxmalloc(nvtxs, "AllocateNodePartitionParams: sepind"); graph->hmarker = idxmalloc(nvtxs, "AllocateNodePartitionParams: hmarker"); /* Allocate additional memory for graph->vwgt in order to store the weights of the remote vertices */ vwgt = graph->vwgt; graph->vwgt = idxmalloc(nvtxs+graph->nrecv, "AllocateNodePartitionParams: graph->vwgt"); idxcopy(nvtxs, vwgt, graph->vwgt); GKfree((void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); }
/***************************************************************************** * This function computes a partitioning using coordinate data. *****************************************************************************/ void ParMETIS_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, int *nparts, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int i; int ncon = 1; float *tpwgts, ubvec[MAXNCON]; int myoptions[10]; tpwgts = fmalloc(*nparts*ncon, "tpwgts"); for (i=0; i<*nparts*ncon; i++) tpwgts[i] = 1.0/(float)(*nparts); for (i=0; i<ncon; i++) ubvec[i] = UNBALANCE_FRACTION; if (options[0] == 0) { myoptions[0] = 0; } else { myoptions[0] = 1; myoptions[PMV3_OPTION_DBGLVL] = options[OPTION_DBGLVL]; myoptions[PMV3_OPTION_SEED] = GLOBAL_SEED; } ParMETIS_V3_PartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ndims, xyz, &ncon, nparts, tpwgts, ubvec, myoptions, edgecut, part, comm); GKfree((void **)&tpwgts, LTERM); return; }
/***************************************************************************** * This function computes a repartitioning by LMSR scratch-remap. *****************************************************************************/ void ParMETIS_RepartMLRemap(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int i; int nparts; int ncon = 1; float *tpwgts, ubvec[MAXNCON]; float ipc_factor = 1000.0; int myoptions[10]; MPI_Comm_size(*comm, &nparts); tpwgts = fmalloc(nparts*ncon, "tpwgts"); for (i=0; i<nparts*ncon; i++) tpwgts[i] = 1.0/(float)(nparts); for (i=0; i<ncon; i++) ubvec[i] = UNBALANCE_FRACTION; if (options[0] == 0) { myoptions[0] = 0; } else { myoptions[0] = 1; myoptions[PMV3_OPTION_DBGLVL] = options[OPTION_DBGLVL]; myoptions[PMV3_OPTION_SEED] = GLOBAL_SEED; myoptions[PMV3_OPTION_PSR] = PARMETIS_PSR_COUPLED; } ParMETIS_V3_AdaptiveRepart(vtxdist, xadj, adjncy, vwgt, NULL, adjwgt, wgtflag, numflag, &ncon, &nparts, tpwgts, ubvec, &ipc_factor, myoptions, edgecut, part, comm); GKfree((void **)&tpwgts, LTERM); }
/************************************************************************* * This function takes a bisection and constructs a minimum weight vertex * separator out of it. It uses the node-based separator refinement for it. **************************************************************************/ void ConstructSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) { int i, j, k, nvtxs, nbnd; idxtype *xadj, *where, *bndind; nvtxs = graph->nvtxs; xadj = graph->xadj; nbnd = graph->nbnd; bndind = graph->bndind; where = idxcopy(nvtxs, graph->where, idxwspacemalloc(ctrl, nvtxs)); /* Put the nodes in the boundary into the separator */ for (i=0; i<nbnd; i++) { j = bndind[i]; if (xadj[j+1]-xadj[j] > 0) /* Ignore islands */ where[j] = 2; } GKfree(&graph->rdata, LTERM); Allocate2WayNodePartitionMemory(ctrl, graph); idxcopy(nvtxs, where, graph->where); idxwspacefree(ctrl, nvtxs); ASSERT(IsSeparable(graph)); Compute2WayNodePartitionParams(ctrl, graph); ASSERT(CheckNodePartitionParams(graph)); FM_2WayNodeRefine(ctrl, graph, ubfactor, 8); ASSERT(IsSeparable(graph)); }
/************************************************************************* * This function checks whether or not partition pid is contigous **************************************************************************/ int IsConnected2(GraphType *graph, int report) { int i, j, k, nvtxs, first, last, nleft, ncmps, wgt; idxtype *xadj, *adjncy, *where, *touched, *queue; idxtype *cptr; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; where = graph->where; touched = idxsmalloc(nvtxs, 0, "IsConnected: touched"); queue = idxmalloc(nvtxs, "IsConnected: queue"); cptr = idxmalloc(nvtxs, "IsConnected: cptr"); nleft = nvtxs; touched[0] = 1; queue[0] = 0; first = 0; last = 1; cptr[0] = 0; /* This actually points to queue */ ncmps = 0; while (first != nleft) { if (first == last) { /* Find another starting vertex */ cptr[++ncmps] = first; for (i=0; i<nvtxs; i++) { if (!touched[i]) break; } queue[last++] = i; touched[i] = 1; } i = queue[first++]; for (j=xadj[i]; j<xadj[i+1]; j++) { k = adjncy[j]; if (!touched[k]) { queue[last++] = k; touched[k] = 1; } } } cptr[++ncmps] = first; if (ncmps > 1 && report) { printf("%d connected components:\t", ncmps); for (i=0; i<ncmps; i++) { if (cptr[i+1]-cptr[i] > 200) printf("[%5d] ", cptr[i+1]-cptr[i]); } printf("\n"); } GKfree(&touched, &queue, &cptr, LTERM); return (ncmps == 1 ? 1 : 0); }
/****************************************************************************** * This function takes a partition vector that is distributed and reads in * the original graph and computes the edgecut *******************************************************************************/ int ComputeRealCut2(idxtype *vtxdist, idxtype *mvtxdist, idxtype *part, idxtype *mpart, char *filename, MPI_Comm comm) { int i, j, nvtxs, mype, npes, cut; idxtype *xadj, *adjncy, *gpart, *gmpart, *perm, *sizes; MPI_Status status; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &mype); if (mype != 0) { MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm); MPI_Send((void *)mpart, mvtxdist[mype+1]-mvtxdist[mype], IDX_DATATYPE, 0, 1, comm); } else { /* Processor 0 does all the rest */ gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart"); idxcopy(vtxdist[1], part, gpart); gmpart = idxmalloc(mvtxdist[npes], "ComputeRealCut: gmpart"); idxcopy(mvtxdist[1], mpart, gmpart); for (i=1; i<npes; i++) { MPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_DATATYPE, i, 1, comm, &status); MPI_Recv((void *)(gmpart+mvtxdist[i]), mvtxdist[i+1]-mvtxdist[i], IDX_DATATYPE, i, 1, comm, &status); } /* OK, now go and reconstruct the permutation to go from the graph to mgraph */ perm = idxmalloc(vtxdist[npes], "ComputeRealCut: perm"); sizes = idxsmalloc(npes+1, 0, "ComputeRealCut: sizes"); for (i=0; i<vtxdist[npes]; i++) sizes[gpart[i]]++; MAKECSR(i, npes, sizes); for (i=0; i<vtxdist[npes]; i++) perm[i] = sizes[gpart[i]]++; /* Ok, now read the graph from the file */ ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy); /* OK, now compute the cut */ for (cut=0, i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (gmpart[perm[i]] != gmpart[perm[adjncy[j]]]) cut++; } } cut = cut/2; GKfree(&gpart, &gmpart, &perm, &sizes, &xadj, &adjncy, LTERM); return cut; } return 0; }
void FreeWSpace(WorkSpaceType *wspace) { GKfree((void **)&wspace->core, (void **)&wspace->pv1, (void **)&wspace->pv2, (void **)&wspace->pv3, (void **)&wspace->pv4, (void **)&wspace->pepairs1, (void **)&wspace->pepairs2, LTERM); }
/************************************************************************* * This function is the entry point for ONWMETIS. It requires weights on the * vertices. It is for the case that the matrix has been pre-compressed. **************************************************************************/ void METIS_EdgeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *options, int *sepsize, idxtype *part) { int i, j, tvwgt, tpwgts[2]; GraphType graph; CtrlType ctrl; SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); tvwgt = idxsum(*nvtxs, graph.vwgt); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = ONMETIS_CTYPE; ctrl.IType = ONMETIS_ITYPE; ctrl.RType = ONMETIS_RTYPE; ctrl.dbglvl = ONMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.oflags = 0; ctrl.pfactor = 0; ctrl.nseps = 5; ctrl.optype = OP_OEMETIS; ctrl.CoarsenTo = amin(100, *nvtxs-1); ctrl.maxvwgt = 1.5*tvwgt/ctrl.CoarsenTo; InitRandom(options[7]); AllocateWorkSpace(&ctrl, &graph, 2); /*============================================================ * Perform the bisection *============================================================*/ tpwgts[0] = tvwgt/2; tpwgts[1] = tvwgt-tpwgts[0]; MlevelEdgeBisection(&ctrl, &graph, tpwgts, 1.05); ConstructMinCoverSeparator(&ctrl, &graph, 1.05); *sepsize = graph.pwgts[2]; idxcopy(*nvtxs, graph.where, part); GKfree((void**)&graph.gdata, &graph.rdata, &graph.label, LTERM); FreeWorkSpace(&ctrl, &graph); }
/************************************************************************* * This function is the entry point for ONWMETIS. It requires weights on the * vertices. It is for the case that the matrix has been pre-compressed. **************************************************************************/ void METIS_NodeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, float *ubfactor, int *options, int *sepsize, idxtype *part) { int i, j, tvwgt, tpwgts[2]; GraphType graph; CtrlType ctrl; SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); tvwgt = idxsum(*nvtxs, graph.vwgt); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = ONMETIS_CTYPE; ctrl.IType = ONMETIS_ITYPE; ctrl.RType = ONMETIS_RTYPE; ctrl.dbglvl = ONMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.oflags = OFLAG_COMPRESS; /* For by-passing the pre-coarsening for multiple runs */ ctrl.RType = 2; /* Standard 1-sided node refinement code */ ctrl.pfactor = 0; ctrl.nseps = 5; /* This should match NUM_INIT_MSECTIONS in ParMETISLib/defs.h */ ctrl.optype = OP_ONMETIS; InitRandom(options[7]); AllocateWorkSpace(&ctrl, &graph, 2); /*============================================================ * Perform the bisection *============================================================*/ tpwgts[0] = tvwgt/2; tpwgts[1] = tvwgt-tpwgts[0]; MlevelNodeBisectionMultiple(&ctrl, &graph, tpwgts, *ubfactor*.95); *sepsize = graph.pwgts[2]; idxcopy(*nvtxs, graph.where, part); GKfree((void **)&graph.gdata, &graph.rdata, &graph.label, LTERM); FreeWorkSpace(&ctrl, &graph); }
/************************************************************************* * This function is the entry point for KMETIS with seed specification * in options[7] **************************************************************************/ void METIS_PartGraphKway2(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, int *edgecut, idxtype *part) { int i; float *tpwgts; tpwgts = fmalloc(*nparts, "KMETIS: tpwgts"); for (i=0; i<*nparts; i++) tpwgts[i] = 1.0/(1.0*(*nparts)); METIS_WPartGraphKway2(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, nparts, tpwgts, options, edgecut, part); GKfree((void **)&tpwgts, LTERM); }
/************************************************************************* * Let the game begin **************************************************************************/ main(int argc, char *argv[]) { int i, j, ne, nn, etype, numflag=0; idxtype *elmnts, *xadj, *adjncy; timer IOTmr, DUALTmr; char fileout[256], etypestr[4][5] = {"TRI", "TET", "HEX", "QUAD"}; if (argc != 2) { printf("Usage: %s <meshfile>\n",argv[0]); exit(0); } cleartimer(IOTmr); cleartimer(DUALTmr); starttimer(IOTmr); elmnts = ReadMesh(argv[1], &ne, &nn, &etype); stoptimer(IOTmr); printf("**********************************************************************\n"); printf("%s", METISTITLE); printf("Mesh Information ----------------------------------------------------\n"); printf(" Name: %s, #Elements: %d, #Nodes: %d, Etype: %s\n\n", argv[1], ne, nn, etypestr[etype-1]); printf("Forming Dual Graph... -----------------------------------------------\n"); xadj = idxmalloc(ne+1, "main: xadj"); adjncy = idxmalloc(10*ne, "main: adjncy"); starttimer(DUALTmr); METIS_MeshToDual(&ne, &nn, elmnts, &etype, &numflag, xadj, adjncy); stoptimer(DUALTmr); printf(" Dual Information: #Vertices: %d, #Edges: %d\n", ne, xadj[ne]/2); sprintf(fileout, "%s.dgraph", argv[1]); starttimer(IOTmr); WriteGraph(fileout, ne, xadj, adjncy); stoptimer(IOTmr); printf("\nTiming Information --------------------------------------------------\n"); printf(" I/O: \t\t %7.3f\n", gettimer(IOTmr)); printf(" Dual Creation:\t\t %7.3f\n", gettimer(DUALTmr)); printf("**********************************************************************\n"); GKfree(&elmnts, &xadj, &adjncy, LTERM); }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ int MCMlevelKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *rubvec) { int i, j, nvtxs; GraphType *cgraph; int options[10], edgecut; cgraph = MCCoarsen2Way(ctrl, graph); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); MocAllocateKWayPartitionMemory(ctrl, cgraph, nparts); options[0] = 1; options[OPTION_CTYPE] = MATCH_SBHEM_INFNORM; options[OPTION_ITYPE] = IPART_RANDOM; options[OPTION_RTYPE] = RTYPE_FM; options[OPTION_DBGLVL] = 0; /* Determine what you will use as the initial partitioner, based on tolerances */ for (i=0; i<graph->ncon; i++) { if (rubvec[i] > 1.2) break; } if (i == graph->ncon) METIS_mCPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon, cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts, options, &edgecut, cgraph->where); else METIS_mCHPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon, cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts, rubvec, options, &edgecut, cgraph->where); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut)); IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); MocRefineKWayHorizontal(ctrl, graph, cgraph, nparts, rubvec); idxcopy(graph->nvtxs, graph->where, part); GKfree(&graph->nvwgt, &graph->npwgts, &graph->gdata, &graph->rdata, LTERM); return graph->mincut; }
/************************************************************************* * This function computes the size of the coarse graph **************************************************************************/ int ComputeCoarseGraphSize(int nvtxs, idxtype *xadj, idxtype *adjncy, int cnvtxs, idxtype *cmap, idxtype *match, idxtype *perm) { int i, j, k, istart, iend, nedges, cnedges, v, u; idxtype *htable; htable = idxsmalloc(cnvtxs, -1, "htable"); cnvtxs = cnedges = 0; for (i=0; i<nvtxs; i++) { v = perm[i]; if (cmap[v] != cnvtxs) continue; htable[cnvtxs] = cnvtxs; u = match[v]; istart = xadj[v]; iend = xadj[v+1]; for (j=istart; j<iend; j++) { k = cmap[adjncy[j]]; if (htable[k] != cnvtxs) { htable[k] = cnvtxs; cnedges++; } } if (v != u) { istart = xadj[u]; iend = xadj[u+1]; for (j=istart; j<iend; j++) { k = cmap[adjncy[j]]; if (htable[k] != cnvtxs) { htable[k] = cnvtxs; cnedges++; } } } cnvtxs++; } GKfree(&htable, LTERM); return cnedges; }
// this actually undoes the permutation Matrix* permuteRowsAndColumns(const Matrix *M, const idxtype* rowPerm, const idxtype* colPerm) { Matrix *ret = allocMatrix(M->nvtxs, M->nnz, 0, 0, 0); int i; idxtype* revRowPerm = idxmalloc(M->nvtxs, "permuteRowsAndColumns:revRowPerm"); // idxtype* revColPerm = idxmalloc(M->nvtxs, // "permuteRowsAndColumns:revColPerm"); for ( i=0; i<M->nvtxs; i++ ) { revRowPerm[rowPerm[i]] = i; // revColPerm[colPerm[i]] = i; } ret->xadj[0]=0; for ( i=0; i<M->nvtxs; i++ ) { int orgI = revRowPerm[i]; int j; ret->xadj[i+1] = ret->xadj[i] + ( M->xadj[orgI+1] - M->xadj[orgI] ); for ( j=ret->xadj[i]; j<ret->xadj[i+1]; j++ ) { int orgJ = M->xadj[orgI] + j - ret->xadj[i]; ret->adjncy[j] = colPerm[M->adjncy[orgJ]]; ret->adjwgt[j] = M->adjwgt[orgJ]; } ParallelQSort( ret->adjncy, ret->adjwgt, ret->xadj[i], ret->xadj[i+1]-1 ); } ret->nnz = M->nnz; // GKfree ( (void**)&revRowPerm, (void**)&revColPerm, LTERM ); GKfree ( (void**)&revRowPerm, LTERM ); return ret; }
/************************************************************************* * This function frees the buckets **************************************************************************/ void PQueueFree(CtrlType *ctrl, PQueueType *queue) { if (queue->type == 1) { if (queue->mustfree) { queue->buckets -= queue->ngainspan; GKfree(&queue->nodes, &queue->buckets, LTERM); } else { idxwspacefree(ctrl, sizeof(ListNodeType *)*(queue->ngainspan+queue->pgainspan+1)/sizeof(idxtype)); idxwspacefree(ctrl, sizeof(ListNodeType)*queue->maxnodes/sizeof(idxtype)); } } else { idxwspacefree(ctrl, sizeof(KeyValueType)*queue->maxnodes/sizeof(idxtype)); idxwspacefree(ctrl, queue->maxnodes); } queue->maxnodes = 0; }
/************************************************************************* * This function computes movement statistics for adaptive refinement * schemes **************************************************************************/ void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout) { int i, nvtxs, nparts, myhome; idxtype *vwgt, *where; idxtype *lend, *gend, *lleft, *gleft, *lstart, *gstart; nvtxs = graph->nvtxs; vwgt = graph->vwgt; where = graph->where; nparts = ctrl->nparts; lstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lstart"); gstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gstart"); lleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lleft"); gleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gleft"); lend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lend"); gend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gend"); for (i=0; i<nvtxs; i++) { myhome = (ctrl->ps_relation == COUPLED) ? ctrl->mype : graph->home[i]; lstart[myhome] += (graph->vsize == NULL) ? 1 : graph->vsize[i]; lend[where[i]] += (graph->vsize == NULL) ? 1 : graph->vsize[i]; if (where[i] != myhome) lleft[myhome] += (graph->vsize == NULL) ? 1 : graph->vsize[i]; } /* PrintVector(ctrl, ctrl->npes, 0, lend, "Lend: "); */ MPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); MPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); MPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); *nmoved = idxsum(nparts, gleft); *maxout = gleft[idxamax(nparts, gleft)]; for (i=0; i<nparts; i++) lstart[i] = gend[i]+gleft[i]-gstart[i]; *maxin = lstart[idxamax(nparts, lstart)]; GKfree((void **)&lstart, (void **)&gstart, (void **)&lleft, (void **)&gleft, (void **)&lend, (void **)&gend, LTERM); }
/************************************************************************* * This function deallocates any memory stored in a graph **************************************************************************/ void FreeGraph(GraphType *graph) { GKfree((void **)&graph->xadj, (void **)&graph->vwgt, (void **)&graph->nvwgt, (void **)&graph->vsize, (void **)&graph->adjncy, (void **)&graph->adjwgt, (void **)&graph->vtxdist, (void **)&graph->match, (void **)&graph->cmap, (void **)&graph->lperm, (void **)&graph->label, (void **)&graph->where, (void **)&graph->home, (void **)&graph->rinfo, (void **)&graph->nrinfo, (void **)&graph->sepind, (void **)&graph->hmarker, (void **)&graph->lpwgts, (void **)&graph->gpwgts, (void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->peind, (void **)&graph->sendptr, (void **)&graph->sendind, (void **)&graph->recvptr, (void **)&graph->recvind, (void **)&graph->imap, (void **)&graph->rlens, (void **)&graph->slens, (void **)&graph->rcand, (void **)&graph->pexadj, (void **)&graph->peadjncy, (void **)&graph->peadjloc, LTERM); free(graph); }
/************************************************************************* * This function takes a graph and produces a bisection by using a region * growing algorithm. The resulting partition is returned in * graph->where **************************************************************************/ void MocGrowBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor) { int i, j, k, nvtxs, ncon, from, bestcut, mincut, nbfs; idxtype *bestwhere, *where; nvtxs = graph->nvtxs; MocAllocate2WayPartitionMemory(ctrl, graph); where = graph->where; bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere"); nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); bestcut = idxsum(graph->nedges, graph->adjwgt); for (; nbfs>0; nbfs--) { idxset(nvtxs, 1, where); where[RandomInRange(nvtxs)] = 0; MocCompute2WayPartitionParams(ctrl, graph); MocInit2WayBalance(ctrl, graph, tpwgts); MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); MocBalance2Way(ctrl, graph, tpwgts, 1.02); MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4); if (bestcut > graph->mincut) { bestcut = graph->mincut; idxcopy(nvtxs, where, bestwhere); if (bestcut == 0) break; } } graph->mincut = bestcut; idxcopy(nvtxs, bestwhere, where); GKfree((void**)&bestwhere, LTERM); }
/************************************************************************* * This function finds a matching using the HEM heuristic **************************************************************************/ void EstimateCFraction(int nvtxs, idxtype *xadj, idxtype *adjncy, floattype *vfraction, floattype *efraction) { int i, ii, j, cnvtxs, cnedges, maxidx; idxtype *match, *cmap, *perm; cmap = idxmalloc(nvtxs, "cmap"); match = idxsmalloc(nvtxs, UNMATCHED, "match"); perm = idxmalloc(nvtxs, "perm"); RandomPermute(nvtxs, perm, 1); cnvtxs = 0; for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; if (match[i] == UNMATCHED) { /* Unmatched */ maxidx = i; /* Find a random matching, subject to maxvwgt constraints */ for (j=xadj[i]; j<xadj[i+1]; j++) { if (match[adjncy[j]] == UNMATCHED) { maxidx = adjncy[j]; break; } } cmap[i] = cmap[maxidx] = cnvtxs++; match[i] = maxidx; match[maxidx] = i; } } cnedges = ComputeCoarseGraphSize(nvtxs, xadj, adjncy, cnvtxs, cmap, match, perm); *vfraction = (1.0*cnvtxs)/(1.0*nvtxs); *efraction = (1.0*cnedges)/(1.0*xadj[nvtxs]); GKfree(&cmap, &match, &perm, LTERM); }
/****************************************************************************** * This function takes a partition vector that is distributed and reads in * the original graph and computes the edgecut *******************************************************************************/ int ComputeRealCut(idxtype *vtxdist, idxtype *part, char *filename, MPI_Comm comm) { int i, j, nvtxs, mype, npes, cut; idxtype *xadj, *adjncy, *gpart; MPI_Status status; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &mype); if (mype != 0) { MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm); } else { /* Processor 0 does all the rest */ gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart"); idxcopy(vtxdist[1], part, gpart); for (i=1; i<npes; i++) MPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_DATATYPE, i, 1, comm, &status); ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy); /* OK, now compute the cut */ for (cut=0, i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (gpart[i] != gpart[adjncy[j]]) cut++; } } cut = cut/2; GKfree(&gpart, &xadj, &adjncy, LTERM); return cut; } return 0; }
/************************************************************************* * This function computes the subdomain graph **************************************************************************/ void EliminateSubDomainEdges(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts) { int i, ii, j, k, me, other, nvtxs, total, max, avg, totalout, nind, ncand, ncand2, target, target2, nadd; int min, move, cpwgt, tvwgt; idxtype *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, *pmat, *ndoms, *mypmat, *otherpmat, *ind; KeyValueType *cand, *cand2; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; adjwgt = graph->adjwgt; where = graph->where; pwgts = graph->pwgts; /* We assume that this is properly initialized */ maxpwgt = idxwspacemalloc(ctrl, nparts); ndoms = idxwspacemalloc(ctrl, nparts); otherpmat = idxwspacemalloc(ctrl, nparts); ind = idxwspacemalloc(ctrl, nvtxs); pmat = ctrl->wspace.pmat; cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); cand2 = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand"); /* Compute the pmat matrix and ndoms */ ComputeSubDomainGraph(graph, nparts, pmat, ndoms); /* Compute the maximum allowed weight for each domain */ tvwgt = idxsum(nparts, pwgts); for (i=0; i<nparts; i++) maxpwgt[i] = 1.25*tpwgts[i]*tvwgt; /* Get into the loop eliminating subdomain connections */ for (;;) { total = idxsum(nparts, ndoms); avg = total/nparts; max = ndoms[idxamax(nparts, ndoms)]; /* printf("Adjacent Subdomain Stats: Total: %3d, Max: %3d, Avg: %3d [%5d]\n", total, max, avg, idxsum(nparts*nparts, pmat)); */ if (max < 1.4*avg) break; me = idxamax(nparts, ndoms); mypmat = pmat + me*nparts; totalout = idxsum(nparts, mypmat); /*printf("Me: %d, TotalOut: %d,\n", me, totalout);*/ /* Sort the connections according to their cut */ for (ncand2=0, i=0; i<nparts; i++) { if (mypmat[i] > 0) { cand2[ncand2].key = mypmat[i]; cand2[ncand2++].val = i; } } ikeysort(ncand2, cand2); move = 0; for (min=0; min<ncand2; min++) { if (cand2[min].key > totalout/(2*ndoms[me])) break; other = cand2[min].val; /*printf("\tMinOut: %d to %d\n", mypmat[other], other);*/ idxset(nparts, 0, otherpmat); /* Go and find the vertices in 'other' that are connected in 'me' */ for (nind=0, i=0; i<nvtxs; i++) { if (where[i] == other) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (where[adjncy[j]] == me) { ind[nind++] = i; break; } } } } /* Go and construct the otherpmat to see where these nind vertices are connected to */ for (cpwgt=0, ii=0; ii<nind; ii++) { i = ind[ii]; cpwgt += vwgt[i]; for (j=xadj[i]; j<xadj[i+1]; j++) otherpmat[where[adjncy[j]]] += adjwgt[j]; } otherpmat[other] = 0; for (ncand=0, i=0; i<nparts; i++) { if (otherpmat[i] > 0) { cand[ncand].key = -otherpmat[i]; cand[ncand++].val = i; } } ikeysort(ncand, cand); /* * Go through and the select the first domain that is common with 'me', and * does not increase the ndoms[target] higher than my ndoms, subject to the * maxpwgt constraint. Traversal is done from the mostly connected to the least. */ target = target2 = -1; for (i=0; i<ncand; i++) { k = cand[i].val; if (mypmat[k] > 0) { if (pwgts[k] + cpwgt > maxpwgt[k]) /* Check if balance will go off */ continue; for (j=0; j<nparts; j++) { if (otherpmat[j] > 0 && ndoms[j] >= ndoms[me]-1 && pmat[nparts*j+k] == 0) break; } if (j == nparts) { /* No bad second level effects */ for (nadd=0, j=0; j<nparts; j++) { if (otherpmat[j] > 0 && pmat[nparts*k+j] == 0) nadd++; } /*printf("\t\tto=%d, nadd=%d, %d\n", k, nadd, ndoms[k]);*/ if (target2 == -1 && ndoms[k]+nadd < ndoms[me]) { target2 = k; } if (nadd == 0) { target = k; break; } } } } if (target == -1 && target2 != -1) target = target2; if (target == -1) { /* printf("\t\tCould not make the move\n");*/ continue; } /*printf("\t\tMoving to %d\n", target);*/ /* Update the partition weights */ INC_DEC(pwgts[target], pwgts[other], cpwgt); MoveGroupMConn(ctrl, graph, ndoms, pmat, nparts, target, nind, ind); move = 1; break; } if (move == 0) break; } idxwspacefree(ctrl, nparts); idxwspacefree(ctrl, nparts); idxwspacefree(ctrl, nparts); idxwspacefree(ctrl, nvtxs); GKfree(&cand, &cand2, LTERM); }
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) { int h, i, ii, iii, j, k, c; int pass, nvtxs, nedges, ncon; int nmoves, nmoved, nswaps, nzgswaps; /* int gnswaps, gnzgswaps; */ int me, firstvtx, lastvtx, yourlastvtx; int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; int nlupd, nsupd, nnbrs, nchanged; idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; idxtype *where, *tmp_where, *moved; floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; idxtype *update, *supdate, *rupdate, *pe_updates; idxtype *changed, *perm, *pperm, *htable; idxtype *peind, *recvptr, *sendptr; KeyValueType *swchanges, *rwchanges; RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; int *nupds_pe; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); /*************************/ /* set up common aliases */ /*************************/ nvtxs = graph->nvtxs; nedges = graph->nedges; ncon = graph->ncon; vtxdist = graph->vtxdist; xadj = graph->xadj; ladjncy = graph->adjncy; adjwgt = graph->adjwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->rinfo; lnpwgts = graph->lnpwgts; gnpwgts = graph->gnpwgts; ubvec = ctrl->ubvec; tpwgts = ctrl->tpwgts; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; changed = idxmalloc(nvtxs, "KWR: changed"); rwchanges = wspace->pairs; swchanges = rwchanges + recvptr[nnbrs]; /************************************/ /* set up important data structures */ /************************************/ perm = idxmalloc(nvtxs, "KWR: perm"); pperm = idxmalloc(nparts, "KWR: pperm"); update = idxmalloc(nvtxs, "KWR: update"); supdate = wspace->indices; rupdate = supdate + recvptr[nnbrs]; nupds_pe = imalloc(npes, "KWR: nupds_pe"); htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); for (i=0; i<nparts; i++) { for (h=0; h<ncon; h++) { badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h]; } } movewgts = fmalloc(nparts*ncon, "KWR: movewgts"); ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts"); pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts"); overfill = fmalloc(nparts*ncon, "KWR: overfill"); moved = idxmalloc(nvtxs, "KWR: moved"); tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where"); tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); idxcopy(nvtxs+graph->nrecv, where, tmp_where); for (i=0; i<nvtxs; i++) { tmp_rinfo[i].id = rinfo[i].id; tmp_rinfo[i].ed = rinfo[i].ed; tmp_rinfo[i].ndegrees = rinfo[i].ndegrees; tmp_rinfo[i].degrees = tmp_edegrees+xadj[i]; for (j=0; j<rinfo[i].ndegrees; j++) { tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge; tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt; } } nswaps = nzgswaps = 0; /*********************************************************/ /* perform a small number of passes through the vertices */ /*********************************************************/ for (pass=0; pass<npasses; pass++) { if (mype == 0) RandomPermute(nparts, pperm, 1); MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); FastRandomPermute(nvtxs, perm, 1); oldcut = graph->mincut; /* check to see if the partitioning is imbalanced */ Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); ubavg = savg(ncon, ubvec); lbavg = savg(ncon, lbvec); imbalanced = (lbavg > ubavg) ? 1 : 0; for (c=0; c<2; c++) { scopy(ncon*nparts, gnpwgts, ognpwgts); sset(ncon*nparts, 0.0, movewgts); nmoved = 0; /**********************************************/ /* PASS ONE -- record stats for desired moves */ /**********************************************/ for (iii=0; iii<nvtxs; iii++) { i = perm[iii]; from = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; for (h=0; h<ncon; h++) if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT) break; if (h < ncon) { continue; } /* check for a potential improvement */ if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) { my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) { to = my_edegrees[k].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) break; } } oldto = to; /* check if a subdomain was found that fits */ if (k < tmp_rinfo[i].ndegrees) { for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) { to = my_edegrees[j].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) { if (my_edegrees[j].ewgt > my_edegrees[k].ewgt || (my_edegrees[j].ewgt == my_edegrees[k].ewgt && IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ k = j; oldto = my_edegrees[k].edge; } } } } to = oldto; if (my_edegrees[k].ewgt > tmp_rinfo[i].id || (my_edegrees[k].ewgt == tmp_rinfo[i].id && (imbalanced || graph->level > 3 || iii % 8 == 0) && IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ /****************************************/ /* Update tmp arrays of the moved vertex */ /****************************************/ tmp_where[i] = to; moved[nmoved++] = i; for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; gnpwgts[to*ncon+h] += nvwgt[h]; gnpwgts[from*ncon+h] -= nvwgt[h]; movewgts[to*ncon+h] += nvwgt[h]; movewgts[from*ncon+h] -= nvwgt[h]; } tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } } /******************************************/ /* Let processors know the subdomain wgts */ /* if all proposed moves commit. */ /******************************************/ MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); /**************************/ /* compute overfill array */ /**************************/ overweight = 0; for (j=0; j<nparts; j++) { for (h=0; h<ncon; h++) { if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) { overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); } else { overfill[j*ncon+h] = 0.0; } overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); overfill[j*ncon+h] *= movewgts[j*ncon+h]; if (overfill[j*ncon+h] > 0.0) overweight = 1; ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); } } /****************************************************/ /* select moves to undo according to overfill array */ /****************************************************/ if (overweight == 1) { for (iii=0; iii<nmoved; iii++) { i = moved[iii]; oldto = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) if (my_edegrees[k].edge == where[i]) break; for (h=0; h<ncon; h++) if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) break; /**********************************/ /* nullify this move if necessary */ /**********************************/ if (k != tmp_rinfo[i].ndegrees && h != ncon) { moved[iii] = -1; from = oldto; to = where[i]; for (h=0; h<ncon; h++) { overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0); } tmp_where[i] = to; tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } /*************************************************/ /* PASS TWO -- commit the remainder of the moves */ /*************************************************/ nlupd = nsupd = nmoves = nchanged = 0; for (iii=0; iii<nmoved; iii++) { i = moved[iii]; if (i == -1) continue; where[i] = tmp_where[i]; /* Make sure to update the vertex information */ if (htable[i] == 0) { /* make sure you do the update */ htable[i] = 1; update[nlupd++] = i; } /* Put the vertices adjacent to i into the update array */ for (j=xadj[i]; j<xadj[i+1]; j++) { k = ladjncy[j]; if (htable[k] == 0) { htable[k] = 1; if (k<nvtxs) update[nlupd++] = k; else supdate[nsupd++] = k; } } nmoves++; nswaps++; /* check number of zero-gain moves */ for (k=0; k<rinfo[i].ndegrees; k++) if (rinfo[i].degrees[k].edge == to) break; if (rinfo[i].id == rinfo[i].degrees[k].ewgt) nzgswaps++; if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } /* Tell interested pe's the new where[] info for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*------------------------------------------------------------- / Time to communicate with processors to send the vertices / whose degrees need to be update. /-------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; i<nsupd; i++) { htable[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } iidxsort(nsupd, supdate); for (j=i=0; i<nnbrs; i++) { yourlastvtx = vtxdist[peind[i]+1]; for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; i<nnbrs; i++) MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the recieved to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; i<nnbrs; i++) { pe_updates = rupdate+sendptr[i]; for (j=0; j<nupds_pe[i]; j++) { k = pe_updates[j]; if (htable[k-firstvtx] == 0) { htable[k-firstvtx] = 1; update[nlupd++] = k-firstvtx; } } } /*------------------------------------------------------------- / Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii<nlupd; ii++) { i = update[ii]; ASSERT(ctrl, htable[i] == 1); htable[i] = 0; mydomain = where[i]; myrinfo = rinfo+i; tmp_myrinfo = tmp_rinfo+i; my_edegrees = myrinfo->degrees; your_edegrees = tmp_myrinfo->degrees; graph->lmincut -= myrinfo->ed; myrinfo->ndegrees = 0; myrinfo->id = 0; myrinfo->ed = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { yourdomain = where[ladjncy[j]]; if (mydomain != yourdomain) { myrinfo->ed += adjwgt[j]; for (k=0; k<myrinfo->ndegrees; k++) { if (my_edegrees[k].edge == yourdomain) { my_edegrees[k].ewgt += adjwgt[j]; your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { my_edegrees[k].edge = yourdomain; my_edegrees[k].ewgt = adjwgt[j]; your_edegrees[k].edge = yourdomain; your_edegrees[k].ewgt = adjwgt[j]; myrinfo->ndegrees++; } ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); } else { myrinfo->id += adjwgt[j]; } } graph->lmincut += myrinfo->ed; tmp_myrinfo->id = myrinfo->id; tmp_myrinfo->ed = myrinfo->ed; tmp_myrinfo->ndegrees = myrinfo->ndegrees; } /* finally, sum-up the partition weights */ MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); } graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; if (graph->mincut == oldcut) break; } /* gnswaps = GlobalSESum(ctrl, nswaps); gnzgswaps = GlobalSESum(ctrl, nzgswaps); if (mype == 0) printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps); */ GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); }