/*************************************************************************
* Refines the node partition supplied in `where`.
*
* The routine wraps the caller's arrays in a freshly created graph, copies
* `where` into it, computes the 2-way node-partition parameters, runs
* FM_2WayNodeRefine_OneSidedP() on it, and copies the resulting
* graph->where back into `where`.
*
*   nvtxs                      number of vertices
*   xadj, adjncy, vwgt, adjwgt graph arrays handed to SetUpGraph()
*   where                      in/out: partition vector of length nvtxs
*   hmarker, ubfactor          forwarded unchanged to the refinement routine
*
* NOTE(review): only ctrl.dbglvl and ctrl.optype are initialised here; the
* remaining CtrlType fields are left as-is.
**************************************************************************/
void METIS_NodeRefine(int nvtxs, idxtype *xadj, idxtype *vwgt, idxtype *adjncy,
                      idxtype *adjwgt, idxtype *where, idxtype *hmarker,
                      float ubfactor)
{
  CtrlType ctrl;
  GraphType *graph;

  ctrl.dbglvl = ONMETIS_DBGLVL;
  ctrl.optype = OP_ONMETIS;

  /* Build the internal graph representation around the caller's arrays */
  graph = CreateGraph();
  SetUpGraph(graph, OP_ONMETIS, nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3);

  AllocateWorkSpace(&ctrl, graph, 2);
  Allocate2WayNodePartitionMemory(&ctrl, graph);

  /* Seed the graph with the partition provided by the caller */
  idxcopy(nvtxs, where, graph->where);
  Compute2WayNodePartitionParams(&ctrl, graph);

  /* One-sided refinement; the two-sided variant is not used here */
  FM_2WayNodeRefine_OneSidedP(&ctrl, graph, hmarker, ubfactor, 10);

  FreeWorkSpace(&ctrl, graph);

  /* Hand the refined partition back before the graph is destroyed */
  idxcopy(nvtxs, graph->where, where);

  FreeGraph(graph);
}
/*************************************************************************
* Turns the bisection stored in graph->where into a vertex separator.
*
* Every boundary vertex (graph->bndind[0..nbnd-1]) that has at least one
* adjacent vertex is moved to partition 2. The edge-refinement data
* (graph->rdata) is then released, node-partition memory is allocated,
* the parameters are recomputed, and FM_2WayNodeRefine() is run on the
* result.
**************************************************************************/
void ConstructSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor)
{
  int ii, v;
  int nvtxs = graph->nvtxs;
  int nbnd = graph->nbnd;
  idxtype *xadj = graph->xadj;
  idxtype *bndind = graph->bndind;
  idxtype *where;

  /* Work on a scratch copy, because graph->where is about to be replaced */
  where = idxcopy(nvtxs, graph->where, idxwspacemalloc(ctrl, nvtxs));

  /* Boundary vertices become separator vertices; islands are left alone */
  for (ii = 0; ii < nbnd; ii++) {
    v = bndind[ii];
    if (xadj[v+1] > xadj[v])
      where[v] = 2;
  }

  /* Swap the edge-based refinement data for the node-based one */
  GKfree(&graph->rdata, LTERM);
  Allocate2WayNodePartitionMemory(ctrl, graph);
  idxcopy(nvtxs, where, graph->where);
  idxwspacefree(ctrl, nvtxs);

  ASSERT(IsSeparable(graph));

  Compute2WayNodePartitionParams(ctrl, graph);

  ASSERT(CheckNodePartitionParams(graph));

  FM_2WayNodeRefine(ctrl, graph, ubfactor, 8);

  ASSERT(IsSeparable(graph));
}
/****************************************************************************** * This function takes a partition vector that is distributed and reads in * the original graph and computes the edgecut *******************************************************************************/ int ComputeRealCut2(idxtype *vtxdist, idxtype *mvtxdist, idxtype *part, idxtype *mpart, char *filename, MPI_Comm comm) { int i, j, nvtxs, mype, npes, cut; idxtype *xadj, *adjncy, *gpart, *gmpart, *perm, *sizes; MPI_Status status; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &mype); if (mype != 0) { MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm); MPI_Send((void *)mpart, mvtxdist[mype+1]-mvtxdist[mype], IDX_DATATYPE, 0, 1, comm); } else { /* Processor 0 does all the rest */ gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart"); idxcopy(vtxdist[1], part, gpart); gmpart = idxmalloc(mvtxdist[npes], "ComputeRealCut: gmpart"); idxcopy(mvtxdist[1], mpart, gmpart); for (i=1; i<npes; i++) { MPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_DATATYPE, i, 1, comm, &status); MPI_Recv((void *)(gmpart+mvtxdist[i]), mvtxdist[i+1]-mvtxdist[i], IDX_DATATYPE, i, 1, comm, &status); } /* OK, now go and reconstruct the permutation to go from the graph to mgraph */ perm = idxmalloc(vtxdist[npes], "ComputeRealCut: perm"); sizes = idxsmalloc(npes+1, 0, "ComputeRealCut: sizes"); for (i=0; i<vtxdist[npes]; i++) sizes[gpart[i]]++; MAKECSR(i, npes, sizes); for (i=0; i<vtxdist[npes]; i++) perm[i] = sizes[gpart[i]]++; /* Ok, now read the graph from the file */ ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy); /* OK, now compute the cut */ for (cut=0, i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (gmpart[perm[i]] != gmpart[perm[adjncy[j]]]) cut++; } } cut = cut/2; GKfree(&gpart, &gmpart, &perm, &sizes, &xadj, &adjncy, LTERM); return cut; } return 0; }
/*
 * Builds a Matrix from CSR-style graph arrays.
 *
 *   nvtxs, nedges   sizes passed to allocMatrix(); xadj holds nvtxs+1
 *                   entries and adjncy holds nedges entries
 *   adjwgt          optional edge weights (may be NULL)
 *   ncutify         non-zero requests the ncut variant: allocMatrix() is
 *                   called with 1 as its third argument and, when weights
 *                   are supplied, the per-column weight sums
 *                   (ret->adjwgtsum) are filled in
 *
 * The columns are normalised with normalizeColumns() and each column's
 * entries are then sorted with ParallelQSort(). Returns the new Matrix.
 */
Matrix* setupCanonicalMatrix(int nvtxs, int nedges, idxtype* xadj, idxtype* adjncy, idxtype* adjwgt, int ncutify)
{
	int col, e;
	Matrix* ret;

	ret = allocMatrix(nvtxs, nedges, (ncutify ? 1 : 0), 0, 0);

	idxcopy(nvtxs+1, xadj, ret->xadj);
	idxcopy(nedges, adjncy, ret->adjncy);

	if ( adjwgt == NULL )
	{
		/* No weights supplied by the caller */
		if ( ncutify )
			ncutifyWeights(ret,0,ncutify);
		normalizeColumns(ret,0,0);
	}
	else
	{
		if ( ncutify )
		{
			/* Copy the weights column by column and accumulate each
			 * column's total. ncutifyWeights() is not applied on this
			 * path. */
			for ( col=0; col<ret->nvtxs; col++ )
			{
				ret->adjwgtsum[col]=0;
				for ( e=ret->xadj[col]; e<ret->xadj[col+1]; e++ )
				{
					ret->adjwgt[e]=(wgttype)adjwgt[e];
					ret->adjwgtsum[col]+=ret->adjwgt[e];
				}
			}
		}
		else
		{
			for ( e=0; e<nedges; e++ )
				ret->adjwgt[e]=(wgttype)adjwgt[e];
		}
		normalizeColumns(ret,1,0);
	}

	// Sort each column in ascending order; getDprAdjMatrix relies on it.
	for ( col=0; col<nvtxs; col++ )
	{
		ParallelQSort(ret->adjncy,ret->adjwgt,ret->xadj[col],ret->xadj[col+1]-1);
	}

	return ret;
}
/*************************************************************************
* Allocates the per-graph arrays used by the node-partition code:
* nrinfo (nvtxs entries), lpwgts and gpwgts (2*nparts entries each),
* sepind and hmarker (nvtxs entries each).
*
* graph->vwgt is also re-allocated with room for nvtxs + graph->nrecv
* entries; the first nvtxs weights are carried over and the previous array
* is freed. The `wspace` argument is not used by this routine.
**************************************************************************/
void AllocateNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int nvtxs, nparts;
  idxtype *old_vwgt;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr));

  nvtxs  = graph->nvtxs;
  nparts = ctrl->nparts;

  graph->nrinfo  = (NRInfoType *)GKmalloc(sizeof(NRInfoType)*nvtxs, "AllocateNodePartitionParams: rinfo");
  graph->lpwgts  = idxmalloc(2*nparts, "AllocateNodePartitionParams: lpwgts");
  graph->gpwgts  = idxmalloc(2*nparts, "AllocateNodePartitionParams: gpwgts");
  graph->sepind  = idxmalloc(nvtxs, "AllocateNodePartitionParams: sepind");
  graph->hmarker = idxmalloc(nvtxs, "AllocateNodePartitionParams: hmarker");

  /* Grow graph->vwgt so it can also hold the weights of the remote
     vertices; the local weights are preserved */
  old_vwgt = graph->vwgt;
  graph->vwgt = idxmalloc(nvtxs+graph->nrecv, "AllocateNodePartitionParams: graph->vwgt");
  idxcopy(nvtxs, old_vwgt, graph->vwgt);
  GKfree((void **)&old_vwgt, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr));
}
/*************************************************************************
* Multilevel k-way partitioning driver.
*
* The graph is coarsened with Coarsen2Way(), the coarsest graph is
* partitioned into `nparts` parts by METIS_WPartGraphRecursive(), and the
* result is refined back to the original graph by RefineKWay(). The final
* graph->where is copied into `part`, graph->gdata / graph->rdata are
* freed, and graph->mincut is returned.
**************************************************************************/
int MlevelKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *tpwgts, float ubfactor)
{
  GraphType *cgraph;
  int options[10];
  int edgecut;
  int wgtflag = 3;
  int numflag = 0;

  cgraph = Coarsen2Way(ctrl, graph);

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  AllocateKWayPartitionMemory(ctrl, cgraph, nparts);

  /* Settings for the initial partitioning of the coarsest graph */
  options[0]             = 1;
  options[OPTION_CTYPE]  = MATCH_SHEMKWAY;
  options[OPTION_ITYPE]  = IPART_GGPKL;
  options[OPTION_RTYPE]  = RTYPE_FM;
  options[OPTION_DBGLVL] = 0;

  METIS_WPartGraphRecursive(&cgraph->nvtxs, cgraph->xadj, cgraph->adjncy,
                            cgraph->vwgt, cgraph->adjwgt, &wgtflag, &numflag,
                            &nparts, tpwgts, options, &edgecut, cgraph->where);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));
  IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut));

  IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where));

  /* Project and refine from the coarsest graph up to the original one */
  RefineKWay(ctrl, graph, cgraph, nparts, tpwgts, ubfactor);

  idxcopy(graph->nvtxs, graph->where, part);

  GKfree(&graph->gdata, &graph->rdata, LTERM);

  return graph->mincut;
}
/***********************************************************************************
* Entry point for the coordinate-based (geometric) partitioning.
*
* Only the vertex distribution and the coordinates are supplied, so a fake
* graph is built in which local vertex i has exactly one neighbour, namely
* local vertex (i+1)%nvtxs; this lets the graph-based set-up code run
* unchanged. Coordinate_Partition() then computes the partition and
* graph->where is copied into `part`.
*
* With a single process every local entry of `part` is set to 0 and the
* function returns immediately.
************************************************************************************/
void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm)
{
  int v, npes, mype, nvtxs, firstvtx, dbglvl;
  int zeroflg = 0;
  idxtype *xadj, *adjncy;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /* Nothing to partition across a single process */
  if (npes == 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    return;
  }

  /* Set up a fake graph so that the rest of the code works unchanged */
  dbglvl   = 0;
  firstvtx = vtxdist[mype];
  nvtxs    = vtxdist[mype+1]-firstvtx;

  xadj   = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj");
  adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy");
  for (v = 0; v < nvtxs; v++) {
    xadj[v]   = v;
    adjncy[v] = firstvtx + (v+1)%nvtxs;
  }
  xadj[nvtxs] = nvtxs;

  /* Control structure, graph and workspace */
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.seed      = mype;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial geometric partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace);

  idxcopy(graph->nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /* Release everything that was allocated above */
  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  GKfree((void **)&xadj, (void **)&adjncy, LTERM);
}
/*************************************************************************
* This function re-adjusts the amount of memory that was allocated for the
* coarse graph if doing so leads to significant savings.
*
*   graph    the finer graph the coarse graph was derived from
*   cgraph   the coarse graph whose gdata block may be shrunk
*   dovsize  non-zero if gdata contains a vsize array
*
* The block is shrunk only when the coarse graph has more than 100000 edges
* and fewer than 70% of the edges of the finer graph. The original test
* compared graph->nedges against 0.7*graph->nedges, which can never hold
* for a positive edge count, so the routine never did anything.
*
* gdata is laid out as
*     xadj[nvtxs+1] [vwgt[nvtxs]] [vsize[nvtxs]] adjwgtsum[nvtxs]
*     cmap[nvtxs] adjncy[nedges] adjwgt[nedges]
* where vwgt is present only when graph->ncon == 1 and vsize only when
* dovsize is set. adjwgt is first packed directly behind the used part of
* adjncy, then the block is shrunk and all pointers into it are rebuilt,
* because realloc() is allowed to move the block.
**************************************************************************/
void ReAdjustMemory(GraphType *graph, GraphType *cgraph, int dovsize)
{
  int i, nvtxs, nedges, nfields;
  idxtype *src, *dst, *shrunk, *next;

  if (!(cgraph->nedges > 100000 && cgraph->nedges < 0.7*graph->nedges))
    return;

  nvtxs  = cgraph->nvtxs;
  nedges = cgraph->nedges;

  /* Pack adjwgt right after the used portion of adjncy. The two ranges may
     overlap, so copy element by element in the direction that never
     overwrites an entry that is still to be read.
     NOTE(review): assumes adjncy and adjwgt live in the same gdata block,
     as the pointer set-up below implies. */
  src = cgraph->adjwgt;
  dst = cgraph->adjncy + nedges;
  if (dst < src) {
    for (i=0; i<nedges; i++)
      dst[i] = src[i];
  }
  else if (dst > src) {
    for (i=nedges-1; i>=0; i--)
      dst[i] = src[i];
  }

  /* Number of nvtxs-sized arrays that precede adjncy (xadj has nvtxs+1) */
  nfields = 3;                 /* xadj, adjwgtsum, cmap */
  if (graph->ncon == 1)
    nfields++;                 /* vwgt */
  if (dovsize)
    nfields++;                 /* vsize */

  /* Shrink the block. If realloc fails the old block is still valid and
     still large enough, so keep using it instead of losing the pointer. */
  shrunk = realloc(cgraph->gdata, (nfields*nvtxs+1 + 2*nedges)*sizeof(idxtype));
  if (shrunk != NULL)
    cgraph->gdata = shrunk;

  /* Rebuild the pointers, in case everything was copied into new space */
  next = cgraph->gdata;
  cgraph->xadj = next;
  next += nvtxs+1;
  if (graph->ncon == 1) {
    cgraph->vwgt = next;
    next += nvtxs;
  }
  if (dovsize) {
    cgraph->vsize = next;
    next += nvtxs;
  }
  cgraph->adjwgtsum = next;
  next += nvtxs;
  cgraph->cmap = next;
  next += nvtxs;
  cgraph->adjncy = next;
  next += nedges;
  cgraph->adjwgt = next;
}
/*************************************************************************
* Computes an initial bisection of the graph by repeated trials and leaves
* the best one in graph->where (its cut in graph->mincut).
*
* Each trial puts every vertex in partition 1 except one randomly chosen
* vertex, which goes to partition 0, and then runs the balance / FM
* edge-refinement sequence below. A trial replaces the stored best whenever
* its cut is not larger than the best cut seen so far, and the search stops
* early once a cut of 0 is reached.
*
* NOTE(review): `ubfactor` is not used; the balancing step is called with
* the literal 1.02.
**************************************************************************/
void MocGrowBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor)
{
  int nvtxs, ntrials, bestcut;
  idxtype *where, *bestwhere;

  nvtxs = graph->nvtxs;

  MocAllocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  ntrials = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);
  bestcut = idxsum(graph->nedges, graph->adjwgt);

  while (ntrials > 0) {
    ntrials--;

    /* Seed: a single random vertex on side 0, everything else on side 1 */
    idxset(nvtxs, 1, where);
    where[RandomInRange(nvtxs)] = 0;

    MocCompute2WayPartitionParams(ctrl, graph);

    MocInit2WayBalance(ctrl, graph, tpwgts);

    MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4);

    MocBalance2Way(ctrl, graph, tpwgts, 1.02);
    MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 4);

    /* Skip trials that are strictly worse than the best so far */
    if (graph->mincut > bestcut)
      continue;

    bestcut = graph->mincut;
    idxcopy(nvtxs, where, bestwhere);
    if (bestcut == 0)
      break;
  }

  /* Install the best bisection found */
  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  GKfree1((void**)&bestwhere);
}
/*************************************************************************
* Computes an initial bisection of the graph by repeated trials and leaves
* the best one in graph->where (its cut in graph->mincut).
*
*   tpwgts, ubvec   forwarded to the balancing and refinement routines
*
* Each trial puts every vertex in partition 1 except one randomly chosen
* vertex, which goes to partition 0, and then alternates MocBalance2Way2()
* and MocFM_2WayEdgeRefine2(). A trial replaces the stored best when it is
* the first one or when its cut is strictly smaller than the best so far;
* the search stops early once a cut of 0 is reached.
*
* The first trial is always recorded. Previously a trial was recorded only
* if its cut was strictly below the sum of all edge weights, so on a graph
* whose edge weights sum to 0 nothing was ever stored and the uninitialised
* `bestwhere` buffer was copied into graph->where.
**************************************************************************/
void MocGrowBisection2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec)
{
  int nvtxs, bestcut, nbfs;
  int havebest = 0;            /* set once bestwhere holds a valid bisection */
  idxtype *bestwhere, *where;

  nvtxs = graph->nvtxs;

  MocAllocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);
  bestcut = idxsum(graph->nedges, graph->adjwgt);

  for (; nbfs>0; nbfs--) {
    /* Seed: a single random vertex on side 0, everything else on side 1 */
    idxset(nvtxs, 1, where);
    where[RandomInRange(nvtxs)] = 0;

    MocCompute2WayPartitionParams(ctrl, graph);

    MocBalance2Way2(ctrl, graph, tpwgts, ubvec);

    MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4);

    MocBalance2Way2(ctrl, graph, tpwgts, ubvec);
    MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4);

    if (!havebest || bestcut > graph->mincut) {
      havebest = 1;
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;
    }
  }

  graph->mincut = bestcut;

  /* Only install bestwhere if some trial actually filled it */
  if (havebest)
    idxcopy(nvtxs, bestwhere, where);

  GKfree((void**)&bestwhere, LTERM);
}
/*************************************************************************
* Computes a vertex separator of a vertex-weighted graph by way of an edge
* bisection: MlevelEdgeBisection() splits the graph and
* ConstructMinCoverSeparator() derives the separator from that bisection.
*
*   options   options[0] == 0 selects the ONMETIS_* defaults; otherwise the
*             OPTION_CTYPE / ITYPE / RTYPE / DBGLVL entries are used.
*             options[7] is passed to InitRandom() in both cases.
*   sepsize   out: graph.pwgts[2] after the separator is built
*   part      out: copy of graph.where (*nvtxs entries)
**************************************************************************/
void METIS_EdgeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *options, int *sepsize, idxtype *part)
{
  int tvwgt, tpwgts[2];
  int usedefaults;
  GraphType graph;
  CtrlType ctrl;

  SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3);
  tvwgt = idxsum(*nvtxs, graph.vwgt);

  /* Pick either the built-in defaults or the caller's settings */
  usedefaults = (options[0] == 0);
  ctrl.CType  = (usedefaults ? ONMETIS_CTYPE  : options[OPTION_CTYPE]);
  ctrl.IType  = (usedefaults ? ONMETIS_ITYPE  : options[OPTION_ITYPE]);
  ctrl.RType  = (usedefaults ? ONMETIS_RTYPE  : options[OPTION_RTYPE]);
  ctrl.dbglvl = (usedefaults ? ONMETIS_DBGLVL : options[OPTION_DBGLVL]);

  ctrl.oflags    = 0;
  ctrl.pfactor   = 0;
  ctrl.nseps     = 5;
  ctrl.optype    = OP_OEMETIS;
  ctrl.CoarsenTo = amin(100, *nvtxs-1);
  ctrl.maxvwgt   = 1.5*tvwgt/ctrl.CoarsenTo;

  InitRandom(options[7]);

  AllocateWorkSpace(&ctrl, &graph, 2);

  /*============================================================
   * Perform the bisection
   *============================================================*/
  /* Target weights: split the total vertex weight into two halves */
  tpwgts[0] = tvwgt/2;
  tpwgts[1] = tvwgt-tpwgts[0];

  MlevelEdgeBisection(&ctrl, &graph, tpwgts, 1.05);
  ConstructMinCoverSeparator(&ctrl, &graph, 1.05);

  /* Report the results */
  *sepsize = graph.pwgts[2];
  idxcopy(*nvtxs, graph.where, part);

  GKfree((void**)&graph.gdata, &graph.rdata, &graph.label, LTERM);

  FreeWorkSpace(&ctrl, &graph);
}
/*************************************************************************
* Computes a vertex separator of a vertex-weighted graph with
* MlevelNodeBisectionMultiple().
*
*   ubfactor  *ubfactor scaled by .95 is passed on as the balance factor
*   options   options[0] == 0 selects the ONMETIS_* defaults; otherwise the
*             OPTION_CTYPE / ITYPE / DBGLVL entries are used. The
*             refinement type is forced to 2 in both cases, and options[7]
*             is passed to InitRandom().
*   sepsize   out: graph.pwgts[2] after the bisection
*   part      out: copy of graph.where (*nvtxs entries)
**************************************************************************/
void METIS_NodeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, float *ubfactor, int *options, int *sepsize, idxtype *part)
{
  int tvwgt, tpwgts[2];
  int usedefaults;
  GraphType graph;
  CtrlType ctrl;

  SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3);
  tvwgt = idxsum(*nvtxs, graph.vwgt);

  /* Pick either the built-in defaults or the caller's settings */
  usedefaults = (options[0] == 0);
  ctrl.CType  = (usedefaults ? ONMETIS_CTYPE  : options[OPTION_CTYPE]);
  ctrl.IType  = (usedefaults ? ONMETIS_ITYPE  : options[OPTION_ITYPE]);
  ctrl.RType  = (usedefaults ? ONMETIS_RTYPE  : options[OPTION_RTYPE]);
  ctrl.dbglvl = (usedefaults ? ONMETIS_DBGLVL : options[OPTION_DBGLVL]);

  ctrl.oflags  = OFLAG_COMPRESS; /* For by-passing the pre-coarsening for multiple runs */
  ctrl.RType   = 2;              /* Standard 1-sided node refinement code; overrides the value above */
  ctrl.pfactor = 0;
  ctrl.nseps   = 5;              /* This should match NUM_INIT_MSECTIONS in ParMETISLib/defs.h */
  ctrl.optype  = OP_ONMETIS;

  InitRandom(options[7]);

  AllocateWorkSpace(&ctrl, &graph, 2);

  /*============================================================
   * Perform the bisection
   *============================================================*/
  /* Target weights: split the total vertex weight into two halves */
  tpwgts[0] = tvwgt/2;
  tpwgts[1] = tvwgt-tpwgts[0];

  MlevelNodeBisectionMultiple(&ctrl, &graph, tpwgts, *ubfactor*.95);

  /* Report the results */
  *sepsize = graph.pwgts[2];
  idxcopy(*nvtxs, graph.where, part);

  GKfree((void **)&graph.gdata, &graph.rdata, &graph.label, LTERM);

  FreeWorkSpace(&ctrl, &graph);
}
/*************************************************************************
* Computes a vertex separator of a vertex-weighted graph with
* MlevelNodeBisectionMultiple(), using a fixed balance factor of 1.02.
*
*   options   options[0] == 0 selects the ONMETIS_* defaults; otherwise the
*             OPTION_CTYPE / ITYPE / RTYPE / DBGLVL entries are used.
*             options[7] is passed to InitRandom() in both cases.
*   sepsize   out: graph.pwgts[2] after the bisection
*   part      out: copy of graph.where (*nvtxs entries)
**************************************************************************/
void METIS_NodeComputeSeparator(idxtype *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, idxtype *options, idxtype *sepsize, idxtype *part)
{
  idxtype tvwgt, tpwgts[2];
  int usedefaults;
  GraphType graph;
  CtrlType ctrl;

  SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3);
  tvwgt = idxsum(*nvtxs, graph.vwgt, 1);

  /* Pick either the built-in defaults or the caller's settings */
  usedefaults = (options[0] == 0);
  if (usedefaults) {
    ctrl.CType  = ONMETIS_CTYPE;
    ctrl.IType  = ONMETIS_ITYPE;
    ctrl.RType  = ONMETIS_RTYPE;
    ctrl.dbglvl = ONMETIS_DBGLVL;
  }
  if (!usedefaults) {
    ctrl.CType  = options[OPTION_CTYPE];
    ctrl.IType  = options[OPTION_ITYPE];
    ctrl.RType  = options[OPTION_RTYPE];
    ctrl.dbglvl = options[OPTION_DBGLVL];
  }

  ctrl.oflags    = 0;
  ctrl.pfactor   = 0;
  ctrl.nseps     = 3;
  ctrl.optype    = OP_ONMETIS;
  ctrl.CoarsenTo = amin(100, *nvtxs-1);
  ctrl.maxvwgt   = 1.5*tvwgt/ctrl.CoarsenTo;

  InitRandom(options[7]);

  AllocateWorkSpace(&ctrl, &graph, 2);

  /*============================================================
   * Perform the bisection
   *============================================================*/
  /* Target weights: split the total vertex weight into two halves */
  tpwgts[0] = tvwgt/2;
  tpwgts[1] = tvwgt-tpwgts[0];

  MlevelNodeBisectionMultiple(&ctrl, &graph, tpwgts, 1.02);

  /* Report the results */
  *sepsize = graph.pwgts[2];
  idxcopy(*nvtxs, graph.where, part);

  FreeGraph(&graph, 0);
  FreeWorkSpace(&ctrl, &graph);
}
/*************************************************************************
* Computes an initial bisection of the graph by repeated trials and leaves
* the best one in graph->where (its cut in graph->mincut).
*
* Each trial puts every vertex in partition 1 except one randomly chosen
* vertex, which goes to partition 0, then runs MocInit2WayBalance2()
* followed by MocFM_2WayEdgeRefine2(). The first trial is always recorded;
* later trials replace it only when their cut is strictly smaller. The
* search stops early once a cut of 0 is reached.
**************************************************************************/
void MocGrowBisectionNew2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec)
{
  idxtype nvtxs, bestcut, ntrials, trial;
  idxtype *where, *bestwhere;

  nvtxs = graph->nvtxs;

  MocAllocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  ntrials = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);

  trial = 0;
  while (trial < ntrials) {
    /* Seed: a single random vertex on side 0, everything else on side 1 */
    idxset(nvtxs, 1, where);
    where[RandomInRange(nvtxs)] = 0;

    MocCompute2WayPartitionParams(ctrl, graph);

    MocInit2WayBalance2(ctrl, graph, tpwgts, ubvec);

    MocFM_2WayEdgeRefine2(ctrl, graph, tpwgts, ubvec, 4);

    /* Record the first trial unconditionally, later ones only if better */
    if (trial == 0 || bestcut > graph->mincut) {
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;
    }

    trial++;
  }

  /* Install the best bisection found */
  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  gk_free((void **)&bestwhere, LTERM);
}
/*************************************************************************
* Multi-constraint multilevel k-way partitioning driver.
*
* The graph is coarsened with MCCoarsen2Way() and the coarsest graph is
* partitioned into `nparts` parts by one of two recursive partitioners:
* METIS_mCPartGraphRecursiveInternal() when none of the graph->ncon
* entries of `rubvec` exceeds 1.2, otherwise
* METIS_mCHPartGraphRecursiveInternal(), which also receives `rubvec`.
* MocRefineKWayHorizontal() then refines the result back to the original
* graph. graph->where is copied into `part`, the graph's nvwgt, npwgts,
* gdata and rdata are freed, and graph->mincut is returned.
**************************************************************************/
int MCMlevelKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *rubvec)
{
  int c, looseTolerance;
  GraphType *cgraph;
  int options[10], edgecut;

  cgraph = MCCoarsen2Way(ctrl, graph);

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  MocAllocateKWayPartitionMemory(ctrl, cgraph, nparts);

  /* Settings for the initial partitioning of the coarsest graph */
  options[0]             = 1;
  options[OPTION_CTYPE]  = MATCH_SBHEM_INFNORM;
  options[OPTION_ITYPE]  = IPART_RANDOM;
  options[OPTION_RTYPE]  = RTYPE_FM;
  options[OPTION_DBGLVL] = 0;

  /* Choose the initial partitioner based on the tolerances: does any
     constraint allow more than 1.2? */
  looseTolerance = 0;
  for (c = 0; c < graph->ncon; c++) {
    if (rubvec[c] > 1.2) {
      looseTolerance = 1;
      break;
    }
  }

  if (!looseTolerance)
    METIS_mCPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon,
          cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts,
          options, &edgecut, cgraph->where);
  else
    METIS_mCHPartGraphRecursiveInternal(&cgraph->nvtxs, &cgraph->ncon,
          cgraph->xadj, cgraph->adjncy, cgraph->nvwgt, cgraph->adjwgt, &nparts,
          rubvec, options, &edgecut, cgraph->where);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));
  IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut));

  IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where));

  /* Project and refine from the coarsest graph up to the original one */
  MocRefineKWayHorizontal(ctrl, graph, cgraph, nparts, rubvec);

  idxcopy(graph->nvtxs, graph->where, part);

  GKfree(&graph->nvwgt, &graph->npwgts, &graph->gdata, &graph->rdata, LTERM);

  return graph->mincut;
}
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); if (npes == 1) { /* Take care the npes = 1 case */ idxset(vtxdist[1], 0, part); *edgecut = 0; return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); SetUpCtrl(&ctrl, npes, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize"); PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype)); AdaptiveUndirected_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype)); ReMapGraph(&ctrl, graph, 0, &wspace); idxcopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; IMfree((void**)&graph->vsize, LTERM); FreeInitialGraphAndRemap(graph, *wgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }
/*********************************************************************************** * This function creates the fused-element-graph and returns the partition ************************************************************************************/ void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol, realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part, MPI_Comm *comm) { int npes, mype, nvtxs; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nvtxs = vtxdist[mype+1]-vtxdist[mype]; /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */ SetUpCtrl(&ctrl, *nparts, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts)); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->where = part; PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); CreateFusedElementGraph(&ctrl, graph, &wspace, numflag); idxcopy(nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); if (((*wgtflag)&2) == 0) IMfree((void**)&graph->vwgt, LTERM); IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy, &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr, &graph->imap, &graph->sendind, &graph, LTERM); FreeWSpace(&wspace); FreeCtrl(&ctrl); }
/*************************************************************************
* This function is the entry point for detecting contacts between
* bounding boxes and surface nodes.
*
*   raw_cinfo   opaque pointer that is cast to ContactInfoType *
*   nboxes      number of boxes; boxcoords holds 6 doubles per box
*   nparts      size of the per-partition scratch arrays
*   r_cntptr    out: array of *nboxes+1 offsets (built with MAKECSR);
*               box i's contacts are
*               (*r_cntind)[(*r_cntptr)[i] .. (*r_cntptr)[i+1]-1]
*   r_cntind    out: concatenated contact lists of all boxes
*
* Both output arrays are allocated here and handed to the caller.
*
* The contact list starts with room for 6 entries per box and is grown on
* demand. The growth extrapolates the average number of contacts seen so
* far to the boxes that remain. Two problems of the original growth step
* are fixed:
*   - for the first box (i == 0) the extrapolation divided by zero;
*   - the increment was not guaranteed to make room for the contacts of
*     the current box, so the copy that follows could run past the buffer.
**************************************************************************/
void METIS_FindContacts(void *raw_cinfo, idxtype *nboxes, double *boxcoords, idxtype *nparts, idxtype **r_cntptr, idxtype **r_cntind)
{
  idxtype i, ncnts, tncnts, maxtncnts, needed;
  idxtype *cntptr, *cntind, *auxcntind, *stack, *marker;
  ContactInfoType *cinfo;

  cinfo = (ContactInfoType *)raw_cinfo;

  maxtncnts = 6*(*nboxes);
  cntptr    = idxsmalloc(*nboxes+1, 0, "METIS_FindContacts: cntptr");
  cntind    = idxmalloc(maxtncnts, "METIS_FindContacts: cntind");
  auxcntind = idxmalloc(*nparts, "METIS_FindContacts: auxcntind");
  stack     = idxmalloc(cinfo->nnodes, "METIS_FindContacts: stack");
  marker    = idxsmalloc(*nparts, 0, "METIS_FindContacts: marker");

  /* Go through each box and determine its contacting partitions */
  for (tncnts=0, i=0; i<*nboxes; i++) {
    ncnts = FindBoxContacts(cinfo, boxcoords+i*6, stack, auxcntind, marker);

    if (ncnts == 0)
      mprintf("CSearchError: Box has no contacts!\n");

    needed = tncnts + ncnts;
    if (needed >= maxtncnts) {
      /* Extrapolate to the remaining boxes. For the first box there is no
         history yet, so extrapolate from the current box alone. */
      if (i > 0)
        maxtncnts += needed*(*nboxes-i)/i;
      else
        maxtncnts += needed*(*nboxes-1);

      /* Whatever the estimate says, the current box must fit */
      if (maxtncnts <= needed)
        maxtncnts = needed+1;

      if ((cntind = (idxtype *)realloc(cntind, maxtncnts*sizeof(idxtype))) == NULL)
        errexit("Realloc failed! of %d words!\n", maxtncnts);
    }

    cntptr[i] = ncnts;
    idxcopy(ncnts, auxcntind, cntind+tncnts);
    tncnts += ncnts;
  }
  MAKECSR(i, *nboxes, cntptr);

  *r_cntptr = cntptr;
  *r_cntind = cntind;

  gk_free((void **)&auxcntind, &stack, &marker, LTERM);
}
/******************************************************************************
* Computes the edge-cut of a distributed partition against the graph stored
* in `filename`.
*
* Every rank other than 0 sends its slice of `part` to rank 0 and returns 0.
* Rank 0 assembles the global partition vector, reads the graph, and counts
* the adjacency entries (i, adjncy[j]) whose endpoints carry different
* partition numbers. The count is halved before it is returned --
* presumably because each undirected edge is stored in both directions.
*
* Returns the cut on rank 0 and 0 on every other rank.
*******************************************************************************/
int ComputeRealCut(idxtype *vtxdist, idxtype *part, char *filename, MPI_Comm comm)
{
  int v, e, pe, nvtxs, mype, npes, cut;
  idxtype *xadj, *adjncy, *gpart;
  MPI_Status status;

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* Non-root ranks only contribute their local slice */
  if (mype != 0) {
    MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm);
    return 0;
  }

  /* Rank 0: start the global vector with its own slice ... */
  gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart");
  idxcopy(vtxdist[1], part, gpart);

  /* ... and append the slices of the other ranks, in rank order */
  for (pe = 1; pe < npes; pe++)
    MPI_Recv((void *)(gpart+vtxdist[pe]), vtxdist[pe+1]-vtxdist[pe], IDX_DATATYPE, pe, 1, comm, &status);

  ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy);

  /* Count adjacency entries whose two endpoints are assigned differently */
  cut = 0;
  for (v = 0; v < nvtxs; v++) {
    for (e = xadj[v]; e < xadj[v+1]; e++) {
      if (gpart[v] != gpart[adjncy[e]])
        cut++;
    }
  }
  cut = cut/2;

  GKfree(&gpart, &xadj, &adjncy, LTERM);

  return cut;
}
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) { int h, i, ii, iii, j, k, c; int pass, nvtxs, nedges, ncon; int nmoves, nmoved, nswaps, nzgswaps; /* int gnswaps, gnzgswaps; */ int me, firstvtx, lastvtx, yourlastvtx; int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; int nlupd, nsupd, nnbrs, nchanged; idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; idxtype *where, *tmp_where, *moved; floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; idxtype *update, *supdate, *rupdate, *pe_updates; idxtype *changed, *perm, *pperm, *htable; idxtype *peind, *recvptr, *sendptr; KeyValueType *swchanges, *rwchanges; RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; int *nupds_pe; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); /*************************/ /* set up common aliases */ /*************************/ nvtxs = graph->nvtxs; nedges = graph->nedges; ncon = graph->ncon; vtxdist = graph->vtxdist; xadj = graph->xadj; ladjncy = graph->adjncy; adjwgt = graph->adjwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->rinfo; lnpwgts = graph->lnpwgts; gnpwgts = graph->gnpwgts; ubvec = ctrl->ubvec; tpwgts = ctrl->tpwgts; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; changed = idxmalloc(nvtxs, "KWR: changed"); rwchanges = wspace->pairs; swchanges = rwchanges + recvptr[nnbrs]; /************************************/ /* set up important data structures */ /************************************/ perm = idxmalloc(nvtxs, "KWR: 
perm"); pperm = idxmalloc(nparts, "KWR: pperm"); update = idxmalloc(nvtxs, "KWR: update"); supdate = wspace->indices; rupdate = supdate + recvptr[nnbrs]; nupds_pe = imalloc(npes, "KWR: nupds_pe"); htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); for (i=0; i<nparts; i++) { for (h=0; h<ncon; h++) { badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h]; } } movewgts = fmalloc(nparts*ncon, "KWR: movewgts"); ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts"); pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts"); overfill = fmalloc(nparts*ncon, "KWR: overfill"); moved = idxmalloc(nvtxs, "KWR: moved"); tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where"); tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); idxcopy(nvtxs+graph->nrecv, where, tmp_where); for (i=0; i<nvtxs; i++) { tmp_rinfo[i].id = rinfo[i].id; tmp_rinfo[i].ed = rinfo[i].ed; tmp_rinfo[i].ndegrees = rinfo[i].ndegrees; tmp_rinfo[i].degrees = tmp_edegrees+xadj[i]; for (j=0; j<rinfo[i].ndegrees; j++) { tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge; tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt; } } nswaps = nzgswaps = 0; /*********************************************************/ /* perform a small number of passes through the vertices */ /*********************************************************/ for (pass=0; pass<npasses; pass++) { if (mype == 0) RandomPermute(nparts, pperm, 1); MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); FastRandomPermute(nvtxs, perm, 1); oldcut = graph->mincut; /* check to see if the partitioning is imbalanced */ Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); ubavg = savg(ncon, ubvec); lbavg = savg(ncon, lbvec); imbalanced = (lbavg > ubavg) ? 
1 : 0; for (c=0; c<2; c++) { scopy(ncon*nparts, gnpwgts, ognpwgts); sset(ncon*nparts, 0.0, movewgts); nmoved = 0; /**********************************************/ /* PASS ONE -- record stats for desired moves */ /**********************************************/ for (iii=0; iii<nvtxs; iii++) { i = perm[iii]; from = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; for (h=0; h<ncon; h++) if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT) break; if (h < ncon) { continue; } /* check for a potential improvement */ if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) { my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) { to = my_edegrees[k].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) break; } } oldto = to; /* check if a subdomain was found that fits */ if (k < tmp_rinfo[i].ndegrees) { for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) { to = my_edegrees[j].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) { if (my_edegrees[j].ewgt > my_edegrees[k].ewgt || (my_edegrees[j].ewgt == my_edegrees[k].ewgt && IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ k = j; oldto = my_edegrees[k].edge; } } } } to = oldto; if (my_edegrees[k].ewgt > tmp_rinfo[i].id || (my_edegrees[k].ewgt == tmp_rinfo[i].id && (imbalanced || graph->level > 3 || iii % 8 == 0) && IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ /****************************************/ /* Update tmp arrays of the moved vertex */ /****************************************/ tmp_where[i] = to; moved[nmoved++] = i; for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; gnpwgts[to*ncon+h] += nvwgt[h]; gnpwgts[from*ncon+h] -= nvwgt[h]; movewgts[to*ncon+h] += nvwgt[h]; movewgts[from*ncon+h] -= nvwgt[h]; } 
tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } } /******************************************/ /* Let processors know the subdomain wgts */ /* if all proposed moves commit. 
*/ /******************************************/ MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); /**************************/ /* compute overfill array */ /**************************/ overweight = 0; for (j=0; j<nparts; j++) { for (h=0; h<ncon; h++) { if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) { overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); } else { overfill[j*ncon+h] = 0.0; } overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); overfill[j*ncon+h] *= movewgts[j*ncon+h]; if (overfill[j*ncon+h] > 0.0) overweight = 1; ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); } } /****************************************************/ /* select moves to undo according to overfill array */ /****************************************************/ if (overweight == 1) { for (iii=0; iii<nmoved; iii++) { i = moved[iii]; oldto = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) if (my_edegrees[k].edge == where[i]) break; for (h=0; h<ncon; h++) if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) break; /**********************************/ /* nullify this move if necessary */ /**********************************/ if (k != tmp_rinfo[i].ndegrees && h != ncon) { moved[iii] = -1; from = oldto; to = where[i]; for (h=0; h<ncon; h++) { overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0); } tmp_where[i] = to; tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } for (h=0; h<ncon; h++) { 
lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } /*************************************************/ /* PASS TWO -- commit the remainder of the moves */ /*************************************************/ nlupd = nsupd = nmoves = nchanged = 0; for (iii=0; iii<nmoved; iii++) { i = moved[iii]; if (i == -1) continue; where[i] = tmp_where[i]; /* Make sure to update the vertex information */ if (htable[i] == 0) { /* make sure you do the update */ htable[i] = 1; update[nlupd++] = i; } /* Put the vertices adjacent to i into the update array */ for (j=xadj[i]; j<xadj[i+1]; j++) { k = ladjncy[j]; if (htable[k] == 0) { htable[k] = 1; if (k<nvtxs) update[nlupd++] = k; else supdate[nsupd++] = k; } } nmoves++; nswaps++; /* check number of zero-gain moves */ for (k=0; k<rinfo[i].ndegrees; k++) if (rinfo[i].degrees[k].edge == to) break; 
if (rinfo[i].id == rinfo[i].degrees[k].ewgt) nzgswaps++; if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } /* Tell interested pe's the new where[] info for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*------------------------------------------------------------- / Time to communicate with processors to send the vertices / whose degrees need to be update. /-------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; i<nsupd; i++) { htable[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } iidxsort(nsupd, supdate); for (j=i=0; i<nnbrs; i++) { yourlastvtx = vtxdist[peind[i]+1]; for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; i<nnbrs; i++) MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the recieved to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; i<nnbrs; i++) { pe_updates = rupdate+sendptr[i]; for (j=0; j<nupds_pe[i]; j++) { k = pe_updates[j]; if (htable[k-firstvtx] == 0) { htable[k-firstvtx] = 1; update[nlupd++] = k-firstvtx; } } } /*------------------------------------------------------------- / 
Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii<nlupd; ii++) { i = update[ii]; ASSERT(ctrl, htable[i] == 1); htable[i] = 0; mydomain = where[i]; myrinfo = rinfo+i; tmp_myrinfo = tmp_rinfo+i; my_edegrees = myrinfo->degrees; your_edegrees = tmp_myrinfo->degrees; graph->lmincut -= myrinfo->ed; myrinfo->ndegrees = 0; myrinfo->id = 0; myrinfo->ed = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { yourdomain = where[ladjncy[j]]; if (mydomain != yourdomain) { myrinfo->ed += adjwgt[j]; for (k=0; k<myrinfo->ndegrees; k++) { if (my_edegrees[k].edge == yourdomain) { my_edegrees[k].ewgt += adjwgt[j]; your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { my_edegrees[k].edge = yourdomain; my_edegrees[k].ewgt = adjwgt[j]; your_edegrees[k].edge = yourdomain; your_edegrees[k].ewgt = adjwgt[j]; myrinfo->ndegrees++; } ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); } else { myrinfo->id += adjwgt[j]; } } graph->lmincut += myrinfo->ed; tmp_myrinfo->id = myrinfo->id; tmp_myrinfo->ed = myrinfo->ed; tmp_myrinfo->ndegrees = myrinfo->ndegrees; } /* finally, sum-up the partition weights */ MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); } graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; if (graph->mincut == oldcut) break; } /* gnswaps = GlobalSESum(ctrl, nswaps); gnzgswaps = GlobalSESum(ctrl, nzgswaps); if (mype == 0) printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps); */ GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, 
stoptimer(ctrl->KWayTmr)); }
/*************************************************************************
* Projects the bisection stored in graph->coarser onto graph and, in the
* same sweep, computes the refinement data of graph: the internal/external
* degrees (id/ed) and the boundary list (bndptr/bndind, nbnd).
*
* Side effects visible in this routine:
*  - graph->cmap is overwritten: each entry is replaced by the bndptr value
*    of the coarse vertex it maps to.
*  - graph->mincut and graph->pwgts[0..1] are copied from the coarser graph.
*  - the coarser graph is released with FreeGraph and graph->coarser is
*    set to NULL.
**************************************************************************/
void Project2WayPartition(CtrlType *ctrl, GraphType *graph)
{
  int v, e, cv, side, nvtxs, nbnd;
  idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum;
  idxtype *cmap, *where, *id, *ed, *bndptr, *bndind;
  idxtype *cwhere, *cbndptr;
  GraphType *cgraph;

  cgraph  = graph->coarser;
  cwhere  = cgraph->where;
  cbndptr = cgraph->bndptr;

  nvtxs     = graph->nvtxs;
  cmap      = graph->cmap;
  xadj      = graph->xadj;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;

  Allocate2WayPartitionMemory(ctrl, graph);

  where  = graph->where;
  id     = idxset(nvtxs, 0, graph->id);
  ed     = idxset(nvtxs, 0, graph->ed);
  bndptr = idxset(nvtxs, -1, graph->bndptr);
  bndind = graph->bndind;

  /* Inherit the side of the coarse vertex; recycle cmap[] to remember the
     coarse vertex's bndptr entry (-1 is treated below as "not on the
     coarse boundary") */
  for (v=0; v<nvtxs; v++) {
    cv       = cmap[v];
    where[v] = cwhere[cv];
    cmap[v]  = cbndptr[cv];
  }

  nbnd = 0;
  for (v=0; v<nvtxs; v++) {
    side  = where[v];
    id[v] = adjwgtsum[v];

    /* Vertices without edges are always placed on the boundary */
    if (xadj[v] == xadj[v+1]) {
      bndptr[v]      = nbnd;
      bndind[nbnd++] = v;
      continue;
    }

    /* Coarse vertex was not on the boundary: ed stays 0, id = adjwgtsum */
    if (cmap[v] == -1)
      continue;

    /* Interface vertex: accumulate the weight of the edges that cross */
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      if (where[adjncy[e]] != side)
        ed[v] += adjwgt[e];
    }
    id[v] -= ed[v];

    if (ed[v] > 0) {
      bndptr[v]      = nbnd;
      bndind[nbnd++] = v;
    }
  }

  graph->mincut = cgraph->mincut;
  graph->nbnd   = nbnd;
  idxcopy(2, cgraph->pwgts, graph->pwgts);

  FreeGraph(graph->coarser);
  graph->coarser = NULL;
}
/************************************************************************* * This function performs the gather/scatter for the boundary vertices **************************************************************************/ void CommChangedInterfaceData(CtrlType *ctrl, GraphType *graph, int nchanged, idxtype *changed, idxtype *data, KeyValueType *sendpairs, KeyValueType *recvpairs, idxtype *psendptr) { int i, j, k, n, penum, nnbrs, firstvtx, nrecv; idxtype *peind, *sendptr, *recvptr, *recvind, *pexadj, *peadjncy, *peadjloc; KeyValueType *pairs; firstvtx = graph->vtxdist[ctrl->mype]; nnbrs = graph->nnbrs; nrecv = graph->nrecv; peind = graph->peind; sendptr = graph->sendptr; recvptr = graph->recvptr; recvind = graph->recvind; pexadj = graph->pexadj; peadjncy = graph->peadjncy; peadjloc = graph->peadjloc; /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(recvpairs+recvptr[i]), 2*(recvptr[i+1]-recvptr[i]), IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } if (nchanged != 0) { idxcopy(ctrl->npes, sendptr, psendptr); /* Copy the changed values into the sendvector */ for (i=0; i<nchanged; i++) { j = changed[i]; for (k=pexadj[j]; k<pexadj[j+1]; k++) { penum = peadjncy[k]; sendpairs[psendptr[penum]].key = peadjloc[k]; sendpairs[psendptr[penum]].val = data[j]; psendptr[penum]++; } } for (i=0; i<nnbrs; i++) { MPI_Isend((void *)(sendpairs+sendptr[i]), 2*(psendptr[i]-sendptr[i]), IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); } } else { for (i=0; i<nnbrs; i++) MPI_Isend((void *)(sendpairs), 0, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); } /* OK, now get into the loop waiting for the operations to finish */ for (i=0; i<nnbrs; i++) { MPI_Wait(ctrl->rreq+i, &(ctrl->status)); MPI_Get_count(&ctrl->status, IDX_DATATYPE, &n); if (n != 0) { n = n/2; pairs = recvpairs+graph->recvptr[i]; for (k=0; k<n; k++) data[pairs[k].key] = pairs[k].val; } } MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); }
/*************************************************************************
* This function takes a multi-constraint graph and produces a bisection by
* randomly splitting the vertices, followed by refinement and balancing.
* Several random trials are performed and the one with the smallest
* edge-cut is kept. The resulting partition is returned in graph->where
* and its cut in graph->mincut.
*
* Parameters:
*   ctrl     - control structure; CoarsenTo selects the number of trials.
*   graph    - graph to bisect (nvtxs, ncon, nvwgt, nedges, adjwgt are read).
*   tpwgts   - target partition weights, forwarded to the refinement and
*              balancing routines.
*   ubfactor - not used by this routine.
**************************************************************************/
void MocRandomBisection(CtrlType *ctrl, GraphType *graph, float *tpwgts, float ubfactor)
{
  int i, ii, nvtxs, ncon, bestcut, nbfs, qnum;
  idxtype *bestwhere, *where, *perm;
  int counts[MAXNCON];
  float *nvwgt;

  nvtxs = graph->nvtxs;
  ncon  = graph->ncon;
  nvwgt = graph->nvwgt;

  MocAllocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  nbfs = 2*(nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);

  /* The +1 is for the 0 edges case: without it bestcut would start at 0,
     no trial could ever satisfy bestcut > mincut, and bestwhere would be
     copied back below without ever having been written */
  bestcut = idxsum(graph->nedges, graph->adjwgt)+1;

  perm = idxmalloc(nvtxs, "BisectGraph: perm");

  for (; nbfs>0; nbfs--) {
    for (i=0; i<ncon; i++)
      counts[i] = 0;

    RandomPermute(nvtxs, perm, 1);

    /* Partition by splitting the queues randomly: vertices are visited in
       permuted order, and within each queue (selected by samax over the
       vertex's ncon weights) they alternate between side 0 and side 1 */
    for (ii=0; ii<nvtxs; ii++) {
      i = perm[ii];
      qnum = samax(ncon, nvwgt+i*ncon);
      where[i] = counts[qnum];
      counts[qnum] = (counts[qnum]+1)%2;
    }

    MocCompute2WayPartitionParams(ctrl, graph);

    MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 6);
    MocBalance2Way(ctrl, graph, tpwgts, 1.02);
    MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 6);
    MocBalance2Way(ctrl, graph, tpwgts, 1.02);
    MocFM_2WayEdgeRefine(ctrl, graph, tpwgts, 6);

    if (bestcut > graph->mincut) {
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;  /* cannot do better than an empty cut */
    }
  }

  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  GKfree((void**)&bestwhere, &perm, LTERM);
}
/*************************************************************************
* Produces a bisection of the graph from random initial splits.
*
* In each trial the vertices are visited in random order and moved to
* side 0 while doing so keeps its weight below ubfactor*tpwgts[0]; the scan
* stops as soon as side 0 exceeds tpwgts[0]/ubfactor. The very last trial
* skips the scan and starts with every vertex on side 1. Each trial is then
* balanced and refined, and the assignment with the smallest edge-cut is
* kept. The result is returned in graph->where and graph->mincut.
**************************************************************************/
void RandomBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor)
{
  int v, pos, nvtxs, side0wgt, lowlimit0, highlimit0, bestcut, trial;
  idxtype *vwgt, *where;
  idxtype *perm, *bestwhere;

  nvtxs = graph->nvtxs;
  vwgt  = graph->vwgt;

  Allocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  perm      = idxmalloc(nvtxs, "BisectGraph: queue");

  ASSERTP(tpwgts[0]+tpwgts[1] == idxsum(nvtxs, vwgt), ("%d %d\n", tpwgts[0]+tpwgts[1], idxsum(nvtxs, vwgt)));

  /* Only the limits of side 0 are consulted by the scan below */
  highlimit0 = ubfactor*tpwgts[0];
  lowlimit0  = (1.0/ubfactor)*tpwgts[0];

  trial = (nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);

  /* Start above any attainable cut; the +1 covers graphs with no edges */
  bestcut = idxsum(nvtxs, graph->adjwgtsum)+1;

  while (trial > 0) {
    RandomPermute(nvtxs, perm, 1);

    /* Everything starts on side 1 */
    idxset(nvtxs, 1, where);
    side0wgt = 0;

    if (trial != 1) {
      for (pos=0; pos<nvtxs; pos++) {
        v = perm[pos];

        /* Skip vertices that would push side 0 to or past its upper limit */
        if (side0wgt+vwgt[v] >= highlimit0)
          continue;

        where[v]  = 0;
        side0wgt += vwgt[v];

        if (side0wgt > lowlimit0)
          break;
      }
    }

    /* Balance and refine this trial */
    Compute2WayPartitionParams(ctrl, graph);
    Balance2Way(ctrl, graph, tpwgts, ubfactor);
    FM_2WayEdgeRefine(ctrl, graph, tpwgts, 4);

    if (graph->mincut < bestcut) {
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;
    }

    trial--;
  }

  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  GKfree(&bestwhere, &perm, LTERM);
}
/*************************************************************************
* Produces a bisection of the graph with a region growing algorithm.
*
* Each trial starts a breadth-first search from a random vertex and moves
* the visited vertices from side 1 to side 0 until side 1 has shrunk to at
* most ubfactor*tpwgts[1]. If the queue empties first (disconnected graph)
* the search is restarted from a random unvisited vertex. Every trial is
* balanced and refined, and the assignment with the smallest edge-cut is
* kept. The result is returned in graph->where and graph->mincut.
**************************************************************************/
void GrowBisection(CtrlType *ctrl, GraphType *graph, int *tpwgts, float ubfactor)
{
  int v, e, nbr, skip, nvtxs, drain, nleft, head, tail, pwgts[2], lowlimit1, highlimit1, bestcut, trial;
  idxtype *xadj, *vwgt, *adjncy, *where;
  idxtype *queue, *seen, *bestwhere;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  vwgt   = graph->vwgt;
  adjncy = graph->adjncy;

  Allocate2WayPartitionMemory(ctrl, graph);
  where = graph->where;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  queue     = idxmalloc(nvtxs, "BisectGraph: queue");
  seen      = idxmalloc(nvtxs, "BisectGraph: touched");

  ASSERTP(tpwgts[0]+tpwgts[1] == idxsum(nvtxs, vwgt), ("%d %d\n", tpwgts[0]+tpwgts[1], idxsum(nvtxs, vwgt)));

  /* Only the limits of side 1 are consulted while growing */
  highlimit1 = ubfactor*tpwgts[1];
  lowlimit1  = (1.0/ubfactor)*tpwgts[1];

  /* Start above any attainable cut; the +1 covers graphs with no edges */
  bestcut = idxsum(nvtxs, graph->adjwgtsum)+1;

  for (trial = (nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS); trial > 0; trial--) {
    idxset(nvtxs, 0, seen);

    pwgts[1] = tpwgts[0]+tpwgts[1];
    pwgts[0] = 0;

    idxset(nvtxs, 1, where);

    /* Seed the search with a random vertex */
    queue[0] = RandomInRange(nvtxs);
    seen[queue[0]] = 1;
    head  = 0;
    tail  = 1;
    nleft = nvtxs-1;
    drain = 0;

    /* Grow side 0 by breadth-first search */
    while (1) {
      if (head == tail) {
        /* Queue is empty: the graph is disconnected */
        if (nleft == 0 || drain)
          break;

        /* Restart from the skip-th vertex that has not been seen yet */
        skip = RandomInRange(nleft);
        for (v=0; v<nvtxs; v++) {
          if (seen[v] != 0)
            continue;
          if (skip == 0)
            break;
          skip--;
        }

        queue[0] = v;
        seen[v]  = 1;
        head = 0;
        tail = 1;
        nleft--;
      }

      v = queue[head++];

      /* Moving v would leave side 1 too light: stop expanding and just
         drain what is already queued */
      if (pwgts[0] > 0 && pwgts[1]-vwgt[v] < lowlimit1) {
        drain = 1;
        continue;
      }

      where[v] = 0;
      INC_DEC(pwgts[0], pwgts[1], vwgt[v]);
      if (pwgts[1] <= highlimit1)
        break;

      drain = 0;
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];
        if (seen[nbr] == 0) {
          queue[tail++] = nbr;
          seen[nbr] = 1;
          nleft--;
        }
      }
    }

    /* Limiting case: everything ended up on side 0, so hand one random
       vertex back to side 1 */
    if (pwgts[1] == 0) {
      v = RandomInRange(nvtxs);
      where[v] = 1;
      INC_DEC(pwgts[1], pwgts[0], vwgt[v]);
    }

    /* Balance and refine this trial */
    Compute2WayPartitionParams(ctrl, graph);
    Balance2Way(ctrl, graph, tpwgts, ubfactor);
    FM_2WayEdgeRefine(ctrl, graph, tpwgts, 4);

    if (graph->mincut < bestcut) {
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
      if (bestcut == 0)
        break;
    }
  }

  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  GKfree(&bestwhere, &queue, &seen, LTERM);
}
/*************************************************************************
* Produces a vertex separator of the graph with a region growing algorithm.
*
* Each trial grows side 0 by breadth-first search from a random vertex,
* balances and refines the resulting edge bisection, turns its boundary
* vertices into a separator (where == 2) and refines that separator with
* the node-based refinement. The assignment with the smallest
* graph->mincut after node refinement is kept and returned in graph->where;
* the node partition parameters are recomputed for it before returning.
*
* The routine allocates graph->rdata itself (5*nvtxs+3 idxtype entries)
* and points pwgts, where, bndptr, bndind, nrinfo, id and ed into it.
* One more trial is run than the SMALLNIPARTS/LARGENIPARTS count.
**************************************************************************/
void GrowBisectionNode(CtrlType *ctrl, GraphType *graph, float ubfactor)
{
  int v, e, b, nbr, skip, nvtxs, drain, nleft, head, tail, pwgts[2], tpwgts[2], lowlimit1, highlimit1, bestcut, trial;
  idxtype *xadj, *vwgt, *adjncy, *where, *bndind, *base;
  idxtype *queue, *seen, *bestwhere;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  vwgt   = graph->vwgt;
  adjncy = graph->adjncy;

  bestwhere = idxmalloc(nvtxs, "BisectGraph: bestwhere");
  queue     = idxmalloc(nvtxs, "BisectGraph: queue");
  seen      = idxmalloc(nvtxs, "BisectGraph: touched");

  /* Split the total vertex weight into two halves; side 0 gets the odd unit */
  tpwgts[0]  = idxsum(nvtxs, vwgt);
  tpwgts[1]  = tpwgts[0]/2;
  tpwgts[0] -= tpwgts[1];

  /* Only the limits of side 1 are consulted while growing */
  highlimit1 = ubfactor*tpwgts[1];
  lowlimit1  = (1.0/ubfactor)*tpwgts[1];

  /* One slab serves both the edge-based and the node-based refinement:
     nrinfo and id start at the same offset */
  base = idxmalloc(5*nvtxs+3, "GrowBisectionNode: graph->rdata");
  graph->rdata  = base;
  graph->pwgts  = base;
  graph->where  = base + 3;
  graph->bndptr = base + nvtxs + 3;
  graph->bndind = base + 2*nvtxs + 3;
  graph->nrinfo = (NRInfoType *)(base + 3*nvtxs + 3);
  graph->id     = base + 3*nvtxs + 3;
  graph->ed     = base + 4*nvtxs + 3;

  where  = graph->where;
  bndind = graph->bndind;

  trial   = (nvtxs <= ctrl->CoarsenTo ? SMALLNIPARTS : LARGENIPARTS);
  bestcut = tpwgts[0]+tpwgts[1];

  for (trial++; trial > 0; trial--) {
    idxset(nvtxs, 0, seen);

    pwgts[1] = tpwgts[0]+tpwgts[1];
    pwgts[0] = 0;

    idxset(nvtxs, 1, where);

    /* Seed the search with a random vertex */
    queue[0] = RandomInRange(nvtxs);
    seen[queue[0]] = 1;
    head  = 0;
    tail  = 1;
    nleft = nvtxs-1;
    drain = 0;

    /* Grow side 0 by breadth-first search */
    while (1) {
      if (head == tail) {
        /* Queue is empty: the graph is disconnected */
        if (nleft == 0 || drain)
          break;

        /* Restart from the skip-th vertex that has not been seen yet */
        skip = RandomInRange(nleft);
        for (v=0; v<nvtxs; v++) {
          if (seen[v] != 0)
            continue;
          if (skip == 0)
            break;
          skip--;
        }

        queue[0] = v;
        seen[v]  = 1;
        head = 0;
        tail = 1;
        nleft--;
      }

      v = queue[head++];

      /* Moving v would leave side 1 too light: stop expanding and just
         drain what is already queued */
      if (pwgts[1]-vwgt[v] < lowlimit1) {
        drain = 1;
        continue;
      }

      where[v] = 0;
      INC_DEC(pwgts[0], pwgts[1], vwgt[v]);
      if (pwgts[1] <= highlimit1)
        break;

      drain = 0;
      for (e=xadj[v]; e<xadj[v+1]; e++) {
        nbr = adjncy[e];
        if (seen[nbr] == 0) {
          queue[tail++] = nbr;
          seen[nbr] = 1;
          nleft--;
        }
      }
    }

    /* Balance and refine the edge bisection */
    Compute2WayPartitionParams(ctrl, graph);
    Balance2Way(ctrl, graph, tpwgts, ubfactor);
    FM_2WayEdgeRefine(ctrl, graph, tpwgts, 4);

    /* Turn the boundary into a vertex separator and refine it */
    for (b=0; b<graph->nbnd; b++)
      where[bndind[b]] = 2;

    Compute2WayNodePartitionParams(ctrl, graph);
    FM_2WayNodeRefine(ctrl, graph, ubfactor, 6);

    if (graph->mincut < bestcut) {
      bestcut = graph->mincut;
      idxcopy(nvtxs, where, bestwhere);
    }
  }

  graph->mincut = bestcut;
  idxcopy(nvtxs, bestwhere, where);

  Compute2WayNodePartitionParams(ctrl, graph);

  GKfree(&bestwhere, &queue, &seen, LTERM);
}
/*************************************************************************
* This function is the entry point for ONMETIS: it computes a nested
* dissection ordering of the graph.
*
* Parameters:
*   nvtxs   - number of vertices.
*   xadj    - adjacency structure offsets (nvtxs+1 entries).
*   adjncy  - adjacency lists.
*   numflag - if *numflag == 1 the inputs are converted with
*             Change2CNumbering on entry and everything (including
*             perm/iperm) is converted back with Change2FNumberingOrder
*             on exit.
*   options - options[0] == 0 selects the ONMETIS_* defaults; otherwise
*             the OPTION_* entries are read.
*   perm    - output; filled at the end so that perm[iperm[i]] = i. Also
*             used as scratch space while the ordering is assembled.
*   iperm   - output; receives the ordering produced by the nested
*             dissection, expanded back to the original vertex set.
*
* Depending on the options the graph is first pruned (pfactor > 0),
* compressed (OFLAG_COMPRESS) or used as is.
**************************************************************************/
void METIS_NodeND(int *nvtxs, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *perm, idxtype *iperm)
{
  int i, ii, j, l;
  GraphType graph;
  CtrlType ctrl;
  idxtype *cptr = NULL, *cind = NULL, *piperm = NULL;

  if (*numflag == 1)
    Change2CNumbering(*nvtxs, xadj, adjncy);

  if (options[0] == 0) {  /* Use the default parameters */
    ctrl.CType   = ONMETIS_CTYPE;
    ctrl.IType   = ONMETIS_ITYPE;
    ctrl.RType   = ONMETIS_RTYPE;
    ctrl.dbglvl  = ONMETIS_DBGLVL;
    ctrl.oflags  = ONMETIS_OFLAGS;
    ctrl.pfactor = ONMETIS_PFACTOR;
    ctrl.nseps   = ONMETIS_NSEPS;
  }
  else {
    ctrl.CType   = options[OPTION_CTYPE];
    ctrl.IType   = options[OPTION_ITYPE];
    ctrl.RType   = options[OPTION_RTYPE];
    ctrl.dbglvl  = options[OPTION_DBGLVL];
    ctrl.oflags  = options[OPTION_OFLAGS];
    ctrl.pfactor = options[OPTION_PFACTOR];
    ctrl.nseps   = options[OPTION_NSEPS];
  }
  if (ctrl.nseps < 1)
    ctrl.nseps = 1;

  ctrl.optype    = OP_ONMETIS;
  ctrl.CoarsenTo = 100;

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  InitRandom(-1);

  if (ctrl.pfactor > 0) {
    /*============================================================
    * Prune the dense columns
    ==============================================================*/
    piperm = idxmalloc(*nvtxs, "ONMETIS: piperm");

    PruneGraph(&ctrl, &graph, *nvtxs, xadj, adjncy, piperm, (float)(0.1*ctrl.pfactor));
  }
  else if (ctrl.oflags&OFLAG_COMPRESS) {
    /*============================================================
    * Compress the graph
    ==============================================================*/
    cptr = idxmalloc(*nvtxs+1, "ONMETIS: cptr");
    cind = idxmalloc(*nvtxs, "ONMETIS: cind");

    CompressGraph(&ctrl, &graph, *nvtxs, xadj, adjncy, cptr, cind);

    if (graph.nvtxs >= COMPRESSION_FRACTION*(*nvtxs)) {
      /* We actually performed no compression. Clear the flag bit
         explicitly instead of decrementing oflags, so the result does not
         depend on the numeric value of OFLAG_COMPRESS. */
      ctrl.oflags &= ~OFLAG_COMPRESS;
      GKfree(&cptr, &cind, LTERM);
    }
    else if (2*graph.nvtxs < *nvtxs && ctrl.nseps == 1) {
      /* The graph shrank to less than half: try two separators */
      ctrl.nseps = 2;
    }
  }
  else {
    SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, NULL, NULL, 0);
  }

  /*=============================================================
  * Do the nested dissection ordering
  ===============================================================*/
  ctrl.maxvwgt = 1.5*(idxsum(graph.nvtxs, graph.vwgt)/ctrl.CoarsenTo);
  AllocateWorkSpace(&ctrl, &graph, 2);

  if (ctrl.oflags&OFLAG_CCMP)
    MlevelNestedDissectionCC(&ctrl, &graph, iperm, ORDER_UNBALANCE_FRACTION, graph.nvtxs);
  else
    MlevelNestedDissection(&ctrl, &graph, iperm, ORDER_UNBALANCE_FRACTION, graph.nvtxs);

  FreeWorkSpace(&ctrl, &graph);

  if (ctrl.pfactor > 0) {
    /* Order any pruned vertices: map the ordering of the pruned graph back
       through piperm and number the remaining vertices last */
    if (graph.nvtxs < *nvtxs) {
      idxcopy(graph.nvtxs, iperm, perm);  /* Use perm as an auxiliary array */
      for (i=0; i<graph.nvtxs; i++)
        iperm[piperm[i]] = perm[i];
      for (i=graph.nvtxs; i<*nvtxs; i++)
        iperm[piperm[i]] = i;
    }

    GKfree(&piperm, LTERM);
  }
  else if (ctrl.oflags&OFLAG_COMPRESS) {
    /* Uncompress the ordering */
    if (graph.nvtxs < COMPRESSION_FRACTION*(*nvtxs)) {
      /* construct perm from iperm */
      for (i=0; i<graph.nvtxs; i++)
        perm[iperm[i]] = i;
      /* number the original vertices listed in cind for each compressed
         vertex consecutively, following the compressed ordering */
      for (l=ii=0; ii<graph.nvtxs; ii++) {
        i = perm[ii];
        for (j=cptr[i]; j<cptr[i+1]; j++)
          iperm[cind[j]] = l++;
      }
    }

    GKfree(&cptr, &cind, LTERM);
  }

  /* perm is the inverse of iperm */
  for (i=0; i<*nvtxs; i++)
    perm[iperm[i]] = i;

  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl));

  if (*numflag == 1)
    Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm);
}
/*************************************************************************
* Positions (0-based, within the four mien entries of a tetrahedron) of the
* three vertices of the face that each of the element's four mrng entries
* refers to: faces (1,2,3), (1,2,4), (2,3,4) and (1,3,4).
**************************************************************************/
static const int TetFaceVerts[4][3] = { {0, 1, 2}, {0, 1, 3}, {1, 2, 3}, {0, 2, 3} };


/*************************************************************************
* Reports a short read on fname and terminates the program.
**************************************************************************/
static void DieOnShortRead(char *fname)
{
  mfprintf(stderr, "main: failed to read the expected amount of data from %s\n", fname);
  exit(1);
}


/*************************************************************************
* Reads n int-sized items from file fname into buf and byte-flips each of
* them with Flip_int32. msg is forwarded to GKfopen.
* NOTE(review): the data is read as int into an idxtype buffer, exactly as
* before; this presumes idxtype has the size of int -- confirm.
**************************************************************************/
static void ReadFlippedInts(char *fname, char *msg, idxtype *buf, idxtype n)
{
  FILE *fp;
  idxtype i;

  fp = GKfopen(fname, "rb", msg);
  if (fread(buf, sizeof(int), (size_t)n, fp) != (size_t)n)
    DieOnShortRead(fname);
  for (i=0; i<n; i++)
    buf[i] = Flip_int32(buf[i]);
  GKfclose(fp);
}


/*************************************************************************
* Reads n doubles from file fname into buf and byte-flips each of them with
* Flip_int64 through a 64-bit integer view. msg is forwarded to GKfopen.
**************************************************************************/
static void ReadFlippedDoubles(char *fname, char *msg, double *buf, idxtype n)
{
  FILE *fp;
  idxtype i;
  long long int *ltmp;

  fp = GKfopen(fname, "rb", msg);
  if (fread(buf, sizeof(double), (size_t)n, fp) != (size_t)n)
    DieOnShortRead(fname);
  for (i=0; i<n; i++) {
    ltmp = (long long int *)(buf+i);
    *ltmp = Flip_int64(*ltmp);
  }
  GKfclose(fp);
}


/*************************************************************************
* Sets sflag[v] = 1 for every node v that belongs to a face whose mrng
* entry is positive, and 0 for all other nodes. Node numbers in mien are
* 1-based.
**************************************************************************/
static void MarkSurfaceNodes(idxtype nn, idxtype ne, idxtype *mien, idxtype *mrng, idxtype *sflag)
{
  idxtype i;
  int face, v;

  iset(nn, 0, sflag);
  for (i=0; i<ne; i++) {
    for (face=0; face<4; face++) {
      if (mrng[4*i+face] > 0) {
        for (v=0; v<3; v++)
          sflag[mien[4*i+TetFaceVerts[face][v]]-1] = 1;
      }
    }
  }
}


/*************************************************************************
* Let the game begin
*
* Usage: <prog> <nn> <ne> <fstep> <lstep> <nparts>
*
* Reads the tetrahedral mesh (mien.<fstep>), computes a k-way partition of
* its nodal graph, and then for every step in [fstep, lstep] reads the node
* coordinates (mxyz.<step>) and face flags (mrng.<step>), partitions the
* surface nodes for contact, finds the contacting boxes and prints
* statistics about them.
**************************************************************************/
int main(int argc, char *argv[])
{
  idxtype i, j, istep, options[10], nn, ne, fstep, lstep, nparts, nboxes, u[3], dim, nchanges, ncomm;
  char filename[256];
  idxtype *mien, *mrng, *part, *oldpart, *sflag, *bestdims, *fepart;
  double *mxyz, *bxyz, *box, coord;
  idxtype *xadj, *adjncy, *cntptr, *cntind;
  idxtype numflag = 0, wgtflag = 0, edgecut, etype=2;
  void *cinfo;
  int face;

  if (argc != 6) {
    mfprintf(stderr, "Usage: %s <nn> <ne> <fstep> <lstep> <nparts>\n", argv[0]);
    exit(0);
  }

  nn     = atoi(argv[1]);
  ne     = atoi(argv[2]);
  fstep  = atoi(argv[3]);
  lstep  = atoi(argv[4]);
  nparts = atoi(argv[5]);

  /* Build the name of the first input file before reporting it; filename
     used to be printed here while still uninitialized */
  msprintf(filename, "mien.%04D", fstep);
  mprintf("Reading %s, nn: %D, ne: %D, fstep: %D, lstep: %D, nparts: %D\n", filename, nn, ne, fstep, lstep, nparts);

  mien     = idxmalloc(4*ne, "main: mien");
  mxyz     = gk_dmalloc(3*nn, "main: mxyz");
  mrng     = idxmalloc(4*ne, "main: mrng");
  bxyz     = gk_dmalloc(6*ne*4, "main: bxyz");  /* up to 4 boxes per element */

  fepart   = idxmalloc(nn, "main: fepart");
  part     = idxmalloc(nn, "main: part");
  oldpart  = idxmalloc(nn, "main: oldpart");
  sflag    = idxmalloc(nn, "main: sflag");

  bestdims = idxsmalloc(2*nparts, -1, "main: bestdims");

  xadj     = idxmalloc(nn+1, "main: xadj");
  adjncy   = idxmalloc(50*nn, "main: adjncy");


  /*========================================================================
   * Read the initial mesh and setup the graph and contact information
   *========================================================================*/
  ReadFlippedInts(filename, "main: mien", mien, 4*ne);

  msprintf(filename, "mxyz.%04D", fstep);
  ReadFlippedDoubles(filename, "main: mxyz", mxyz, 3*nn);
  mprintf("%e %e %e\n", mxyz[3*0+0], mxyz[3*0+1], mxyz[3*0+2]);

  msprintf(filename, "mrng.%04D", fstep);
  ReadFlippedInts(filename, "main: mrng", mrng, 4*ne);


  /*========================================================================
   * Determine which nodes are in the surface
   *========================================================================*/
  MarkSurfaceNodes(nn, ne, mien, mrng, sflag);
  mprintf("Contact Nodes: %D of %D\n", isum(nn, sflag), nn);


  /*========================================================================
   * Compute the FE partition
   *========================================================================*/
  numflag = mien[idxargmin(4*ne, mien)];  /* smallest node number = numbering base */
  METIS_MeshToNodal(&ne, &nn, mien, &etype, &numflag, xadj, adjncy);

  options[0] = 0;
  METIS_PartGraphVKway(&nn, xadj, adjncy, NULL, NULL, &wgtflag, &numflag, &nparts,
        options, &edgecut, fepart);

  mprintf("K-way partitioning Volume: %D\n", edgecut);


  /*========================================================================
   * Get into the loop in which you go over the different configurations
   *========================================================================*/
  for (istep=fstep; istep<=lstep; istep++) {
    msprintf(filename, "mxyz.%04D", istep);
    mprintf("Reading %s...............................................................\n", filename);
    ReadFlippedDoubles(filename, "main: mxyz", mxyz, 3*nn);

    msprintf(filename, "mrng.%04D", istep);
    ReadFlippedInts(filename, "main: mrng", mrng, 4*ne);

    /* Determine which nodes are in the surface */
    MarkSurfaceNodes(nn, ne, mien, mrng, sflag);
    mprintf("Contact Nodes: %D of %D\n", isum(nn, sflag), nn);

    /* Determine the bounding boxes of the surface elements. Box b occupies
       bxyz[6*b .. 6*b+5]: the three minima followed by the three maxima. */
    for (nboxes=0, i=0; i<ne; i++) {
      for (face=0; face<4; face++) {
        if (mrng[4*i+face] <= 0)
          continue;

        for (j=0; j<3; j++)
          u[j] = mien[4*i+TetFaceVerts[face][j]]-1;

        /* Start from the first vertex and widen with the other two */
        box = bxyz + 6*nboxes;
        for (dim=0; dim<3; dim++)
          box[dim] = box[3+dim] = mxyz[3*u[0]+dim];

        for (j=1; j<3; j++) {
          for (dim=0; dim<3; dim++) {
            coord = mxyz[3*u[j]+dim];
            if (box[dim] > coord)
              box[dim] = coord;
            if (box[3+dim] < coord)
              box[3+dim] = coord;
          }
        }
        nboxes++;
      }
    }

    cinfo = METIS_PartSurfForContactRCB(&nn, mxyz, sflag, &nparts, part, bestdims);

    METIS_FindContacts(cinfo, &nboxes, bxyz, &nparts, &cntptr, &cntind);

    METIS_FreeContactInfo(cinfo);

    /* Count how many nodes changed partition since the previous step */
    nchanges = 0;
    if (istep > fstep) {
      for (i=0; i<nn; i++)
        nchanges += (part[i] != oldpart[i] ? 1 : 0);
    }
    idxcopy(nn, part, oldpart);

    ncomm = ComputeMapCost(nn, nparts, fepart, part);

    mprintf("Contacting Elements: %D Indices: %D Nchanges: %D MapCost: %D\n", nboxes, cntptr[nboxes]-nboxes, nchanges, ncomm);

    gk_free((void **)&cntptr, &cntind, LTERM);
  }

  return 0;
}
/*************************************************************************
* Projects the k-way partition stored in graph->coarser onto graph and, in
* the same sweep, fills in the per-vertex refinement records (id, ed,
* ndegrees, edegrees) and the boundary arrays of graph.
*
*   ctrl   - control structure; its wspace.edegrees pool and wspace.cdegree
*            cursor are (re)used for the per-vertex edegrees arrays
*   graph  - the finer graph; graph->coarser must hold the partition to be
*            projected and is released through FreeGraph() before returning
*   nparts - number of partitions (size of the pwgts array and of the
*            temporary lookup table)
**************************************************************************/
void ProjectKWayPartition(CtrlType *ctrl, GraphType *graph, int nparts)
{
  int v, e, slot, nvtxs, nbnd, mypart, nbrpart, first, last, ndegrees;
  idxtype *xadj, *adjncy, *adjwgt, *adjwgtsum;
  idxtype *cmap, *where, *bndptr, *bndind;
  idxtype *cwhere, *htable;
  GraphType *cgraph;
  RInfoType *crinfo, *rinfo, *vinfo;
  EDegreeType *degs;

  cgraph = graph->coarser;
  cwhere = cgraph->where;
  crinfo = cgraph->rinfo;

  nvtxs     = graph->nvtxs;
  cmap      = graph->cmap;
  xadj      = graph->xadj;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;

  AllocateKWayPartitionMemory(ctrl, graph, nparts);
  where  = graph->where;
  rinfo  = graph->rinfo;
  bndind = graph->bndind;
  bndptr = idxset(nvtxs, -1, graph->bndptr);

  /* Pass 1: copy the partition of each coarse vertex to the fine vertices
     mapped onto it. cmap[v] is then overwritten with the ed of that coarse
     vertex, so pass 2 can test it without touching crinfo again. */
  for (v=0; v<nvtxs; v++) {
    slot = cmap[v];
    where[v] = cwhere[slot];
    cmap[v] = crinfo[slot].ed;
  }

  /* htable[p] is the index of partition p inside the edegrees array of the
     vertex currently being processed, or -1 if p has not been seen */
  htable = idxset(nparts, -1, idxwspacemalloc(ctrl, nparts));

  /* Pass 2: compute id/ed and the per-partition degrees */
  ctrl->wspace.cdegree = 0;
  nbnd = 0;
  for (v=0; v<nvtxs; v++) {
    mypart = where[v];
    vinfo = &rinfo[v];

    vinfo->id = adjwgtsum[v];
    vinfo->ed = 0;
    vinfo->ndegrees = 0;
    vinfo->edegrees = NULL;

    /* The coarse vertex had ed <= 0: keep id = adjwgtsum[v], ed = 0 */
    if (cmap[v] <= 0)
      continue;

    first = xadj[v];
    last  = xadj[v+1];

    /* Tentatively reserve one edegrees slot per incident edge */
    degs = ctrl->wspace.edegrees + ctrl->wspace.cdegree;
    vinfo->edegrees = degs;
    ctrl->wspace.cdegree += last-first;

    ndegrees = 0;
    for (e=first; e<last; e++) {
      nbrpart = where[adjncy[e]];
      if (nbrpart == mypart)
        continue;

      vinfo->ed += adjwgt[e];

      slot = htable[nbrpart];
      if (slot != -1) {
        degs[slot].ed += adjwgt[e];
      }
      else {
        htable[nbrpart] = ndegrees;
        degs[ndegrees].pid = nbrpart;
        degs[ndegrees].ed = adjwgt[e];
        ndegrees++;
      }
    }
    vinfo->id -= vinfo->ed;

    if (vinfo->ed != 0) {
      if (vinfo->ed-vinfo->id >= 0)
        BNDInsert(nbnd, bndind, bndptr, v);

      vinfo->ndegrees = ndegrees;

      /* Reset only the entries this vertex touched */
      for (e=0; e<ndegrees; e++)
        htable[degs[e].pid] = -1;
    }
    else {
      /* Nothing external after all: hand the reserved slots back.
         NOTE(review): htable is not reset on this path, which presumably
         relies on edge weights being positive (ed == 0 => no entry was
         set) -- confirm. */
      vinfo->edegrees = NULL;
      ctrl->wspace.cdegree -= last-first;
    }
  }

  idxcopy(nparts, cgraph->pwgts, graph->pwgts);
  graph->mincut = cgraph->mincut;
  graph->nbnd = nbnd;

  FreeGraph(graph->coarser);
  graph->coarser = NULL;

  idxwspacefree(ctrl, nparts);

  ASSERT(CheckBnd2(graph));
}
/*************************************************************************
* Entry point of the initial partitioning algorithm that is based on
* recursive bisection.
* The graph is assembled on all the processors, and the algorithm proceeds
* by parallelizing the recursive bisection steps: ctrl->gcomm is split into
* ngroups communicators and, inside each one, the processors follow
* different branches of the bisection tree.
* When ngroups > 1 every processor scores the partition of its group, and
* the partition held by the processor with the smallest score is broadcast
* to everybody. The local slice of the result is copied to graph->where.
**************************************************************************/
void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j;
  int ncon, iprank, ipsize, gnvtxs, ngroups;
  int lnparts, fpart, fpe, lnpes, halfparts, halfpes;
  int twoparts=2, numflag=0, wgtflag=3, moptions[10], edgecut, max_cut;
  idxtype *xadj, *adjncy, *adjwgt, *vwgt;
  idxtype *part, *gwhere0, *gwhere1;
  idxtype *saved_where, *saved_vwgt, *saved_xadj, *saved_adjncy, *saved_adjwgt;
  float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum;
  GraphType *agraph;
  MPI_Comm ipcomm;
  struct {
    float sum;
    int rank;
  } lpesum, gpesum;

  ncon = graph->ncon;
  ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
  gnvtxs = agraph->nvtxs;

  /* Private copies of the assembled graph. agraph itself is shrunk by
     Mc_KeepPart() below, and the full graph is needed again to score the
     final partition. */
  part   = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part");
  xadj   = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj");
  adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy");
  adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt");
  vwgt   = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt");

  idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt);
  idxcopy(agraph->nvtxs+1, agraph->xadj, xadj);
  idxcopy(agraph->nedges, agraph->adjncy, adjncy);
  idxcopy(agraph->nedges, agraph->adjwgt, adjwgt);

  /* Processors with the same (mype % ngroups) end up in the same group */
  MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
  MPI_Comm_rank(ipcomm, &iprank);
  MPI_Comm_size(ipcomm, &ipsize);

  gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0");
  gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1");

  /* ADD: this assumes that tpwgts for all constraints is the same */
  /* ADD: this is necessary because serial metis does not support the general case */
  /* One target weight per partition: the average over its ncon constraints */
  mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
  for (i=0; i<ctrl->nparts; i++) {
    for (j=0; j<ncon; j++)
      mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    mytpwgts[i] /= (float)ncon;
  }

  /* Go into the recursive bisection */
  /* ADD: consider changing this to breadth-first type bisection */
  moptions[0] = 0;
  moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1;

  /* This processor is currently responsible for partitions
     [fpart, fpart+lnparts) together with group ranks [fpe, fpe+lnpes) */
  fpart = 0;
  fpe = 0;
  lnparts = ctrl->nparts;
  lnpes = ipsize;

  while (lnpes > 1 && lnparts > 1) {
    halfparts = lnparts/2;
    halfpes = lnpes/2;

    /* Determine the weights of the two sides of the bisection */
    mytpwgts2[0] = ssum(halfparts, mytpwgts+fpart);
    mytpwgts2[1] = 1.0-mytpwgts2[0];

    if (ncon == 1) {
      METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts,
            mytpwgts2, moptions, &edgecut, part);
    }
    else {
      METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
            agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
            &twoparts, mytpwgts2, moptions, &edgecut, part);
    }

    /* Rescale the target weights of each half by the weight of that half */
    wsum = ssum(halfparts, mytpwgts+fpart);
    sscale(halfparts, 1.0/wsum, mytpwgts+fpart);
    sscale(lnparts-halfparts, 1.0/(1.0-wsum), mytpwgts+fpart+halfparts);

    if (iprank < fpe+halfpes) {
      /* I'm picking the left branch */
      Mc_KeepPart(agraph, wspace, part, 0);
      lnpes = halfpes;
      lnparts = halfparts;
    }
    else {
      /* I'm picking the right branch */
      Mc_KeepPart(agraph, wspace, part, 1);
      fpart += halfparts;
      fpe += halfpes;
      lnpes -= halfpes;
      lnparts -= halfparts;
    }
  }

  if (lnparts != 1) {
    /* In case npes is smaller than nparts: finish the remaining lnparts
       partitions serially on this processor */
    if (ncon == 1) {
      METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts,
            mytpwgts+fpart, moptions, &edgecut, part);
    }
    else {
      METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
            agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
            &lnparts, mytpwgts+fpart, moptions, &edgecut, part);
    }

    for (i=0; i<agraph->nvtxs; i++)
      gwhere0[agraph->label[i]] = fpart + part[i];
  }
  else if (iprank == fpe) {
    /* In case npes is greater than or equal to nparts: only the first
       process will assign labels (for the reduction to work) */
    for (i=0; i<agraph->nvtxs; i++)
      gwhere0[agraph->label[i]] = fpart;
  }

  /* gwhere0 was zero-initialized, so summing over the group assembles the
     partition vector of the whole graph in gwhere1 */
  MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm);

  if (ngroups > 1) {
    /* Temporarily point agraph at the saved copy of the full graph */
    saved_xadj   = agraph->xadj;
    saved_adjncy = agraph->adjncy;
    saved_adjwgt = agraph->adjwgt;
    saved_vwgt   = agraph->vwgt;
    saved_where  = agraph->where;

    agraph->xadj   = xadj;
    agraph->adjncy = adjncy;
    agraph->adjwgt = adjwgt;
    agraph->vwgt   = vwgt;
    agraph->where  = gwhere1;
    agraph->nvtxs  = gnvtxs;

    Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
    lbsum = ssum(ncon, lbvec);

    edgecut = ComputeSerialEdgeCut(agraph);
    MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm);
    MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm);

    /* Score of the local partition (smaller wins). If at least one
       processor is below the balance threshold, processors below it compete
       with their edgecut and the others get max_cut; otherwise everybody
       competes with lbsum. */
    lpesum.sum = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
        lpesum.sum = (float) (edgecut);
      else
        lpesum.sum = (float) (max_cut);
    }

    MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank));
    MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm);
    MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm);

    /* Give agraph its own arrays back so that FreeGraph() below sees them
       (agraph->nvtxs is left at gnvtxs) */
    agraph->xadj   = saved_xadj;
    agraph->adjncy = saved_adjncy;
    agraph->adjwgt = saved_adjwgt;
    agraph->vwgt   = saved_vwgt;
    agraph->where  = saved_where;
  }

  /* Keep the portion of the partition vector that belongs to this processor */
  idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

  FreeGraph(agraph);
  MPI_Comm_free(&ipcomm);
  GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part,
         (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));
}