/************************************************************************* * This function initializes the data structures of the priority queue **************************************************************************/ void PQueueInit(CtrlType *ctrl, PQueueType *queue, int maxnodes, int maxgain) { int i, j, ncore; queue->nnodes = 0; queue->maxnodes = maxnodes; queue->buckets = NULL; queue->nodes = NULL; queue->heap = NULL; queue->locator = NULL; if (maxgain > PLUS_GAINSPAN || maxnodes < 500) queue->type = 2; else queue->type = 1; if (queue->type == 1) { queue->pgainspan = amin(PLUS_GAINSPAN, maxgain); queue->ngainspan = amin(NEG_GAINSPAN, maxgain); j = queue->ngainspan+queue->pgainspan+1; ncore = 2 + (sizeof(ListNodeType)/sizeof(idxtype))*maxnodes + (sizeof(ListNodeType *)/sizeof(idxtype))*j; if (WspaceAvail(ctrl) > ncore) { queue->nodes = (ListNodeType *)idxwspacemalloc(ctrl, (sizeof(ListNodeType)/sizeof(idxtype))*maxnodes); queue->buckets = (ListNodeType **)idxwspacemalloc(ctrl, (sizeof(ListNodeType *)/sizeof(idxtype))*j); queue->mustfree = 0; } else { /* Not enough memory in the wspace, allocate it */ queue->nodes = (ListNodeType *)idxmalloc((sizeof(ListNodeType)/sizeof(idxtype))*maxnodes, "PQueueInit: queue->nodes"); queue->buckets = (ListNodeType **)idxmalloc((sizeof(ListNodeType *)/sizeof(idxtype))*j, "PQueueInit: queue->buckets"); queue->mustfree = 1; } for (i=0; i<maxnodes; i++) queue->nodes[i].id = i; for (i=0; i<j; i++) queue->buckets[i] = NULL; queue->buckets += queue->ngainspan; /* Advance buckets by the ngainspan proper indexing */ queue->maxgain = -queue->ngainspan; } else { queue->heap = (KeyValueType *)idxwspacemalloc(ctrl, (sizeof(KeyValueType)/sizeof(idxtype))*maxnodes); queue->locator = idxwspacemalloc(ctrl, maxnodes); idxset(maxnodes, -1, queue->locator); } }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm) { int i, npes, mype, nvtxs, firstvtx, dbglvl; idxtype *xadj, *adjncy; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; int zeroflg = 0; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); if (npes == 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); return; } /* Setup a fake graph to allow the rest of the code to work unchanged */ dbglvl = 0; nvtxs = vtxdist[mype+1]-vtxdist[mype]; firstvtx = vtxdist[mype]; xadj = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj"); adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy"); for (i=0; i<nvtxs; i++) { xadj[i] = i; adjncy[i] = firstvtx + (i+1)%nvtxs; } xadj[nvtxs] = nvtxs; /* Proceed with the rest of the code */ SetUpCtrl(&ctrl, npes, dbglvl, *comm); ctrl.seed = mype; ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial geometric partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace); idxcopy(graph->nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); GKfree((void **)&xadj, (void **)&adjncy, LTERM); }
// **************************************************************************** // Method: avtAxisRestrictionToolInterface::ResetNumberOfAxes // // Purpose: // Sets the number of available axes. // // Arguments: // n the number of axes // // Programmer: Jeremy Meredith // Creation: February 1, 2008 // // Modifications: // Jeremy Meredith, Fri Feb 15 13:21:20 EST 2008 // Added axis names to the axis restriction attributes. // // **************************************************************************** void avtAxisRestrictionToolInterface::ResetNumberOfAxes(int n) { AxisRestrictionAttributes *a = (AxisRestrictionAttributes *)atts; stringVector aname(n, ""); doubleVector amin(n, -1e+37); doubleVector amax(n, +1e+37); a->SetNames(aname); a->SetMinima(amin); a->SetMaxima(amax); }
/************************************************************************* * This function is the entry point for ONWMETIS. It requires weights on the * vertices. It is for the case that the matrix has been pre-compressed. **************************************************************************/ void METIS_EdgeComputeSeparator(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *options, int *sepsize, idxtype *part) { int i, j, tvwgt, tpwgts[2]; GraphType graph; CtrlType ctrl; SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); tvwgt = idxsum(*nvtxs, graph.vwgt); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = ONMETIS_CTYPE; ctrl.IType = ONMETIS_ITYPE; ctrl.RType = ONMETIS_RTYPE; ctrl.dbglvl = ONMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.oflags = 0; ctrl.pfactor = 0; ctrl.nseps = 5; ctrl.optype = OP_OEMETIS; ctrl.CoarsenTo = amin(100, *nvtxs-1); ctrl.maxvwgt = 1.5*tvwgt/ctrl.CoarsenTo; InitRandom(options[7]); AllocateWorkSpace(&ctrl, &graph, 2); /*============================================================ * Perform the bisection *============================================================*/ tpwgts[0] = tvwgt/2; tpwgts[1] = tvwgt-tpwgts[0]; MlevelEdgeBisection(&ctrl, &graph, tpwgts, 1.05); ConstructMinCoverSeparator(&ctrl, &graph, 1.05); *sepsize = graph.pwgts[2]; idxcopy(*nvtxs, graph.where, part); GKfree((void**)&graph.gdata, &graph.rdata, &graph.label, LTERM); FreeWorkSpace(&ctrl, &graph); }
/************************************************************************* * This function is the entry point for ONWMETIS. It requires weights on the * vertices. It is for the case that the matrix has been pre-compressed. **************************************************************************/ void METIS_NodeComputeSeparator(idxtype *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, idxtype *options, idxtype *sepsize, idxtype *part) { idxtype i, j, tvwgt, tpwgts[2]; GraphType graph; CtrlType ctrl; SetUpGraph(&graph, OP_ONMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, 3); tvwgt = idxsum(*nvtxs, graph.vwgt, 1); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = ONMETIS_CTYPE; ctrl.IType = ONMETIS_ITYPE; ctrl.RType = ONMETIS_RTYPE; ctrl.dbglvl = ONMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.oflags = 0; ctrl.pfactor = 0; ctrl.nseps = 3; ctrl.optype = OP_ONMETIS; ctrl.CoarsenTo = amin(100, *nvtxs-1); ctrl.maxvwgt = 1.5*tvwgt/ctrl.CoarsenTo; InitRandom(options[7]); AllocateWorkSpace(&ctrl, &graph, 2); /*============================================================ * Perform the bisection *============================================================*/ tpwgts[0] = tvwgt/2; tpwgts[1] = tvwgt-tpwgts[0]; MlevelNodeBisectionMultiple(&ctrl, &graph, tpwgts, 1.02); *sepsize = graph.pwgts[2]; idxcopy(*nvtxs, graph.where, part); FreeGraph(&graph, 0); FreeWorkSpace(&ctrl, &graph); }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); if (npes == 1) { /* Take care the npes = 1 case */ idxset(vtxdist[1], 0, part); *edgecut = 0; return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); SetUpCtrl(&ctrl, npes, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize"); PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype)); AdaptiveUndirected_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype)); ReMapGraph(&ctrl, graph, 0, &wspace); idxcopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; IMfree((void**)&graph->vsize, LTERM); FreeInitialGraphAndRemap(graph, *wgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }
/*********************************************************************************** * This function creates the fused-element-graph and returns the partition ************************************************************************************/ void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol, realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part, MPI_Comm *comm) { int npes, mype, nvtxs; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nvtxs = vtxdist[mype+1]-vtxdist[mype]; /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */ SetUpCtrl(&ctrl, *nparts, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts)); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->where = part; PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); CreateFusedElementGraph(&ctrl, graph, &wspace, numflag); idxcopy(nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); if (((*wgtflag)&2) == 0) IMfree((void**)&graph->vwgt, LTERM); IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy, &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr, &graph->imap, &graph->sendind, &graph, LTERM); FreeWSpace(&wspace); FreeCtrl(&ctrl); }
double minAreaFreeRect(int**P, int Psz, int*P0sz){ int N = Psz; bool find = false; ll ans = 1LL<<60; for(int a=0, a_len=(N); a<a_len; ++a) for(int b=0, b_len=(N); b<b_len; ++b) for(int c=0, c_len=(N); c<c_len; ++c) for(int d=0, d_len=(N); d<d_len; ++d){ if (a == b || a == c || a == d || b == c || b == d || c == d) continue; if (P[a][0] - P[b][0] != P[d][0] - P[c][0]) continue; if (P[a][1] - P[b][1] != P[d][1] - P[c][1]) continue; ll x1 = P[a][0] - P[b][0]; ll x2 = P[c][0] - P[b][0]; ll y1 = P[a][1] - P[b][1]; ll y2 = P[c][1] - P[b][1]; if (x1 * x2 + y1 * y2 == 0) { ll area = abs(x1 * y2 - x2 * y1); if (area > 0) { find = true; amin(&ans, area); } } } if (!find) return 0; return ans; }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasses) { int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp; idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; idxtype *moved, *swaps, *perm; PQueueType parts[2]; int higain, oldgain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt; nvtxs = graph->nvtxs; xadj = graph->xadj; vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; pwgts = graph->pwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); limit = (int) amin(amax(0.01*nvtxs, 15), 100); avgvwgt = amin((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs); tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]; PQueueInit(ctrl, &parts[0], nvtxs, tmp); PQueueInit(ctrl, &parts[1], nvtxs, tmp); IFSET(ctrl->dbglvl, DBG_REFINE, printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d\n", pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); origdiff = abs(tpwgts[0]-pwgts[0]); idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ PQueueReset(&parts[0]); PQueueReset(&parts[1]); mincutorder = -1; newcut = mincut = initcut = graph->mincut; mindiff = abs(tpwgts[0]-pwgts[0]); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert boundary nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = perm[ii]; ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0); ASSERT(bndptr[bndind[i]] != -1); PQueueInsert(&parts[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1); to = (from+1)%2; if ((higain = PQueueGetMax(&parts[from])) == -1) break; ASSERT(bndptr[higain] != -1); newcut -= (ed[higain]-id[higain]); INC_DEC(pwgts[to], pwgts[from], vwgt[higain]); if ((newcut < mincut && abs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) || (newcut == mincut && abs(tpwgts[0]-pwgts[0]) < mindiff)) { mincut = newcut; mindiff = abs(tpwgts[0]-pwgts[0]); mincutorder = nswaps; } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); INC_DEC(pwgts[from], pwgts[to], vwgt[higain]); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1])); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ PQueueDelete(&parts[where[k]], k, oldgain); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) PQueueUpdate(&parts[where[k]], k, oldgain, ed[k]-id[k]); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) PQueueInsert(&parts[where[k]], k, ed[k]-id[k]); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } IFSET(ctrl->dbglvl, DBG_REFINE, printf("\tMinimum cut: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut == initcut) break; } PQueueFree(ctrl, &parts[0]); PQueueFree(ctrl, &parts[1]); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm) { int i, j; int ltvwgts[MAXNCON]; int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph, *mgraph; idxtype *morder; int minnvtxs; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nparts = npes; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); return; } if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) { if (mype == 0) printf("Error: Too many processors for this many vertices.\n"); return; } minnvtxs = vtxdist[1]-vtxdist[0]; for (i=0; i<npes; i++) minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i]; if (minnvtxs < (int)((float)npes*1.1)) { if (mype == 0) printf("Error: vertices are not distributed equally.\n"); return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts)); ctrl.seed = mype; ctrl.sync = seed; ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts"); ctrl.ubvec[0] = 1.03; graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial k-way partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Moc_Global_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*======================================================= * Move the graph according to the partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); MALLOC_CHECK(NULL); graph->ncon = 1; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); /*======================================================= * Now compute an ordering of the moved graph =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); ctrl.ipart = ISEP_NODE; ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000)); /* compute tvwgts */ for (j=0; j<mgraph->ncon; j++) ltvwgts[j] = 0; for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j]; for (j=0; j<mgraph->ncon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]); morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder"); MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace); MALLOC_CHECK(NULL); /* Invert the ordering back to the original graph */ ProjectInfoBack(&ctrl, graph, order, morder, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); free(ctrl.tpwgts); free(morder); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); MALLOC_CHECK(NULL); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocGeneral2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; int qsizes[MAXNCON][2]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); qsizes[i][0] = qsizes[i][1] = 0; } for (i=0; i<nvtxs; i++) { qnum[i] = samax(ncon, nvwgt+i*ncon); qsizes[qnum[i]][where[i]]++; } /* printf("Weight Distribution: \t"); for (i=0; i<ncon; i++) printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); printf("\n"); */ for (from=0; from<2; from++) { for (j=0; j<ncon; j++) { if (qsizes[j][from] == 0) { for (i=0; i<nvtxs; i++) { if (where[i] != from) continue; k = samax2(ncon, nvwgt+i*ncon); if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } /* printf("Weight Distribution (after):\t "); for (i=0; i<ncon; i++) printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); printf("\n"); */ for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); minbal = origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); newcut = mincut = graph->mincut; mincutorder = -1; if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal); } idxset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert all nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nvtxs, perm, 1); for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (minbal < lbfactor) break; SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if (newbal < minbal || (newbal == minbal && (newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf(", %.3f LB: %.3f\n", minbal, newbal); } /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts)); } graph->mincut = mincut; graph->nbnd = nbnd; for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) { int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; int qsizes[MAXNCON][2]; KeyValueType *cand; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i<ncon; i++) { FPQueueInit(&parts[i][0], nvtxs); FPQueueInit(&parts[i][1], nvtxs); qsizes[i][0] = qsizes[i][1] = 0; } for (i=0; i<nvtxs; i++) { qnum[i] = samax(ncon, nvwgt+i*ncon); qsizes[qnum[i]][where[i]]++; } for (from=0; from<2; from++) { for (j=0; j<ncon; j++) { if (qsizes[j][from] == 0) { for (i=0; i<nvtxs; i++) { if (where[i] != from) continue; k = samax2(ncon, nvwgt+i*ncon); if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); newcut = mincut = graph->mincut; mincutorder = -1; idxset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nvtxs; i++) { cand[i].key = id[i]-ed[i]; cand[i].val = i; } ikeysort(nvtxs, cand); for (ii=0; ii<nvtxs; ii++) { i = cand[ii].val; FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (minbal < lbfactor) break; Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1) break; saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if (newbal < minbal || (newbal == minbal && (newcut < mincut || (newcut == mincut && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(oldgain), (float)(ed[k]-id[k])); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i<ncon; i++) { FPQueueFree(&parts[i][0]); FPQueueFree(&parts[i][1]); } GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM); return; }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses) { int i, ii, j, k; int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[MAXNCON*2]; KeyValueType *cand; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { FPQueueInit(&parts[i][0], nvtxs); FPQueueInit(&parts[i][1], nvtxs); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); for (i=0; i<ncon; i++) { rtpwgts[i] = origbal*tpwgts[i]; rtpwgts[ncon+i] = origbal*tpwgts[ncon+i]; } idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { FPQueueReset(&parts[i][0]); FPQueueReset(&parts[i][1]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); /* Insert boundary nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nbnd; i++) { cand[i].key = id[i]-ed[i]; cand[i].val = i; } ikeysort(nbnd, cand); for (ii=0; ii<nbnd; ii++) { i = bndind[cand[ii].val]; FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1) break; saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ FPQueueDelete(&parts[qnum[k]][where[k]], k); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k])); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { FPQueueFree(&parts[i][0]); FPQueueFree(&parts[i][1]); } GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM); return; }
/************************************************************** * This subroutine remaps a partitioning on a single processor **************************************************************/ void SerialRemap(GraphType *graph, int nparts, idxtype *base, idxtype *scratch, idxtype *remap, float *tpwgts) { int i, ii, j, k; int nvtxs, nmapped, max_mult; int from, to, current_from, smallcount, bigcount; KeyValueType *flowto, *bestflow; KeyKeyValueType *sortvtx; idxtype *vsize, *htable, *map, *rowmap; nvtxs = graph->nvtxs; vsize = graph->vsize; max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts); sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx"); flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto"); bestflow = flowto+nparts; map = htable = idxsmalloc(nparts*2, -1, "htable"); rowmap = map+nparts; for (i=0; i<nvtxs; i++) { sortvtx[i].key1 = base[i]; sortvtx[i].key2 = vsize[i]; sortvtx[i].val = i; } qsort((void *)sortvtx, (size_t)nvtxs, (size_t)sizeof(KeyKeyValueType), SSMIncKeyCmp); for (j=0; j<nparts; j++) { flowto[j].key = 0; flowto[j].val = j; } /* this step has nparts*nparts*log(nparts) computational complexity */ bigcount = smallcount = current_from = 0; for (ii=0; ii<nvtxs; ii++) { i = sortvtx[ii].val; from = base[i]; to = scratch[i]; if (from > current_from) { /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERTS(idxsum(nparts, htable) == -nparts); ikeysort(smallcount, flowto); for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } smallcount = 0; current_from = from; } if (htable[to] == -1) { htable[to] = smallcount; flowto[smallcount].key = -vsize[i]; flowto[smallcount].val = to; smallcount++; } else { flowto[htable[to]].key += -vsize[i]; } } /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERTS(idxsum(nparts, htable) == -nparts); ikeysort(smallcount, flowto); for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } ikeysort(bigcount, bestflow); ASSERTS(idxsum(nparts, map) == -nparts); ASSERTS(idxsum(nparts, rowmap) == -nparts); nmapped = 0; /* now make as many assignments as possible */ for (ii=0; ii<bigcount; ii++) { i = bestflow[ii].val; j = i % nparts; /* to */ k = i / nparts; /* from */ if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; j<nparts && nmapped<nparts; j++) { if (map[j] == -1) { for (ii=0; ii<nparts; ii++) { i = (j+ii) % nparts; if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; i<nparts; i++) map[i] = i; for (i=0; i<nvtxs; i++) remap[i] = map[remap[i]]; GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM); }
/************************************************************************* * This is the top level ordering routine **************************************************************************/ void MultilevelOrder(CtrlType *ctrl, GraphType *graph, idxtype *order, idxtype *sizes, WorkSpaceType *wspace) { int i, nparts, nvtxs, npes; idxtype *perm, *lastnode, *morder, *porder; GraphType *mgraph; npes = ctrl->npes; nvtxs = graph->nvtxs; perm = idxmalloc(nvtxs, "MultilevelOrder: perm"); lastnode = idxsmalloc(4*npes, -1, "MultilevelOrder: lastnode"); for (i=0; i<nvtxs; i++) perm[i] = i; lastnode[2] = graph->gnvtxs; idxset(nvtxs, -1, order); sizes[0] = 2*npes-1; graph->where = idxsmalloc(nvtxs, 0, "MultilevelOrder: graph->where"); for (nparts=2; nparts<=ctrl->npes; nparts*=2) { ctrl->nparts = nparts; Order_Partition(ctrl, graph, wspace); LabelSeparators(ctrl, graph, lastnode, perm, order, sizes, wspace); CompactGraph(ctrl, graph, perm, wspace); if (ctrl->CoarsenTo < 100*nparts) { ctrl->CoarsenTo = 1.5*ctrl->CoarsenTo; } ctrl->CoarsenTo = amin(ctrl->CoarsenTo, graph->gnvtxs-1); } /*----------------------------------------------------------------- / Move the graph so that each processor gets its partition -----------------------------------------------------------------*/ IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MoveTmr)); SetUp(ctrl, graph, wspace); graph->ncon = 1; /*needed for Moc_MoveGraph */ mgraph = Moc_MoveGraph(ctrl, graph, wspace); /* Fill in the sizes[] array for the local part. Just the vtxdist of the mgraph */ for (i=0; i<npes; i++) sizes[i] = mgraph->vtxdist[i+1]-mgraph->vtxdist[i]; porder = idxmalloc(graph->nvtxs, "MultilevelOrder: porder"); morder = idxmalloc(mgraph->nvtxs, "MultilevelOrder: morder"); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MoveTmr)); /* Find the local ordering */ LocalNDOrder(ctrl, mgraph, morder, lastnode[2*(ctrl->npes+ctrl->mype)]-mgraph->nvtxs, wspace); /* Project the ordering back to the before-move graph */ ProjectInfoBack(ctrl, graph, porder, morder, wspace); /* Copy the ordering from porder to order using perm */ for (i=0; i<graph->nvtxs; i++) { ASSERT(ctrl, order[perm[i]] == -1); order[perm[i]] = porder[i]; } FreeGraph(mgraph); GKfree((void **)&perm, (void **)&lastnode, (void **)&porder, (void **)&morder, LTERM); /* PrintVector(ctrl, 2*npes-1, 0, sizes, "SIZES"); */ }
/************************************************************************* * This function performs a node-based FM refinement **************************************************************************/ void FM_2WayNodeRefineEqWgt(CtrlType *ctrl, GraphType *graph, idxtype npasses) { idxtype i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; idxtype *mptr, *mind, *moved, *swaps, *perm; PQueueType parts[2]; NRInfoType *rinfo; idxtype higain, oldgain, mincut, initcut, mincutorder; idxtype pass, to, other, limit; idxtype mindiff, newdiff; idxtype u[2], g[2]; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; where = graph->where; pwgts = graph->pwgts; rinfo = graph->nrinfo; i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt); PQueueInit(ctrl, &parts[0], nvtxs, i); PQueueInit(ctrl, &parts[1], nvtxs, i); moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); mptr = idxwspacemalloc(ctrl, nvtxs+1); mind = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); IFSET(ctrl->dbglvl, DBG_REFINE, mprintf("Partitions: [%6D %6D] Nv-Nb[%6D %6D]. ISep: %6D\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); for (pass=0; pass<npasses; pass++) { idxset(nvtxs, -1, moved); PQueueReset(&parts[0]); PQueueReset(&parts[1]); mincutorder = -1; initcut = mincut = graph->mincut; nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(where[i] == 2); PQueueInsert(&parts[0], i, vwgt[i]-rinfo[i].edegrees[1]); PQueueInsert(&parts[1], i, vwgt[i]-rinfo[i].edegrees[0]); } ASSERT(CheckNodeBnd(graph, nbnd)); ASSERT(CheckNodePartitionParams(graph)); limit = (ctrl->oflags&OFLAG_COMPRESS ? amin(5*nbnd, 400) : amin(2*nbnd, 300)); /****************************************************** * Get into the FM loop *******************************************************/ mptr[0] = nmind = 0; mindiff = idxtype_abs(pwgts[0]-pwgts[1]); to = (pwgts[0] < pwgts[1] ? 0 : 1); for (nswaps=0; nswaps<nvtxs; nswaps++) { to = (pwgts[0] < pwgts[1] ? 0 : 1); if (pwgts[0] == pwgts[1]) { u[0] = PQueueSeeMax(&parts[0]); u[1] = PQueueSeeMax(&parts[1]); if (u[0] != -1 && u[1] != -1) { g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1]; g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0]; to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); } } other = (to+1)%2; if ((higain = PQueueGetMax(&parts[to])) == -1) break; if (moved[higain] == -1) /* Delete if it was in the separator originally */ PQueueDelete(&parts[other], higain, vwgt[higain]-rinfo[higain].edegrees[to]); ASSERT(bndptr[higain] != -1); pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); newdiff = idxtype_abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { mincut = pwgts[2]; mincutorder = nswaps; mindiff = newdiff; } else { if (nswaps - mincutorder > limit) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); break; /* No further improvement, break out */ } } BNDDelete(nbnd, bndind, bndptr, higain); pwgts[to] += vwgt[higain]; where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /********************************************************** * Update the degrees of the affected nodes ***********************************************************/ for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ oldgain = vwgt[k]-rinfo[k].edegrees[to]; rinfo[k].edegrees[to] += vwgt[higain]; if (moved[k] == -1 || moved[k] == -(2+other)) PQueueUpdate(&parts[other], k, oldgain, oldgain-vwgt[higain]); } else if (where[k] == other) { /* This vertex is pulled into the separator */ ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ where[k] = 2; pwgts[other] -= vwgt[k]; edegrees = rinfo[k].edegrees; edegrees[0] = edegrees[1] = 0; for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] != 2) edegrees[where[kk]] += vwgt[kk]; else { oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; rinfo[kk].edegrees[other] -= vwgt[k]; if (moved[kk] == -1 || moved[kk] == -(2+to)) PQueueUpdate(&parts[to], kk, oldgain, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue. Only one side! */ if (moved[k] == -1) { PQueueInsert(&parts[to], k, vwgt[k]-edegrees[other]); moved[k] = -(2+to); } } } mptr[nswaps+1] = nmind; IFSET(ctrl->dbglvl, DBG_MOVEINFO, mprintf("Moved %6D to %3D, Gain: %5D [%5D] [%4D %4D] \t[%5D %5D %5D]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); } /**************************************************************** * Roll back computation *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; ASSERT(CheckNodePartitionParams(graph)); to = where[higain]; other = (to+1)%2; INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); where[higain] = 2; BNDInsert(nbnd, bndind, bndptr, higain); edegrees = rinfo[higain].edegrees; edegrees[0] = edegrees[1] = 0; for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) rinfo[k].edegrees[to] -= vwgt[higain]; else edegrees[where[k]] += vwgt[k]; } /* Push nodes out of the separator */ for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { k = mind[j]; ASSERT(where[k] == 2); where[k] = other; INC_DEC(pwgts[other], pwgts[2], vwgt[k]); BNDDelete(nbnd, bndind, bndptr, k); for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] == 2) rinfo[kk].edegrees[other] += vwgt[k]; } } } ASSERT(mincut == pwgts[2]); IFSET(ctrl->dbglvl, DBG_REFINE, mprintf("\tMinimum sep: %6D at %5D, PWGTS: [%6D %6D], NBND: %6D\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut >= initcut) break; } PQueueFree(ctrl, &parts[0]); PQueueFree(ctrl, &parts[1]); idxwspacefree(ctrl, nvtxs+1); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, float *tpwgts, int npasses) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[2]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); rtpwgts[0] = origbal*tpwgts[0]; rtpwgts[1] = origbal*tpwgts[1]; /* if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal); } */ idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { PQueueReset(&parts[i][0]); PQueueReset(&parts[i][1]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); minbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert boundary nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(ed[i] > 0 || id[i] == 0); ASSERT(bndptr[i] != -1); PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; ASSERT(bndptr[higain] != -1); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /* if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf(", %.3f LB: %.3f\n", minbal, newbal); } */ /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } /* if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts)); } */ graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function is the entry point of the initial balancing algorithm. * This algorithm assembles the graph to all the processors and preceeds * with the balancing step. **************************************************************************/ void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j, mype, npes, nvtxs, nedges, ncon; idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize; idxtype *part, *lwhere, *home; GraphType *agraph, cgraph; CtrlType myctrl; int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype; int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls; float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum; float rating, max_rating, your_cost = -1.0, your_balance = -1.0; float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2]; MPI_Status status; MPI_Comm ipcomm, srcomm; struct { float cost; int rank; } lpecost, gpecost; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); vtxdist = graph->vtxdist; agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); nvtxs = cgraph.nvtxs = agraph->nvtxs; nedges = cgraph.nedges = agraph->nedges; ncon = cgraph.ncon = agraph->ncon; xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj"); vwgt = cgraph.vwgt = xadj + nvtxs+1; vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1; cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1; lwhere = xadj + nvtxs*(3+ncon)+1; home = xadj + nvtxs*(4+ncon)+1; adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1; adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges; /* ADD: this assumes that tpwgts for all constraints is the same */ /* ADD: this is necessary because serial metis does not support the general case */ mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); for (i=0; i<ctrl->nparts; i++) for (j=0; j<ncon; j++) mytpwgts[i] += ctrl->tpwgts[i*ncon+j]; for (i=0; i<ctrl->nparts; i++) mytpwgts[i] /= (float)ncon; idxcopy(nvtxs+1, agraph->xadj, xadj); idxcopy(nvtxs*ncon, agraph->vwgt, vwgt); idxcopy(nvtxs, agraph->vsize, vsize); idxcopy(nedges, agraph->adjncy, adjncy); idxcopy(nedges, agraph->adjwgt, adjwgt); /****************************************/ /****************************************/ if (ctrl->ps_relation == DISCOUPLED) { rcounts = imalloc(ctrl->npes, "rcounts"); rdispls = imalloc(ctrl->npes+1, "rdispls"); for (i=0; i<ctrl->npes; i++) { rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i]; } MAKECSR(i, ctrl->npes, rdispls); MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE, (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm); for (i=0; i<agraph->nvtxs; i++) home[i] = part[i]; GKfree((void **)&rcounts, (void **)&rdispls, LTERM); } else { for (i=0; i<ctrl->npes; i++) for (j=vtxdist[i]; j<vtxdist[i+1]; j++) part[j] = home[j] = i; } /* Ensure that the initial partitioning is legal */ for (i=0; i<agraph->nvtxs; i++) { if (part[i] >= ctrl->nparts) part[i] = home[i] = part[i] % ctrl->nparts; if (part[i] < 0) part[i] = home[i] = (-1*part[i]) % ctrl->nparts; } /****************************************/ /****************************************/ IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec)); IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph))); for (i=0; i<agraph->ncon; i++) IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i])); IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n")); /****************************************/ /* Split the processors into two groups */ /****************************************/ sr = (ctrl->mype % 2 == 0) ? 1 : 0; gd = (ctrl->mype % 2 == 1) ? 1 : 0; if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) { sr = 1; gd = 0; } sr_pe = 0; gd_pe = 1; MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm); MPI_Comm_rank(ipcomm, &mype); MPI_Comm_size(ipcomm, &npes); myctrl.dbglvl = 0; myctrl.mype = mype; myctrl.npes = npes; myctrl.comm = ipcomm; myctrl.sync = ctrl->sync; myctrl.seed = ctrl->seed; myctrl.nparts = ctrl->nparts; myctrl.ipc_factor = ctrl->ipc_factor; myctrl.redist_factor = ctrl->redist_base; myctrl.partType = ADAPTIVE_PARTITION; myctrl.ps_relation = DISCOUPLED; myctrl.tpwgts = ctrl->tpwgts; icopy(ncon, ctrl->tvwgts, myctrl.tvwgts); icopy(ncon, ctrl->ubvec, myctrl.ubvec); if (sr == 1) { /*******************************************/ /* Half of the processors do scratch-remap */ /*******************************************/ ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1); MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm); MPI_Comm_rank(srcomm, &srmype); MPI_Comm_size(srcomm, &srnpes); moptions[0] = 0; moptions[7] = ctrl->sync + (mype % ngroups) + 1; idxset(nvtxs, 0, lwhere); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = srnpes; while (lnpes > 1 && lnparts > 1) { ASSERT(ctrl, agraph->nvtxs > 1); /* Determine the weights of the partitions */ mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); mytpwgts2[1] = 1.0-mytpwgts2[0]; if (agraph->ncon == 1) { METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } else { METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } wsum = ssum(lnparts/2, mytpwgts+fpart); sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart); sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2); /* I'm picking the left branch */ if (srmype < fpe+lnpes/2) { Mc_KeepPart(agraph, wspace, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { Mc_KeepPart(agraph, wspace, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } /* In case srnpes is greater than or equal to nparts */ if (lnparts == 1) { /* Only the first process will assign labels (for the reduction to work) */ if (srmype == fpe) { for (i=0; i<agraph->nvtxs; i++) lwhere[agraph->label[i]] = fpart; } } /* In case srnpes is smaller than nparts */ else { if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); else METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) lwhere[agraph->label[i]] = fpart + part[i]; } MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm); edgecut = ComputeSerialEdgeCut(&cgraph); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); lbsum = ssum(ncon, lbvec); MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpecost.cost = (float)edgecut; else lpecost.cost = (float)max_cut + lbsum; } MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) { MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm); } if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) { MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); } if (ctrl->mype == sr_pe) { idxcopy(nvtxs, part, lwhere); SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } MPI_Comm_free(&srcomm); } /**************************************/ /* The other half do global diffusion */ /**************************************/ else { /******************************************************************/ /* The next stmt is required to balance out the sr MPI_Comm_split */ /******************************************************************/ MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm); if (ncon == 1) { rating = WavefrontDiffusion(&myctrl, agraph, home); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); lbsum = ssum(ncon, lbvec); /* Determine which PE computed the best partitioning */ MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpecost.cost = rating; else lpecost.cost = max_rating + lbsum; } MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); /* Now send this to the coordinating processor */ if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe) MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm); if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe) MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); if (ctrl->mype == gd_pe) { idxcopy(nvtxs, part, lwhere); SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } } else { Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES); } } if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) { if (ctrl->mype == sr_pe || ctrl->mype == gd_pe) { /********************************************************************/ /* The coordinators from each group decide on the best partitioning */ /********************************************************************/ my_cut = (float) ComputeSerialEdgeCut(&cgraph); my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); my_balance = ssum(cgraph.ncon, lbvec); my_balance /= (float) cgraph.ncon; my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv; IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n", (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance)); if (ctrl->mype == gd_pe) { buffer[0] = my_cost; buffer[1] = my_balance; MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm); } else { MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status); your_cost = buffer[0]; your_balance = buffer[1]; } } if (ctrl->mype == sr_pe) { who_wins = gd_pe; if ((my_balance < 1.1 && your_balance > 1.1) || (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) || (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) { who_wins = sr_pe; } } MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm); } else { who_wins = sr_pe; } MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm); idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where); MPI_Comm_free(&ipcomm); GKfree((void **)&xadj, (void **)&mytpwgts, LTERM); /* For whatever reason, FreeGraph crashes here...so explicitly free the memory. FreeGraph(agraph); */ GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, (void **)&agraph->nvwgt, LTERM); GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM); GKfree((void **)&agraph, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i; int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; int tewgt, tvsize, nmoved, maxin, maxout; float gtewgt, gtvsize, avg, maximb; int ps_relation, seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /* ADD: take care of disconnected graph */ /* ADD: take care of highly unbalanced vtxdist */ /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /**************************/ /* Set up data structures */ /**************************/ if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED; } SetUpCtrl(&ctrl, inparts, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts)); ctrl.ipc_factor = 1000.0; ctrl.redist_factor = 1.0; ctrl.redist_base = 1.0; ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = REFINE_PARTITION; ctrl.ps_relation = ps_relation; ctrl.tpwgts = itpwgts; graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize"); graph->home = idxmalloc(graph->nvtxs, "home"); if (ctrl.ps_relation == COUPLED) idxset(graph->nvtxs, mype, graph->home); else idxcopy(graph->nvtxs, part, graph->home); tewgt = idxsum(graph->nedges, graph->adjwgt); tvsize = idxsum(graph->nvtxs, graph->vsize); gtewgt = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs; gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs; ctrl.edge_size_ratio = gtewgt/gtvsize; scopy(incon, iubvec, ctrl.ubvec); PreAllocateMemory(&ctrl, graph, &wspace); /***********************/ /* Partition and Remap */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Adaptive_Partition(&ctrl, graph, &wspace); ParallelReMapGraph(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); idxcopy(graph->nvtxs, graph->where, part); if (edgecut != NULL) *edgecut = graph->mincut; /***********************/ /* Take care of output */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout); rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout); } /*************************************/ /* Free memory, renumber, and return */ /*************************************/ GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM); GKfree((void **)&itpwgts, LTERM); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); return; }
/************************************************************************* * This function performs a node-based FM refinement. This is the * one-way version **************************************************************************/ void FM_2WayNodeRefine_OneSided(CtrlType *ctrl, GraphType *graph, float ubfactor, idxtype npasses) { idxtype i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; idxtype *mptr, *mind, *swaps, *perm; PQueueType parts; NRInfoType *rinfo; idxtype higain, oldgain, mincut, initcut, mincutorder; idxtype pass, to, other, limit; idxtype badmaxpwgt, mindiff, newdiff; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; where = graph->where; pwgts = graph->pwgts; rinfo = graph->nrinfo; PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt)); perm = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); mptr = idxwspacemalloc(ctrl, nvtxs+1); mind = idxwspacemalloc(ctrl, nvtxs); IFSET(ctrl->dbglvl, DBG_REFINE, mprintf("Partitions-N1: [%6D %6D] Nv-Nb[%6D %6D]. ISep: %6D\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); badmaxpwgt = (int)(ubfactor*(pwgts[0]+pwgts[1]+pwgts[2])/2); to = (pwgts[0] < pwgts[1] ? 1 : 0); for (pass=0; pass<npasses; pass++) { other = to; to = (to+1)%2; PQueueReset(&parts); mincutorder = -1; initcut = mincut = graph->mincut; nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(where[i] == 2); PQueueInsert(&parts, i, vwgt[i]-rinfo[i].edegrees[other]); } ASSERT(CheckNodeBnd(graph, nbnd)); ASSERT(CheckNodePartitionParams(graph)); limit = (ctrl->oflags&OFLAG_COMPRESS ? amin(5*nbnd, 400) : amin(2*nbnd, 300)); /****************************************************** * Get into the FM loop *******************************************************/ mptr[0] = nmind = 0; mindiff = idxtype_abs(pwgts[0]-pwgts[1]); for (nswaps=0; nswaps<nvtxs; nswaps++) { if ((higain = PQueueGetMax(&parts)) == -1) break; ASSERT(bndptr[higain] != -1); if (pwgts[to]+vwgt[higain] > badmaxpwgt) break; /* No point going any further. Balance will be bad */ pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); newdiff = idxtype_abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { mincut = pwgts[2]; mincutorder = nswaps; mindiff = newdiff; } else { if (nswaps - mincutorder > limit) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); break; /* No further improvement, break out */ } } BNDDelete(nbnd, bndind, bndptr, higain); pwgts[to] += vwgt[higain]; where[higain] = to; swaps[nswaps] = higain; /********************************************************** * Update the degrees of the affected nodes ***********************************************************/ for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ rinfo[k].edegrees[to] += vwgt[higain]; } else if (where[k] == other) { /* This vertex is pulled into the separator */ ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ where[k] = 2; pwgts[other] -= vwgt[k]; edegrees = rinfo[k].edegrees; edegrees[0] = edegrees[1] = 0; for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] != 2) edegrees[where[kk]] += vwgt[kk]; else { oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; rinfo[kk].edegrees[other] -= vwgt[k]; /* Since the moves are one-sided this vertex has not been moved yet */ PQueueUpdateUp(&parts, kk, oldgain, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue. Safe due to one-sided moves */ PQueueInsert(&parts, k, vwgt[k]-edegrees[other]); } } mptr[nswaps+1] = nmind; IFSET(ctrl->dbglvl, DBG_MOVEINFO, mprintf("Moved %6D to %3D, Gain: %5D [%5D] \t[%5D %5D %5D] [%3D %2D]\n", higain, to, (vwgt[higain]-rinfo[higain].edegrees[other]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); } /**************************************************************** * Roll back computation *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; ASSERT(CheckNodePartitionParams(graph)); ASSERT(where[higain] == to); INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); where[higain] = 2; BNDInsert(nbnd, bndind, bndptr, higain); edegrees = rinfo[higain].edegrees; edegrees[0] = edegrees[1] = 0; for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) rinfo[k].edegrees[to] -= vwgt[higain]; else edegrees[where[k]] += vwgt[k]; } /* Push nodes out of the separator */ for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { k = mind[j]; ASSERT(where[k] == 2); where[k] = other; INC_DEC(pwgts[other], pwgts[2], vwgt[k]); BNDDelete(nbnd, bndind, bndptr, k); for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] == 2) rinfo[kk].edegrees[other] += vwgt[k]; } } } ASSERT(mincut == pwgts[2]); IFSET(ctrl->dbglvl, DBG_REFINE, mprintf("\tMinimum sep: %6D at %5D, PWGTS: [%6D %6D], NBND: %6D\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut)) break; } PQueueFree(ctrl, &parts); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs+1); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function is the entry point of the initial partition algorithm * that does recursive bissection. * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j; int ncon, mype, npes, gnvtxs, ngroups; idxtype *xadj, *adjncy, *adjwgt, *vwgt; idxtype *part, *gwhere0, *gwhere1; idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; GraphType *agraph; int lnparts, fpart, fpe, lnpes; int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { float sum; int rank; } lpesum, gpesum; ncon = graph->ncon; ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part"); xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj"); adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy"); adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt"); vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt"); idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt); idxcopy(agraph->nvtxs+1, agraph->xadj, xadj); idxcopy(agraph->nedges, agraph->adjncy, adjncy); idxcopy(agraph->nedges, agraph->adjwgt, adjwgt); MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); MPI_Comm_rank(ipcomm, &mype); MPI_Comm_size(ipcomm, &npes); gnvtxs = agraph->nvtxs; gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0"); gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1"); /* ADD: this assumes that tpwgts for all constraints is the same */ /* ADD: this is necessary because serial metis does not support the general case */ mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); for (i=0; i<ctrl->nparts; i++) for (j=0; j<ncon; j++) mytpwgts[i] += ctrl->tpwgts[i*ncon+j]; for (i=0; i<ctrl->nparts; i++) mytpwgts[i] /= (float)ncon; /* Go into the recursive bisection */ /* ADD: consider changing this to breadth-first type bisection */ moptions[0] = 0; moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1; lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { /* Determine the weights of the partitions */ mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); mytpwgts2[1] = 1.0-mytpwgts2[0]; if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); else { METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } wsum = ssum(lnparts/2, mytpwgts+fpart); sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart); sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2); /* I'm picking the left branch */ if (mype < fpe+lnpes/2) { Mc_KeepPart(agraph, wspace, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { Mc_KeepPart(agraph, wspace, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } /* In case npes is greater than or equal to nparts */ if (lnparts == 1) { /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart; } } /* In case npes is smaller than nparts */ else { if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); else METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm); if (ngroups > 1) { tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; tmpvwgt = agraph->vwgt; tmpwhere = agraph->where; agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; agraph->vwgt = vwgt; agraph->where = gwhere1; agraph->vwgt = vwgt; agraph->nvtxs = gnvtxs; Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); lbsum = ssum(ncon, lbvec); edgecut = ComputeSerialEdgeCut(agraph); MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpesum.sum = (float) (edgecut); else lpesum.sum = (float) (max_cut); } MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank)); MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm); MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm); agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; agraph->vwgt = tmpvwgt; agraph->where = tmpwhere; } idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); MPI_Comm_free(&ipcomm); GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON]; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; float *maxwgt, *minwgt, tvec[MAXNCON]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 15), 100); /* Setup the weight intervals of the two subdomains */ minwgt = fwspacemalloc(ctrl, 2*ncon); maxwgt = fwspacemalloc(ctrl, 2*ncon); for (i=0; i<2; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j]; minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]); } } /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal); for (i=0; i<ncon; i++) minbal[i] = origbal[i]; newcut = mincut = graph->mincut; mincutorder = -1; if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut); for (i=0; i<ncon; i++) printf("%.3f ", origbal[i]); printf("[B]\n"); } idxset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert all nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nvtxs, perm, 1); for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (AreAllBelow(ncon, minbal, ubvec)) break; SelectQueue3(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, newbal); if (IsBetter2wayBalance(ncon, newbal, minbal, ubvec) || (IsBetter2wayBalance(ncon, newbal, origbal, ubvec) && newcut < mincut)) { mincut = newcut; for (i=0; i<ncon; i++) minbal[i] = newbal[i]; mincutorder = nswaps; } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (i=0; i<ncon; i++) printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec); printf(", LB: "); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); if (mincutorder == nswaps) printf(" *\n"); else printf("\n"); } /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (i=0; i<ncon; i++) printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]); printf("], LB: "); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); printf("\n"); } graph->mincut = mincut; graph->nbnd = nbnd; for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); fwspacefree(ctrl, 2*ncon); fwspacefree(ctrl, 2*ncon); }
/*********************************************************************************** * This function is the entry point of the parallel kmetis algorithm that uses * coordinates to compute an initial graph distribution. ************************************************************************************/ void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i, j; int nvtxs = -1, npes, mype; int uwgtflag, cut, gcut, maxnvtxs; int ltvwgts[MAXNCON]; int moptions[10]; CtrlType ctrl; idxtype *uvwgt; WorkSpaceType wspace; GraphType *graph, *mgraph; float avg, maximb, balance, *mytpwgts; int seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /******************************/ /* Take care of npes = 1 case */ /******************************/ if (npes == 1 && inparts > 1) { moptions[0] = 0; nvtxs = vtxdist[1]; if (incon == 1) { METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, itpwgts, moptions, edgecut, part); } else { /* ADD: this is because METIS does not support tpwgts for all constraints */ mytpwgts = fmalloc(inparts, "mytpwgts"); for (i=0; i<inparts; i++) mytpwgts[i] = itpwgts[i*incon]; moptions[7] = -1; METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, mytpwgts, moptions, edgecut, part); free(mytpwgts); } return; } if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; } SetUpCtrl(&ctrl, npes, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts)); ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = itpwgts; scopy(incon, iubvec, ctrl.ubvec); uwgtflag = iwgtflag|2; uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt"); graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag); free(graph->nvwgt); graph->nvwgt = NULL; PreAllocateMemory(&ctrl, graph, &wspace); /*================================================================= * Compute the initial npes-way partitioning geometric partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*================================================================= * Move the graph according to the partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); free(uvwgt); graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt"); graph->ncon = incon; j = ctrl.nparts; ctrl.nparts = ctrl.npes; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); ctrl.nparts = j; /**********************************************************/ /* Do the same functionality as Moc_SetUpGraph for mgraph */ /**********************************************************/ /* compute tvwgts */ for (j=0; j<incon; j++) ltvwgts[j] = 0; for (i=0; i<graph->nvtxs; i++) for (j=0; j<incon; j++) ltvwgts[j] += mgraph->vwgt[i*incon+j]; for (j=0; j<incon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); /* check for zero wgt constraints */ for (i=0; i<incon; i++) { /* ADD: take care of the case in which tvwgts is zero */ if (ctrl.tvwgts[i] == 0) { if (ctrl.mype == 0) printf("ERROR: sum weight for constraint %d is zero\n", i); MPI_Finalize(); exit(-1); } } /* compute nvwgt */ mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<incon; j++) mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); if (ctrl.dbglvl&DBG_INFO) { cut = 0; for (i=0; i<graph->nvtxs; i++) for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; gcut = GlobalSESum(&ctrl, cut)/2; maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs); balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes)); rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n", gcut, balance, maxnvtxs, graph->gnvtxs, npes); } /*================================================================= * Set up the newly moved graph =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); ctrl.nparts = inparts; FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); /*======================================================= * Now compute the partition of the moved graph =======================================================*/ if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) { IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes])); PartitionSmallGraph(&ctrl, mgraph, &wspace); } else { Moc_Global_Partition(&ctrl, mgraph, &wspace); } ParallelReMapGraph(&ctrl, mgraph, &wspace); /* Invert the ordering back to the original graph */ ctrl.nparts = npes; ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace); *edgecut = mgraph->mincut; IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); /*******************/ /* Print out stats */ /*******************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon); } GKfree((void **)&itpwgts, LTERM); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }