/************************************************************************* * This function is the entry point for KWMETIS **************************************************************************/ void METIS_WPartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) { int i, j; GraphType graph; CtrlType ctrl; if (*numflag == 1) Change2CNumbering(*nvtxs, xadj, adjncy); SetUpGraph(&graph, OP_KMETIS, *nvtxs, 1, xadj, adjncy, vwgt, adjwgt, *wgtflag); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = KMETIS_CTYPE; ctrl.IType = KMETIS_ITYPE; ctrl.RType = KMETIS_RTYPE; ctrl.dbglvl = KMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.optype = OP_KMETIS; ctrl.CoarsenTo = amax((*nvtxs)/(40*log2i(*nparts)), 20*(*nparts)); ctrl.maxvwgt = 1.5*((graph.vwgt ? idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo); InitRandom(-1); AllocateWorkSpace(&ctrl, &graph, *nparts); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); *edgecut = MlevelKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.03); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); FreeWorkSpace(&ctrl, &graph); if (*numflag == 1) Change2FNumbering(*nvtxs, xadj, adjncy, part); }
/************************************************************************* * This function finds a matching using the HEM heuristic **************************************************************************/ void Match_RM_NVW(CtrlType *ctrl, GraphType *graph) { int i, ii, j, nvtxs, cnvtxs, maxidx; idxtype *xadj, *adjncy; idxtype *match, *cmap, *perm; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; cmap = graph->cmap; match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); perm = idxwspacemalloc(ctrl, nvtxs); RandomPermute(nvtxs, perm, 1); cnvtxs = 0; for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; if (match[i] == UNMATCHED) { /* Unmatched */ maxidx = i; /* Find a random matching, subject to maxvwgt constraints */ for (j=xadj[i]; j<xadj[i+1]; j++) { if (match[adjncy[j]] == UNMATCHED) { maxidx = adjncy[j]; break; } } cmap[i] = cmap[maxidx] = cnvtxs++; match[i] = maxidx; match[maxidx] = i; } } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); CreateCoarseGraph_NVW(ctrl, graph, cnvtxs, match, perm); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/*********************************************************************************** * This function creates the fused-element-graph and returns the partition ************************************************************************************/ void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol, realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part, MPI_Comm *comm) { int npes, mype, nvtxs; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nvtxs = vtxdist[mype+1]-vtxdist[mype]; /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */ SetUpCtrl(&ctrl, *nparts, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts)); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->where = part; PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); CreateFusedElementGraph(&ctrl, graph, &wspace, numflag); idxcopy(nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); if (((*wgtflag)&2) == 0) IMfree((void**)&graph->vwgt, LTERM); IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy, &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr, &graph->imap, &graph->sendind, &graph, LTERM); FreeWSpace(&wspace); FreeCtrl(&ctrl); }
void informdoorbell (void) { packet pkt; char buzzer_pressed=BUZZER_PRESSED; /* Make a noise */ starttimer(40000); buzzerled(BUZZER); while(gettimer()>10000); buzzerled(ALLOFF); stoptimer(); /* Send packet */ pkt.datatype=CONTROL; pkt.data=&(buzzer_pressed); pkt.length=1; senddata(&pkt); }
/* static int broadcast_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu){ int hotcpu = (unsigned long)hcpu; struct jz_timerevent *evt = &per_cpu(jzclockevent, hotcpu); if(hotcpu != 0) { switch(action & 0xf) { case CPU_DEAD: jz_set_mode(CLOCK_EVT_MODE_SHUTDOWN,&evt->clkevt); break; case CPU_ONLINE: jz_set_mode(CLOCK_EVT_MODE_RESUME,&evt->clkevt); break; } } return NOTIFY_OK; } */ static void jz_clockevent_init(struct jz_timerevent *evt_dev,int cpu) { struct clock_event_device *cd = &evt_dev->clkevt; struct clk *ext_clk = clk_get(NULL,"ext1"); spin_lock_init(&evt_dev->lock); evt_dev->rate = clk_get_rate(ext_clk) / CLKEVENT_DIV; clk_put(ext_clk); stoptimer(evt_dev); outl((1 << evt_dev->ch),evt_dev->ctrl_addr + TCU_TMCR); outl(CSRDIV(CLKEVENT_DIV) | CSR_EXT_EN,evt_dev->config_addr); if(evt_dev->requestirq == 0){ evt_dev->evt_action.handler = jz_timer_interrupt; evt_dev->evt_action.thread_fn = NULL; evt_dev->evt_action.flags = IRQF_DISABLED | IRQF_PERCPU | IRQF_TIMER; evt_dev->evt_action.name = "jz-timerirq"; evt_dev->evt_action.dev_id = (void*)evt_dev; if(setup_irq(evt_dev->irq, &evt_dev->evt_action) < 0) { pr_err("timer request irq error\n"); BUG(); } evt_dev->requestirq = 1; } memset(cd,0,sizeof(struct clock_event_device)); cd->name = "jz-clockenvent"; cd->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC; cd->shift = 10; cd->rating = 400; cd->set_mode = jz_set_mode; cd->set_next_event = jz_set_next_event; cd->irq = evt_dev->irq; cd->cpumask = cpumask_of(cpu); clockevents_config_and_register(cd,evt_dev->rate,4,65536); /* evt_dev->cpu_notify.notifier_call = broadcast_cpuhp_notify; if(cpu == 0){ register_cpu_notifier(&evt_dev->cpu_notify); } */ }
/************************************************************************* * This function computes the initial bisection of the coarsest graph **************************************************************************/ void InitSeparator(CtrlType *ctrl, GraphType *graph, float ubfactor) { int dbglvl; dbglvl = ctrl->dbglvl; IFSET(ctrl->dbglvl, DBG_REFINE, ctrl->dbglvl -= DBG_REFINE); IFSET(ctrl->dbglvl, DBG_MOVEINFO, ctrl->dbglvl -= DBG_MOVEINFO); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); GrowBisectionNode(ctrl, graph, ubfactor); Compute2WayNodePartitionParams(ctrl, graph); IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial Sep: %d\n", graph->mincut)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); ctrl->dbglvl = dbglvl; }
/*********************************************************************************** * This function is the testing routine for the adaptive multilevel partitioning code. * It computes a partition from scratch, it then moves the graph and changes some * of the vertex weights and then call the adaptive code. ************************************************************************************/ void TestParMGridGen(char *filename, int *options, int minsize, int maxsize, MPI_Comm comm) { int i, nparts, npes, mype; MGridGraphType graph; idxtype *part; double tmr; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &mype); MGridReadTestGraph(&graph, filename, comm); part = idxmalloc(graph.nvtxs, "TestParMGridGen: part"); /*====================================================================== / ParMETIS_AspectRatio /=======================================================================*/ if (mype==0) printf("------------------------ PARAMETERS --------------------------------------\n"); for (i=0; i<npes; i++) if (mype == i) printf("%s, Dim=%d [%2d %2d] CType=%d RType=%d Nvtxs=%d Nedges=%d\n", filename, options[OPTION_DIM], minsize, maxsize, options[OPTION_CTYPE], options[OPTION_RTYPE], graph.nvtxs, graph.nedges); cleartimer(tmr); MPI_Barrier(comm); starttimer(tmr); ParMGridGen(graph.vtxdist, graph.xadj, graph.vvol, graph.vsurf, graph.adjncy, graph.adjwgt, &nparts, minsize, maxsize, options, part, &comm); MPI_Barrier(comm); stoptimer(tmr); printf("Total Time = %lf\n", gettimer(tmr)); WriteParallelPartition(filename, part, graph.vtxdist, nparts, mype, npes); IMfree(&graph.vtxdist, &graph.xadj, &graph.vvol, &graph.vsurf, &graph.vwgt, &graph.adjncy, &graph.adjwgt, &part, LTERM); }
static void jz_clockevent_init(struct jz_timerevent *evt_dev) { struct clock_event_device *cd = &evt_dev->clkevt; struct clk *ext_clk = clk_get(NULL, "ext1"); spin_lock_init(&evt_dev->lock); evt_dev->rate = clk_get_rate(ext_clk) / CLKEVENT_DIV; clk_put(ext_clk); evt_dev->clk_gate = clk_get(NULL, "tcu"); if (IS_ERR(evt_dev->clk_gate)) { evt_dev->clk_gate = NULL; printk("warning: tcu clk get fail!\n"); } if (evt_dev->clk_gate) clk_enable(evt_dev->clk_gate); stoptimer(); tcu_writel(CH_TCSR(CLKEVENT_CH), CSRDIV(CLKEVENT_DIV) | CSR_EXT_EN); evt_dev->evt_action.handler = jz_timer_interrupt; evt_dev->evt_action.thread_fn = NULL; evt_dev->evt_action.flags = IRQF_DISABLED | IRQF_TIMER; evt_dev->evt_action.name = "jz-timerirq"; evt_dev->evt_action.dev_id = (void*)evt_dev; if (setup_irq(IRQ_TCU1, &evt_dev->evt_action) < 0) { pr_err("timer request irq error\n"); BUG(); } memset(cd, 0, sizeof(struct clock_event_device)); cd->name = "jz-clockenvent"; cd->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC; cd->shift = 10; cd->rating = 400; cd->set_mode = jz_set_mode; cd->set_next_event = jz_set_next_event; cd->irq = IRQ_TCU1; cd->cpumask = cpumask_of(0); clockevents_config_and_register(cd, evt_dev->rate, 4, 65536); printk("clockevents_config_and_register success.\n"); }
/************************************************************************* * This function takes a graph and produces a bisection of it **************************************************************************/ int MlevelVolKWayPartitioning(CtrlType *ctrl, GraphType *graph, int nparts, idxtype *part, float *tpwgts, float ubfactor) { int i, j, nvtxs, tvwgt, tpwgts2[2]; GraphType *cgraph; int wgtflag=3, numflag=0, options[10], edgecut; cgraph = Coarsen2Way(ctrl, graph); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); AllocateVolKWayPartitionMemory(ctrl, cgraph, nparts); options[0] = 1; options[OPTION_CTYPE] = MATCH_SHEMKWAY; options[OPTION_ITYPE] = IPART_GGPKL; options[OPTION_RTYPE] = RTYPE_FM; options[OPTION_DBGLVL] = 0; METIS_WPartGraphRecursive(&cgraph->nvtxs, cgraph->xadj, cgraph->adjncy, cgraph->vwgt, cgraph->adjwgt, &wgtflag, &numflag, &nparts, tpwgts, options, &edgecut, cgraph->where); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); IFSET(ctrl->dbglvl, DBG_IPART, printf("Initial %d-way partitioning cut: %d\n", nparts, edgecut)); IFSET(ctrl->dbglvl, DBG_KWAYPINFO, ComputePartitionInfo(cgraph, nparts, cgraph->where)); RefineVolKWay(ctrl, graph, cgraph, nparts, tpwgts, ubfactor); idxcopy(graph->nvtxs, graph->where, part); GKfree((void**)&graph->gdata, &graph->rdata, LTERM); return graph->minvol; }
/************************************************************************* * This function computes the initial id/ed **************************************************************************/ void ComputeVolKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts) { int i, ii, j, k, kk, l, nvtxs, nbnd, mincut, minvol, me, other, pid; idxtype *xadj, *vwgt, *adjncy, *adjwgt, *pwgts, *where; VRInfoType *rinfo, *myrinfo, *orinfo; VEDegreeType *myedegrees, *oedegrees; nvtxs = graph->nvtxs; xadj = graph->xadj; vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; pwgts = idxset(nparts, 0, graph->pwgts); rinfo = graph->vrinfo; starttimer(ctrl->AuxTmr1); /*------------------------------------------------------------ / Compute now the id/ed degrees /------------------------------------------------------------*/ ctrl->wspace.cdegree = 0; mincut = 0; for (i=0; i<nvtxs; i++) { me = where[i]; pwgts[me] += vwgt[i]; myrinfo = rinfo+i; myrinfo->id = myrinfo->ed = myrinfo->nid = myrinfo->ndegrees = 0; myrinfo->edegrees = NULL; for (j=xadj[i]; j<xadj[i+1]; j++) { if (me == where[adjncy[j]]) { myrinfo->id += adjwgt[j]; myrinfo->nid++; } } myrinfo->ed = graph->adjwgtsum[i] - myrinfo->id; mincut += myrinfo->ed; /* Time to compute the particular external degrees */ if (myrinfo->ed > 0) { myedegrees = myrinfo->edegrees = ctrl->wspace.vedegrees+ctrl->wspace.cdegree; ctrl->wspace.cdegree += xadj[i+1]-xadj[i]; for (j=xadj[i]; j<xadj[i+1]; j++) { other = where[adjncy[j]]; if (me != other) { for (k=0; k<myrinfo->ndegrees; k++) { if (myedegrees[k].pid == other) { myedegrees[k].ed += adjwgt[j]; myedegrees[k].ned++; break; } } if (k == myrinfo->ndegrees) { myedegrees[myrinfo->ndegrees].gv = 0; myedegrees[myrinfo->ndegrees].pid = other; myedegrees[myrinfo->ndegrees].ed = adjwgt[j]; myedegrees[myrinfo->ndegrees++].ned = 1; } } } ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]); } } graph->mincut = mincut/2; stoptimer(ctrl->AuxTmr1); ComputeKWayVolGains(ctrl, graph, nparts); }
/************************************************************************* * This function is the entry point for the node ND code for ParMETIS **************************************************************************/ void METIS_NodeNDP(int nvtxs, idxtype *xadj, idxtype *adjncy, int npes, int *options, idxtype *perm, idxtype *iperm, idxtype *sizes) { int i, ii, j, l, wflag, nflag; GraphType graph; CtrlType ctrl; idxtype *cptr, *cind; if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = ONMETIS_CTYPE; ctrl.IType = ONMETIS_ITYPE; ctrl.RType = ONMETIS_RTYPE; ctrl.dbglvl = ONMETIS_DBGLVL; ctrl.oflags = ONMETIS_OFLAGS; ctrl.pfactor = ONMETIS_PFACTOR; ctrl.nseps = ONMETIS_NSEPS; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; ctrl.oflags = options[OPTION_OFLAGS]; ctrl.pfactor = options[OPTION_PFACTOR]; ctrl.nseps = options[OPTION_NSEPS]; } if (ctrl.nseps < 1) ctrl.nseps = 1; ctrl.optype = OP_ONMETIS; ctrl.CoarsenTo = 100; IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); InitRandom(-1); if (ctrl.oflags&OFLAG_COMPRESS) { /*============================================================ * Compress the graph ==============================================================*/ cptr = idxmalloc(nvtxs+1, "ONMETIS: cptr"); cind = idxmalloc(nvtxs, "ONMETIS: cind"); CompressGraph(&ctrl, &graph, nvtxs, xadj, adjncy, cptr, cind); if (graph.nvtxs >= COMPRESSION_FRACTION*(nvtxs)) { ctrl.oflags--; /* We actually performed no compression */ GKfree((void**)&cptr, &cind, LTERM); } else if (2*graph.nvtxs < nvtxs && ctrl.nseps == 1) ctrl.nseps = 2; } else { SetUpGraph(&graph, OP_ONMETIS, nvtxs, 1, xadj, adjncy, NULL, NULL, 0); } /*============================================================= * Do the nested dissection ordering --=============================================================*/ ctrl.maxvwgt = 1.5*(idxsum(graph.nvtxs, graph.vwgt)/ctrl.CoarsenTo); AllocateWorkSpace(&ctrl, &graph, 2); idxset(2*npes-1, 0, sizes); MlevelNestedDissectionP(&ctrl, &graph, iperm, graph.nvtxs, npes, 0, sizes); FreeWorkSpace(&ctrl, &graph); if (ctrl.oflags&OFLAG_COMPRESS) { /* Uncompress the ordering */ if (graph.nvtxs < COMPRESSION_FRACTION*(nvtxs)) { /* construct perm from iperm */ for (i=0; i<graph.nvtxs; i++) perm[iperm[i]] = i; for (l=ii=0; ii<graph.nvtxs; ii++) { i = perm[ii]; for (j=cptr[i]; j<cptr[i+1]; j++) iperm[cind[j]] = l++; } } GKfree((void**)&cptr, &cind, LTERM); } for (i=0; i<nvtxs; i++) perm[iperm[i]] = i; IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); }
/************************************************************************* * This function is the entry point for PWMETIS that accepts exact weights * for the target partitions **************************************************************************/ void METIS_mCPartGraphRecursive2(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *edgecut, idxtype *part) { int i, j; GraphType graph; CtrlType ctrl; float *mytpwgts; float avgwgt; if (*numflag == 1) Change2CNumbering(*nvtxs, xadj, adjncy); SetUpGraph(&graph, OP_PMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); graph.npwgts = NULL; mytpwgts = fmalloc(*nparts, "mytpwgts"); scopy(*nparts, tpwgts, mytpwgts); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = McPMETIS_CTYPE; ctrl.IType = McPMETIS_ITYPE; ctrl.RType = McPMETIS_RTYPE; ctrl.dbglvl = McPMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.optype = OP_PMETIS; ctrl.CoarsenTo = 100; ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); InitRandom(options[7]); AllocateWorkSpace(&ctrl, &graph, *nparts); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); ASSERT(CheckGraph(&graph)); *edgecut = MCMlevelRecursiveBisection2(&ctrl, &graph, *nparts, mytpwgts, part, 1.000, 0); /* { idxtype wgt[2048], minwgt, maxwgt, sumwgt; printf("nvtxs: %d, nparts: %d, ncon: %d\n", graph.nvtxs, *nparts, *ncon); for (i=0; i<(*nparts)*(*ncon); i++) wgt[i] = 0; for (i=0; i<graph.nvtxs; i++) for (j=0; j<*ncon; j++) wgt[part[i]*(*ncon)+j] += vwgt[i*(*ncon)+j]; for (j=0; j<*ncon; j++) { minwgt = maxwgt = sumwgt = 0; for (i=0; i<(*nparts); i++) { minwgt = (wgt[i*(*ncon)+j] < wgt[minwgt*(*ncon)+j]) ? i : minwgt; maxwgt = (wgt[i*(*ncon)+j] > wgt[maxwgt*(*ncon)+j]) ? i : maxwgt; sumwgt += wgt[i*(*ncon)+j]; } avgwgt = (float)sumwgt / (float)*nparts; printf("min: %5d, max: %5d, avg: %5.2f, balance: %6.3f\n", wgt[minwgt*(*ncon)+j], wgt[maxwgt*(*ncon)+j], avgwgt, (float)wgt[maxwgt*(*ncon)+j] / avgwgt); } printf("\n"); } */ IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); FreeWorkSpace(&ctrl, &graph); GKfree((void**)(void *)&mytpwgts, LTERM); if (*numflag == 1) Change2FNumbering(*nvtxs, xadj, adjncy, part); }
/************************************************************************* * This function is the entry point of refinement **************************************************************************/ void RefineKWay(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, float *tpwgts, float ubfactor) { int i, nlevels, mustfree=0; GraphType *ptr; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); /* Compute the parameters of the coarsest graph */ ComputeKWayPartitionParams(ctrl, graph, nparts); /* Take care any non-contiguity */ IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { EliminateComponents(ctrl, graph, nparts, tpwgts, 1.25); EliminateSubDomainEdges(ctrl, graph, nparts, tpwgts); EliminateComponents(ctrl, graph, nparts, tpwgts, 1.25); } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); /* Determine how many levels are there */ for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); for (i=0; ;i++) { /* PrintSubDomainGraph(graph, nparts, graph->where); */ if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN && (i == nlevels/2 || i == nlevels/2+1)) EliminateSubDomainEdges(ctrl, graph, nparts, tpwgts); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); if (2*i >= nlevels && !IsBalanced(graph->pwgts, nparts, tpwgts, 1.04*ubfactor)) { ComputeKWayBalanceBoundary(ctrl, graph, nparts); if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) Greedy_KWayEdgeBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 1); else Greedy_KWayEdgeBalance(ctrl, graph, nparts, tpwgts, ubfactor, 1); ComputeKWayBoundary(ctrl, graph, nparts); } switch (ctrl->RType) { case RTYPE_KWAYRANDOM: Random_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); break; case RTYPE_KWAYGREEDY: Greedy_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10); break; case RTYPE_KWAYRANDOM_MCONN: Random_KWayEdgeRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 1); break; } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); if (graph == orggraph) break; GKfree(&graph->gdata, LTERM); /* Deallocate the graph related arrays */ graph = graph->finer; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); if (graph->vwgt == NULL) { graph->vwgt = idxsmalloc(graph->nvtxs, 1, "RefineKWay: graph->vwgt"); graph->adjwgt = idxsmalloc(graph->nedges, 1, "RefineKWay: graph->adjwgt"); mustfree = 1; } ProjectKWayPartition(ctrl, graph, nparts); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); } if (!IsBalanced(graph->pwgts, nparts, tpwgts, ubfactor)) { ComputeKWayBalanceBoundary(ctrl, graph, nparts); if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { Greedy_KWayEdgeBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 8); Random_KWayEdgeRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); } else { Greedy_KWayEdgeBalance(ctrl, graph, nparts, tpwgts, ubfactor, 8); Random_KWayEdgeRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); } } /* Take care any trivial non-contiguity */ IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr2)); EliminateComponents(ctrl, graph, nparts, tpwgts, ubfactor); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr2)); if (mustfree) GKfree(&graph->vwgt, &graph->adjwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm) { int i, j; int ltvwgts[MAXNCON]; int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph, *mgraph; idxtype *morder; int minnvtxs; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nparts = npes; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); return; } if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) { if (mype == 0) printf("Error: Too many processors for this many vertices.\n"); return; } minnvtxs = vtxdist[1]-vtxdist[0]; for (i=0; i<npes; i++) minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i]; if (minnvtxs < (int)((float)npes*1.1)) { if (mype == 0) printf("Error: vertices are not distributed equally.\n"); return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts)); ctrl.seed = mype; ctrl.sync = seed; ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts"); ctrl.ubvec[0] = 1.03; graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial k-way partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Moc_Global_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*======================================================= * Move the graph according to the partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); MALLOC_CHECK(NULL); graph->ncon = 1; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); /*======================================================= * Now compute an ordering of the moved graph =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); ctrl.ipart = ISEP_NODE; ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000)); /* compute tvwgts */ for (j=0; j<mgraph->ncon; j++) ltvwgts[j] = 0; for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j]; for (j=0; j<mgraph->ncon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]); morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder"); MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace); MALLOC_CHECK(NULL); /* Invert the ordering back to the original graph */ ProjectInfoBack(&ctrl, graph, order, morder, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); free(ctrl.tpwgts); free(morder); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); MALLOC_CHECK(NULL); }
/*********************************************************************************** * This function is the entry point of the parallel kmetis algorithm that uses * coordinates to compute an initial graph distribution. ************************************************************************************/ void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i, j; int nvtxs = -1, npes, mype; int uwgtflag, cut, gcut, maxnvtxs; int ltvwgts[MAXNCON]; int moptions[10]; CtrlType ctrl; idxtype *uvwgt; WorkSpaceType wspace; GraphType *graph, *mgraph; float avg, maximb, balance, *mytpwgts; int seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /******************************/ /* Take care of npes = 1 case */ /******************************/ if (npes == 1 && inparts > 1) { moptions[0] = 0; nvtxs = vtxdist[1]; if (incon == 1) { METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, itpwgts, moptions, edgecut, part); } else { /* ADD: this is because METIS does not support tpwgts for all constraints */ mytpwgts = fmalloc(inparts, "mytpwgts"); for (i=0; i<inparts; i++) mytpwgts[i] = itpwgts[i*incon]; moptions[7] = -1; METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, mytpwgts, moptions, edgecut, part); free(mytpwgts); } return; } if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; } SetUpCtrl(&ctrl, npes, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts)); ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = itpwgts; scopy(incon, iubvec, ctrl.ubvec); uwgtflag = iwgtflag|2; uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt"); graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag); free(graph->nvwgt); graph->nvwgt = NULL; PreAllocateMemory(&ctrl, graph, &wspace); /*================================================================= * Compute the initial npes-way partitioning geometric partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*================================================================= * Move the graph according to the partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); free(uvwgt); graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt"); graph->ncon = incon; j = ctrl.nparts; ctrl.nparts = ctrl.npes; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); ctrl.nparts = j; /**********************************************************/ /* Do the same functionality as Moc_SetUpGraph for mgraph */ /**********************************************************/ /* compute tvwgts */ for (j=0; j<incon; j++) ltvwgts[j] = 0; for (i=0; i<graph->nvtxs; i++) for (j=0; j<incon; j++) ltvwgts[j] += mgraph->vwgt[i*incon+j]; for (j=0; j<incon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); /* check for zero wgt constraints */ for (i=0; i<incon; i++) { /* ADD: take care of the case in which tvwgts is zero */ if (ctrl.tvwgts[i] == 0) { if (ctrl.mype == 0) printf("ERROR: sum weight for constraint %d is zero\n", i); MPI_Finalize(); exit(-1); } } /* compute nvwgt */ mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<incon; j++) mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); if (ctrl.dbglvl&DBG_INFO) { cut = 0; for (i=0; i<graph->nvtxs; i++) for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; gcut = GlobalSESum(&ctrl, cut)/2; maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs); balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes)); rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n", gcut, balance, maxnvtxs, graph->gnvtxs, npes); } /*================================================================= * Set up the newly moved graph =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); ctrl.nparts = inparts; FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); /*======================================================= * Now compute the partition of the moved graph =======================================================*/ if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) { IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes])); PartitionSmallGraph(&ctrl, mgraph, &wspace); } else { Moc_Global_Partition(&ctrl, mgraph, &wspace); } ParallelReMapGraph(&ctrl, mgraph, &wspace); /* Invert the ordering back to the original graph */ ctrl.nparts = npes; ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace); *edgecut = mgraph->mincut; IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); /*******************/ /* Print out stats */ /*******************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon); } GKfree((void **)&itpwgts, LTERM); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }
/************************************************************************* * This function creates the coarser graph **************************************************************************/ void CreateCoarseGraphNoMask(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm) { int i, j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize; idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj; idxtype *cmap, *htable; idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum; float *nvwgt, *cnvwgt; GraphType *cgraph; dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; vwgt = graph->vwgt; vsize = graph->vsize; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; adjwgtsum = graph->adjwgtsum; cmap = graph->cmap; /* Initialize the coarser graph */ cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize); cxadj = cgraph->xadj; cvwgt = cgraph->vwgt; cvsize = cgraph->vsize; cnvwgt = cgraph->nvwgt; cadjwgtsum = cgraph->adjwgtsum; cadjncy = cgraph->adjncy; cadjwgt = cgraph->adjwgt; htable = idxset(cnvtxs, -1, idxwspacemalloc(ctrl, cnvtxs)); iend = xadj[nvtxs]; auxadj = ctrl->wspace.auxcore; memcpy(auxadj, adjncy, iend*sizeof(idxtype)); for (i=0; i<iend; i++) auxadj[i] = cmap[auxadj[i]]; cxadj[0] = cnvtxs = cnedges = 0; for (i=0; i<nvtxs; i++) { v = perm[i]; if (cmap[v] != cnvtxs) continue; u = match[v]; if (ncon == 1) cvwgt[cnvtxs] = vwgt[v]; else scopy(ncon, nvwgt+v*ncon, cnvwgt+cnvtxs*ncon); if (dovsize) cvsize[cnvtxs] = vsize[v]; cadjwgtsum[cnvtxs] = adjwgtsum[v]; nedges = 0; istart = xadj[v]; iend = xadj[v+1]; for (j=istart; j<iend; j++) { k = auxadj[j]; if ((m = htable[k]) == -1) { cadjncy[nedges] = k; cadjwgt[nedges] = adjwgt[j]; htable[k] = nedges++; } else { cadjwgt[m] += adjwgt[j]; } } if (v != u) { if (ncon == 1) cvwgt[cnvtxs] += vwgt[u]; else saxpy(ncon, 1.0, nvwgt+u*ncon, 1, cnvwgt+cnvtxs*ncon, 1); if (dovsize) cvsize[cnvtxs] += vsize[u]; cadjwgtsum[cnvtxs] += adjwgtsum[u]; istart = xadj[u]; iend = xadj[u+1]; for (j=istart; j<iend; j++) { k = auxadj[j]; if ((m = htable[k]) == -1) { cadjncy[nedges] = k; cadjwgt[nedges] = adjwgt[j]; htable[k] = nedges++; } else { cadjwgt[m] += adjwgt[j]; } } /* Remove the contracted adjacency weight */ if ((j = htable[cnvtxs]) != -1) { ASSERT(cadjncy[j] == cnvtxs); cadjwgtsum[cnvtxs] -= cadjwgt[j]; cadjncy[j] = cadjncy[--nedges]; cadjwgt[j] = cadjwgt[nedges]; htable[cnvtxs] = -1; } } ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d\n", cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt))); for (j=0; j<nedges; j++) htable[cadjncy[j]] = -1; /* Zero out the htable */ cnedges += nedges; cxadj[++cnvtxs] = cnedges; cadjncy += nedges; cadjwgt += nedges; } cgraph->nedges = cnedges; ReAdjustMemory(graph, cgraph, dovsize); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); idxwspacefree(ctrl, cnvtxs); }
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) { int h, i, ii, iii, j, k, c; int pass, nvtxs, nedges, ncon; int nmoves, nmoved, nswaps, nzgswaps; /* int gnswaps, gnzgswaps; */ int me, firstvtx, lastvtx, yourlastvtx; int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; int nlupd, nsupd, nnbrs, nchanged; idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; idxtype *where, *tmp_where, *moved; floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; idxtype *update, *supdate, *rupdate, *pe_updates; idxtype *changed, *perm, *pperm, *htable; idxtype *peind, *recvptr, *sendptr; KeyValueType *swchanges, *rwchanges; RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; int *nupds_pe; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); /*************************/ /* set up common aliases */ /*************************/ nvtxs = graph->nvtxs; nedges = graph->nedges; ncon = graph->ncon; vtxdist = graph->vtxdist; xadj = graph->xadj; ladjncy = graph->adjncy; adjwgt = graph->adjwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->rinfo; lnpwgts = graph->lnpwgts; gnpwgts = graph->gnpwgts; ubvec = ctrl->ubvec; tpwgts = ctrl->tpwgts; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; changed = idxmalloc(nvtxs, "KWR: changed"); rwchanges = wspace->pairs; swchanges = rwchanges + recvptr[nnbrs]; /************************************/ /* set up important data structures */ /************************************/ perm = idxmalloc(nvtxs, "KWR: perm"); pperm = idxmalloc(nparts, "KWR: pperm"); update = idxmalloc(nvtxs, "KWR: update"); supdate = wspace->indices; rupdate = supdate + recvptr[nnbrs]; nupds_pe = imalloc(npes, "KWR: nupds_pe"); htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); for (i=0; i<nparts; i++) { for (h=0; h<ncon; h++) { badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h]; } } movewgts = fmalloc(nparts*ncon, "KWR: movewgts"); ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts"); pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts"); overfill = fmalloc(nparts*ncon, "KWR: overfill"); moved = idxmalloc(nvtxs, "KWR: moved"); tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where"); tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); idxcopy(nvtxs+graph->nrecv, where, tmp_where); for (i=0; i<nvtxs; i++) { tmp_rinfo[i].id = rinfo[i].id; tmp_rinfo[i].ed = rinfo[i].ed; tmp_rinfo[i].ndegrees = rinfo[i].ndegrees; tmp_rinfo[i].degrees = tmp_edegrees+xadj[i]; for (j=0; j<rinfo[i].ndegrees; j++) { tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge; tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt; } } nswaps = nzgswaps = 0; /*********************************************************/ /* perform a small number of passes through the vertices */ /*********************************************************/ for (pass=0; pass<npasses; pass++) { if (mype == 0) RandomPermute(nparts, pperm, 1); MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); FastRandomPermute(nvtxs, perm, 1); oldcut = graph->mincut; /* check to see if the partitioning is imbalanced */ Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); ubavg = savg(ncon, ubvec); lbavg = savg(ncon, lbvec); imbalanced = (lbavg > ubavg) ? 1 : 0; for (c=0; c<2; c++) { scopy(ncon*nparts, gnpwgts, ognpwgts); sset(ncon*nparts, 0.0, movewgts); nmoved = 0; /**********************************************/ /* PASS ONE -- record stats for desired moves */ /**********************************************/ for (iii=0; iii<nvtxs; iii++) { i = perm[iii]; from = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; for (h=0; h<ncon; h++) if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT) break; if (h < ncon) { continue; } /* check for a potential improvement */ if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) { my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) { to = my_edegrees[k].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) break; } } oldto = to; /* check if a subdomain was found that fits */ if (k < tmp_rinfo[i].ndegrees) { for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) { to = my_edegrees[j].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) { if (my_edegrees[j].ewgt > my_edegrees[k].ewgt || (my_edegrees[j].ewgt == my_edegrees[k].ewgt && IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ k = j; oldto = my_edegrees[k].edge; } } } } to = oldto; if (my_edegrees[k].ewgt > tmp_rinfo[i].id || (my_edegrees[k].ewgt == tmp_rinfo[i].id && (imbalanced || graph->level > 3 || iii % 8 == 0) && IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ /****************************************/ /* Update tmp arrays of the moved vertex */ /****************************************/ tmp_where[i] = to; moved[nmoved++] = i; for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; gnpwgts[to*ncon+h] += nvwgt[h]; gnpwgts[from*ncon+h] -= nvwgt[h]; movewgts[to*ncon+h] += nvwgt[h]; movewgts[from*ncon+h] -= nvwgt[h]; } tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } } /******************************************/ /* Let processors know the subdomain wgts */ /* if all proposed moves commit. */ /******************************************/ MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); /**************************/ /* compute overfill array */ /**************************/ overweight = 0; for (j=0; j<nparts; j++) { for (h=0; h<ncon; h++) { if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) { overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); } else { overfill[j*ncon+h] = 0.0; } overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); overfill[j*ncon+h] *= movewgts[j*ncon+h]; if (overfill[j*ncon+h] > 0.0) overweight = 1; ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); } } /****************************************************/ /* select moves to undo according to overfill array */ /****************************************************/ if (overweight == 1) { for (iii=0; iii<nmoved; iii++) { i = moved[iii]; oldto = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) if (my_edegrees[k].edge == where[i]) break; for (h=0; h<ncon; h++) if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) break; /**********************************/ /* nullify this move if necessary */ /**********************************/ if (k != tmp_rinfo[i].ndegrees && h != ncon) { moved[iii] = -1; from = oldto; to = where[i]; for (h=0; h<ncon; h++) { overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0); } tmp_where[i] = to; tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } /*************************************************/ /* PASS TWO -- commit the remainder of the moves */ /*************************************************/ nlupd = nsupd = nmoves = nchanged = 0; for (iii=0; iii<nmoved; iii++) { i = moved[iii]; if (i == -1) continue; where[i] = tmp_where[i]; /* Make sure to update the vertex information */ if (htable[i] == 0) { /* make sure you do the update */ htable[i] = 1; update[nlupd++] = i; } /* Put the vertices adjacent to i into the update array */ for (j=xadj[i]; j<xadj[i+1]; j++) { k = ladjncy[j]; if (htable[k] == 0) { htable[k] = 1; if (k<nvtxs) update[nlupd++] = k; else supdate[nsupd++] = k; } } nmoves++; nswaps++; /* check number of zero-gain moves */ for (k=0; k<rinfo[i].ndegrees; k++) if (rinfo[i].degrees[k].edge == to) break; if (rinfo[i].id == rinfo[i].degrees[k].ewgt) nzgswaps++; if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } /* Tell interested pe's the new where[] info for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*------------------------------------------------------------- / Time to communicate with processors to send the vertices / whose degrees need to be update. /-------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; i<nsupd; i++) { htable[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } iidxsort(nsupd, supdate); for (j=i=0; i<nnbrs; i++) { yourlastvtx = vtxdist[peind[i]+1]; for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; i<nnbrs; i++) MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the recieved to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; i<nnbrs; i++) { pe_updates = rupdate+sendptr[i]; for (j=0; j<nupds_pe[i]; j++) { k = pe_updates[j]; if (htable[k-firstvtx] == 0) { htable[k-firstvtx] = 1; update[nlupd++] = k-firstvtx; } } } /*------------------------------------------------------------- / Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii<nlupd; ii++) { i = update[ii]; ASSERT(ctrl, htable[i] == 1); htable[i] = 0; mydomain = where[i]; myrinfo = rinfo+i; tmp_myrinfo = tmp_rinfo+i; my_edegrees = myrinfo->degrees; your_edegrees = tmp_myrinfo->degrees; graph->lmincut -= myrinfo->ed; myrinfo->ndegrees = 0; myrinfo->id = 0; myrinfo->ed = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { yourdomain = where[ladjncy[j]]; if (mydomain != yourdomain) { myrinfo->ed += adjwgt[j]; for (k=0; k<myrinfo->ndegrees; k++) { if (my_edegrees[k].edge == yourdomain) { my_edegrees[k].ewgt += adjwgt[j]; your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { my_edegrees[k].edge = yourdomain; my_edegrees[k].ewgt = adjwgt[j]; your_edegrees[k].edge = yourdomain; your_edegrees[k].ewgt = adjwgt[j]; myrinfo->ndegrees++; } ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); } else { myrinfo->id += adjwgt[j]; } } graph->lmincut += myrinfo->ed; tmp_myrinfo->id = myrinfo->id; tmp_myrinfo->ed = myrinfo->ed; tmp_myrinfo->ndegrees = myrinfo->ndegrees; } /* finally, sum-up the partition weights */ MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); } graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; if (graph->mincut == oldcut) break; } /* gnswaps = GlobalSESum(ctrl, nswaps); gnzgswaps = GlobalSESum(ctrl, nzgswaps); if (mype == 0) printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps); */ GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); }
int main(int argc, char *argv[]) { int i, npart; idxtype *part; float ncut=0; GraphType graph; char filename[256],outputFile[256]; int wgtflag = 0, addSelfLoop=1, outputFileGiven=0, txtFormat=0 ; int randomInit = 0; idxtype minEdgeWeight = 0; Options opt; timer TOTALTmr, METISTmr, IOTmr; initOptions(&opt); if (argc < 2) { print_help(argv[0]); exit(0); } for (argv++; *argv != NULL; argv++){ if ((*argv)[0] == '-') { int temp; switch ((*argv)[1]) { case 'b': case 'B': opt.penalty_power=atof(*(++argv)); break; case 'i': case 'I': opt.gamma=atof(*(++argv)); break; case 'o': case 'O': strcpy(outputFile,*(++argv)); outputFileGiven=1; break; case 'D'://quality threshold. This is a post-processing step proposed in SR-MCL. If you dont want post-processing (this is what original MLR-MCL, R-MCL, MCL do, please set "-d 0" case 'd': opt.quality_threshold = atof(*(++argv)); break; case 'w': case 'W': opt.weighted_density = true; break; case 'c': case 'C': opt.coarsenTo= atoi(*(++argv)); break; default: printf("Invalid option %s\n", *argv); print_help(argv[0]); exit(0); } } else { strcpy(filename, *argv); } } if ( randomInit > 0 ) InitRandom(time(NULL)); else InitRandom(-1); cleartimer(TOTALTmr); cleartimer(METISTmr); cleartimer(IOTmr); starttimer(TOTALTmr); starttimer(IOTmr); ReadGraph(&graph, filename, &wgtflag, addSelfLoop, txtFormat); if ( opt.matchType == MATCH_UNSPECIFIED ) { // opt.matchType = (graph.nvtxs>50000) ? MATCH_POWERLAW_FC : // MATCH_SHEMN; opt.matchType = MATCH_SHEMN; } stoptimer(IOTmr); if (graph.nvtxs <= 0) { printf("Empty graph. Nothing to do.\n"); exit(0); } int noOfSingletons = 0; GraphType *noSingletonGraph ; idxtype* nodeMap = lookForSingletons(&graph, &noOfSingletons); if ( noOfSingletons > 0 ) { getSubgraph(&graph, nodeMap, graph.nvtxs-noOfSingletons, wgtflag, &noSingletonGraph); GKfree((void**)&(graph.xadj), (void**)&(graph.adjncy), LTERM); if ( wgtflag&1 > 0 ) GKfree( (void**)&(graph.adjwgt), LTERM); // free(graph.gdata); printf("Found %d singleton nodes in the", noOfSingletons); printf(" input graph. Removing them.\n"); } if ( !outputFileGiven ) { strcpy(outputFile, filename); sprintf(outputFile,"%s.c%d.i%1.1f.b%1.1f",outputFile,opt.coarsenTo,opt.gamma,opt.penalty_power); } printf("Input graph information ---------------------------------------------------\n"); printf(" Name: %s, #Vertices: %d, #Edges: %d\n", filename, graph.nvtxs, graph.nedges/2); printf("Output shall be placed in the file %s\n", outputFile); fflush(stdout); part = idxmalloc(graph.nvtxs, "main: part"); printf("------------------------------------------------\n"); printf("Clustering....\n"); fflush(stdout); starttimer(METISTmr); //YK: main algorithm starts here! if ( noOfSingletons > 0 ) { mlmcl(&(noSingletonGraph->nvtxs), noSingletonGraph->xadj, noSingletonGraph->adjncy, noSingletonGraph->vwgt,noSingletonGraph->adjwgt, &wgtflag, part, opt ); } else { mlmcl(&graph.nvtxs, graph.xadj, graph.adjncy,graph.vwgt, graph.adjwgt, &wgtflag, part, opt ); } stoptimer(METISTmr); printf("------------------------------------------------\n"); if ( noOfSingletons > 0 ) { npart=mapPartition(part,noSingletonGraph->nvtxs); ncut=ComputeNCut(noSingletonGraph, part,npart); // printf("In graph that does not include singletons,"); // printf("No. of Clusters: %d, N-Cut value: %.2f\n",npart,ncut); idxtype *clusterSizes = histogram(part, graph.nvtxs-noOfSingletons, npart); int maxSize = clusterSizes[idxamax(npart, clusterSizes)]; float avgClusterSize = (graph.nvtxs-noOfSingletons)*1.0/(npart); float balance = (maxSize*1.0) / ((graph.nvtxs-noOfSingletons)*1.0/npart); float stdDevn = stdDeviation(clusterSizes, npart); float avgNcut = ncut * 1.0/npart; float normStdDevn = stdDevn/avgClusterSize; // Warning: This computation only works if the singletons // have been placed in their own clusters. This works for // MLR-MCL, in other words, because it is guaranteed to // place singletons in their own clusters. printf("Output statistics for graph without singletons\n"); printf("Clusters: %d N-Cut: %.3f", npart, ncut); printf(" AvgN-Cut: %.3f Balance in cluster sizes: %.2f ",avgNcut, balance); printf("Std_Deviation in cluster sizes: %.2f ", stdDevn); printf("Coefficient_of_Variation: %.2f\n", normStdDevn); free( clusterSizes ); npart += noOfSingletons; // ncut += noOfSingletons; printf("Output statistics for original graph\n"); mapIndices(part, nodeMap, graph.nvtxs, npart-noOfSingletons); } else { npart=mapPartition(part,graph.nvtxs); ncut=ComputeNCut(&graph, part,npart); } idxtype* clusterSizes = histogram(part, graph.nvtxs, npart); int maxSize = clusterSizes[idxamax(npart, clusterSizes)]; float avgClusterSize = (graph.nvtxs)*1.0/(npart); float balance = (maxSize*1.0)/(graph.nvtxs*1.0/npart); float stdDevn = stdDeviation(clusterSizes, npart); float avgNcut = ncut * 1.0/npart; float normStdDevn = stdDevn/avgClusterSize; printf("Clusters: %d N-Cut: %.3f AvgN-Cut: %.3f", npart, ncut, avgNcut ); printf(" Balance in cluster sizes: %.2f Std.Deviation in cluster sizes: %.2f ", balance, stdDevn); printf("Coefficient_of_Variation: %.2f\n", normStdDevn); starttimer(IOTmr); my_WritePartition(outputFile, part, graph.nvtxs, opt.gamma); if ( noOfSingletons > 0 ) { free(nodeMap); nodeMap = NULL; } printf("\nOutput is written to file: %s\n", outputFile); stoptimer(IOTmr); stoptimer(TOTALTmr); printf("\nTiming Information --------------------------------------------------\n"); printf(" I/O: \t\t %7.3f\n", gettimer(IOTmr)); printf(" Partitioning: \t\t %7.3f (MLR-MCL time)\n", gettimer(METISTmr)); printf(" Total: \t\t %7.3f\n", gettimer(TOTALTmr)); printf("**********************************************************************\n"); GKfree((void**)&graph.xadj, (void**)&graph.adjncy, (void**)&graph.vwgt, (void**)&graph.adjwgt, (void**)&part, LTERM); }
/************************************************************************* * Let the game begin **************************************************************************/ main(int argc, char *argv[]) { int i, options[10]; idxtype *perm, *iperm; GraphType graph; char filename[256]; int numflag = 0, wgtflag; timer TOTALTmr, METISTmr, IOTmr, SMBTmr; if (argc != 2) { printf("Usage: %s <GraphFile>\n",argv[0]); exit(0); } strcpy(filename, argv[1]); cleartimer(TOTALTmr); cleartimer(METISTmr); cleartimer(IOTmr); cleartimer(SMBTmr); starttimer(TOTALTmr); starttimer(IOTmr); ReadGraph(&graph, filename, &wgtflag); if (graph.nvtxs <= 0) { printf("Empty graph. Nothing to do.\n"); exit(0); } if (graph.ncon != 1) { printf("Ordering can only be applied to graphs with one constraint.\n"); exit(0); } stoptimer(IOTmr); /* Ordering does not use weights! */ GKfree(&graph.vwgt, &graph.adjwgt, LTERM); printf("**********************************************************************\n"); printf("%s", METISTITLE); printf("Graph Information ---------------------------------------------------\n"); printf(" Name: %s, #Vertices: %d, #Edges: %d\n\n", filename, graph.nvtxs, graph.nedges/2); printf("Node-Based Ordering... ----------------------------------------------\n"); perm = idxmalloc(graph.nvtxs, "main: perm"); iperm = idxmalloc(graph.nvtxs, "main: iperm"); options[0] = 0; starttimer(METISTmr); METIS_NodeND(&graph.nvtxs, graph.xadj, graph.adjncy, &numflag, options, perm, iperm); stoptimer(METISTmr); starttimer(IOTmr); WritePermutation(filename, iperm, graph.nvtxs); stoptimer(IOTmr); starttimer(SMBTmr); ComputeFillIn(&graph, iperm); stoptimer(SMBTmr); stoptimer(TOTALTmr); printf("\nTiming Information --------------------------------------------------\n"); printf(" I/O: \t %7.3f\n", gettimer(IOTmr)); printf(" Ordering: \t %7.3f (ONMETIS time)\n", gettimer(METISTmr)); printf(" Symbolic Factorization: \t %7.3f\n", gettimer(SMBTmr)); printf(" Total: \t %7.3f\n", gettimer(TOTALTmr)); printf("**********************************************************************\n"); GKfree(&graph.xadj, &graph.adjncy, &perm, &iperm, LTERM); }
/************************************************************************* * This function is the entry point of the initial partition algorithm * that does recursive bissection. * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j; int ncon, mype, npes, gnvtxs, ngroups; idxtype *xadj, *adjncy, *adjwgt, *vwgt; idxtype *part, *gwhere0, *gwhere1; idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; GraphType *agraph; int lnparts, fpart, fpe, lnpes; int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { float sum; int rank; } lpesum, gpesum; ncon = graph->ncon; ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part"); xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj"); adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy"); adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt"); vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt"); idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt); idxcopy(agraph->nvtxs+1, agraph->xadj, xadj); idxcopy(agraph->nedges, agraph->adjncy, adjncy); idxcopy(agraph->nedges, agraph->adjwgt, adjwgt); MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); MPI_Comm_rank(ipcomm, &mype); MPI_Comm_size(ipcomm, &npes); gnvtxs = agraph->nvtxs; gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0"); gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1"); /* ADD: this assumes that tpwgts for all constraints is the same */ /* ADD: this is necessary because serial metis does not support the general case */ mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); for (i=0; i<ctrl->nparts; i++) for (j=0; j<ncon; j++) mytpwgts[i] += ctrl->tpwgts[i*ncon+j]; for (i=0; i<ctrl->nparts; i++) mytpwgts[i] /= (float)ncon; /* Go into the recursive bisection */ /* ADD: consider changing this to breadth-first type bisection */ moptions[0] = 0; moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1; lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { /* Determine the weights of the partitions */ mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); mytpwgts2[1] = 1.0-mytpwgts2[0]; if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); else { METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } wsum = ssum(lnparts/2, mytpwgts+fpart); sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart); sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2); /* I'm picking the left branch */ if (mype < fpe+lnpes/2) { Mc_KeepPart(agraph, wspace, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { Mc_KeepPart(agraph, wspace, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } /* In case npes is greater than or equal to nparts */ if (lnparts == 1) { /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart; } } /* In case npes is smaller than nparts */ else { if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); else METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm); if (ngroups > 1) { tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; tmpvwgt = agraph->vwgt; tmpwhere = agraph->where; agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; agraph->vwgt = vwgt; agraph->where = gwhere1; agraph->vwgt = vwgt; agraph->nvtxs = gnvtxs; Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); lbsum = ssum(ncon, lbvec); edgecut = ComputeSerialEdgeCut(agraph); MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpesum.sum = (float) (edgecut); else lpesum.sum = (float) (max_cut); } MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank)); MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm); MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm); agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; agraph->vwgt = tmpvwgt; agraph->where = tmpwhere; } idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); MPI_Comm_free(&ipcomm); GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); }
/************************************************************************* * Let the game begin **************************************************************************/ main(int argc, char *argv[]) { int i, j, ne, nn, etype, numflag=0, nparts, edgecut; idxtype *elmnts, *epart, *npart; timer IOTmr, DUALTmr; char etypestr[4][5] = {"TRI", "TET", "HEX", "QUAD"}; GraphType graph; if (argc != 3) { printf("Usage: %s <meshfile> <nparts>\n",argv[0]); exit(0); } nparts = atoi(argv[2]); if (nparts < 2) { printf("nparts must be greater than one.\n"); exit(0); } cleartimer(IOTmr); cleartimer(DUALTmr); starttimer(IOTmr); elmnts = ReadMesh(argv[1], &ne, &nn, &etype); stoptimer(IOTmr); epart = idxmalloc(ne, "main: epart"); npart = idxmalloc(nn, "main: npart"); printf("**********************************************************************\n"); printf("%s", METISTITLE); printf("Mesh Information ----------------------------------------------------\n"); printf(" Name: %s, #Elements: %d, #Nodes: %d, Etype: %s\n\n", argv[1], ne, nn, etypestr[etype-1]); printf("Partitioning Nodal Graph... -----------------------------------------\n"); starttimer(DUALTmr); METIS_PartMeshNodal(&ne, &nn, elmnts, &etype, &numflag, &nparts, &edgecut, epart, npart); stoptimer(DUALTmr); printf(" %d-way Edge-Cut: %7d, Balance: %5.2f\n", nparts, edgecut, ComputeElementBalance(ne, nparts, epart)); starttimer(IOTmr); WriteMeshPartition(argv[1], nparts, ne, epart, nn, npart); stoptimer(IOTmr); printf("\nTiming Information --------------------------------------------------\n"); printf(" I/O: \t\t %7.3f\n", gettimer(IOTmr)); printf(" Partitioning: \t\t %7.3f\n", gettimer(DUALTmr)); printf("**********************************************************************\n"); /* graph.nvtxs = ne; graph.xadj = idxmalloc(ne+1, "xadj"); graph.vwgt = idxsmalloc(ne, 1, "vwgt"); graph.adjncy = idxmalloc(10*ne, "adjncy"); graph.adjwgt = idxsmalloc(10*ne, 1, "adjncy"); METIS_MeshToDual(&ne, &nn, elmnts, &etype, &numflag, graph.xadj, graph.adjncy); ComputePartitionInfo(&graph, nparts, epart); GKfree(&graph.xadj, &graph.adjncy, &graph.vwgt, &graph.adjwgt, LTERM); */ GKfree(&elmnts, &epart, &npart, LTERM); }
/************************************************************************* * This function computes the initial id/ed **************************************************************************/ void ComputeKWayVolGains(CtrlType *ctrl, GraphType *graph, int nparts) { int i, ii, j, k, kk, l, nvtxs, me, other, pid, myndegrees; idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *bndind, *bndptr, *ophtable; VRInfoType *rinfo, *myrinfo, *orinfo; VEDegreeType *myedegrees, *oedegrees; nvtxs = graph->nvtxs; xadj = graph->xadj; vsize = graph->vsize; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; bndind = graph->bndind; bndptr = idxset(nvtxs, -1, graph->bndptr); rinfo = graph->vrinfo; starttimer(ctrl->AuxTmr2); ophtable = idxset(nparts, -1, idxwspacemalloc(ctrl, nparts)); /*------------------------------------------------------------ / Compute now the iv/ev degrees /------------------------------------------------------------*/ graph->minvol = graph->nbnd = 0; for (i=0; i<nvtxs; i++) { myrinfo = rinfo+i; myrinfo->gv = -MAXIDX; if (myrinfo->ndegrees > 0) { me = where[i]; myedegrees = myrinfo->edegrees; myndegrees = myrinfo->ndegrees; graph->minvol += myndegrees*vsize[i]; for (j=xadj[i]; j<xadj[i+1]; j++) { ii = adjncy[j]; other = where[ii]; orinfo = rinfo+ii; oedegrees = orinfo->edegrees; for (k=0; k<orinfo->ndegrees; k++) ophtable[oedegrees[k].pid] = k; ophtable[other] = 1; /* this is to simplify coding */ if (me == other) { /* Find which domains 'i' is connected and 'ii' is not and update their gain */ for (k=0; k<myndegrees; k++) { if (ophtable[myedegrees[k].pid] == -1) myedegrees[k].gv -= vsize[ii]; } } else { ASSERT(ophtable[me] != -1); if (oedegrees[ophtable[me]].ned == 1) { /* I'm the only connection of 'ii' in 'me' */ /* Increase the gains for all the common domains between 'i' and 'ii' */ for (k=0; k<myndegrees; k++) { if (ophtable[myedegrees[k].pid] != -1) myedegrees[k].gv += vsize[ii]; } } else { /* Find which domains 'i' is connected and 'ii' is not and update their gain */ for (k=0; k<myndegrees; k++) { if (ophtable[myedegrees[k].pid] == -1) myedegrees[k].gv -= vsize[ii]; } } } for (kk=0; kk<orinfo->ndegrees; kk++) ophtable[oedegrees[kk].pid] = -1; ophtable[other] = -1; } /* Compute the max vgain */ for (k=0; k<myndegrees; k++) { if (myedegrees[k].gv > myrinfo->gv) myrinfo->gv = myedegrees[k].gv; } } if (myrinfo->ed > 0 && myrinfo->id == 0) myrinfo->gv += vsize[i]; if (myrinfo->gv >= 0 || myrinfo->ed-myrinfo->id >= 0) BNDInsert(graph->nbnd, bndind, bndptr, i); } stoptimer(ctrl->AuxTmr2); idxwspacefree(ctrl, nparts); }
/************************************************************************* * This function takes a graph and creates a sequence of coarser graphs **************************************************************************/ GraphType *Coarsen2Way(CtrlType *ctrl, GraphType *graph) { int clevel; GraphType *cgraph; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->CoarsenTmr)); cgraph = graph; /* The following is ahack to allow the multiple bisections to go through with correct coarsening */ if (ctrl->CType > 20) { clevel = 1; ctrl->CType -= 20; } else clevel = 0; do { IFSET(ctrl->dbglvl, DBG_COARSEN, printf("%6d %7d [%d] [%d %d]\n", cgraph->nvtxs, cgraph->nedges, ctrl->CoarsenTo, ctrl->maxvwgt, (cgraph->vwgt ? idxsum(cgraph->nvtxs, cgraph->vwgt) : cgraph->nvtxs))); if (cgraph->adjwgt) { switch (ctrl->CType) { case MATCH_RM: Match_RM(ctrl, cgraph); break; case MATCH_HEM: if (clevel < 1) Match_RM(ctrl, cgraph); else Match_HEM(ctrl, cgraph); break; case MATCH_SHEM: if (clevel < 1) Match_RM(ctrl, cgraph); else Match_SHEM(ctrl, cgraph); break; case MATCH_SHEMKWAY: Match_SHEM(ctrl, cgraph); break; default: errexit("Unknown CType: %d\n", ctrl->CType); } } else { Match_RM_NVW(ctrl, cgraph); } cgraph = cgraph->coarser; clevel++; } while (cgraph->nvtxs > ctrl->CoarsenTo && cgraph->nvtxs < COARSEN_FRACTION2*cgraph->finer->nvtxs && cgraph->nedges > cgraph->nvtxs/2); IFSET(ctrl->dbglvl, DBG_COARSEN, printf("%6d %7d [%d] [%d %d]\n", cgraph->nvtxs, cgraph->nedges, ctrl->CoarsenTo, ctrl->maxvwgt, (cgraph->vwgt ? idxsum(cgraph->nvtxs, cgraph->vwgt) : cgraph->nvtxs))); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->CoarsenTmr)); return cgraph; }
/************************************************************************* * This function is the entry point of refinement **************************************************************************/ void RefineVolKWay(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, float *tpwgts, float ubfactor) { int i, nlevels; GraphType *ptr; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); /* Take care any non-contiguity */ IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->AuxTmr1)); if (ctrl->RType == RTYPE_KWAYRANDOM_MCONN) { ComputeVolKWayPartitionParams(ctrl, graph, nparts); EliminateVolComponents(ctrl, graph, nparts, tpwgts, 1.25); EliminateVolSubDomainEdges(ctrl, graph, nparts, tpwgts); EliminateVolComponents(ctrl, graph, nparts, tpwgts, 1.25); } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->AuxTmr1)); /* Determine how many levels are there */ for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); /* Compute the parameters of the coarsest graph */ ComputeVolKWayPartitionParams(ctrl, graph, nparts); for (i=0; ;i++) { /*PrintSubDomainGraph(graph, nparts, graph->where);*/ MALLOC_CHECK(NULL); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->RefTmr)); if (2*i >= nlevels && !IsBalanced(graph->pwgts, nparts, tpwgts, 1.04*ubfactor)) { ComputeVolKWayBalanceBoundary(ctrl, graph, nparts); switch (ctrl->RType) { case RTYPE_KWAYRANDOM: Greedy_KWayVolBalance(ctrl, graph, nparts, tpwgts, ubfactor, 1); break; case RTYPE_KWAYRANDOM_MCONN: Greedy_KWayVolBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 1); break; } ComputeVolKWayBoundary(ctrl, graph, nparts); } switch (ctrl->RType) { case RTYPE_KWAYRANDOM: Random_KWayVolRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); break; case RTYPE_KWAYRANDOM_MCONN: Random_KWayVolRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); break; } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->RefTmr)); if (graph == orggraph) break; GKfree(&graph->gdata, LTERM); /* Deallocate the graph related arrays */ graph = graph->finer; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); ProjectVolKWayPartition(ctrl, graph, nparts); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); } if (!IsBalanced(graph->pwgts, nparts, tpwgts, ubfactor)) { ComputeVolKWayBalanceBoundary(ctrl, graph, nparts); switch (ctrl->RType) { case RTYPE_KWAYRANDOM: Greedy_KWayVolBalance(ctrl, graph, nparts, tpwgts, ubfactor, 8); Random_KWayVolRefine(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); break; case RTYPE_KWAYRANDOM_MCONN: Greedy_KWayVolBalanceMConn(ctrl, graph, nparts, tpwgts, ubfactor, 8); Random_KWayVolRefineMConn(ctrl, graph, nparts, tpwgts, ubfactor, 10, 0); break; } } EliminateVolComponents(ctrl, graph, nparts, tpwgts, ubfactor); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); }
/************************************************************************* * This function finds a matching using the HEM heuristic **************************************************************************/ void Match_SHEM(CtrlType *ctrl, GraphType *graph) { int i, ii, j, k, nvtxs, cnvtxs, maxidx, maxwgt, avgdegree; idxtype *xadj, *vwgt, *adjncy, *adjwgt; idxtype *match, *cmap, *degrees, *perm, *tperm; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr)); nvtxs = graph->nvtxs; xadj = graph->xadj; vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; cmap = graph->cmap; match = idxset(nvtxs, UNMATCHED, idxwspacemalloc(ctrl, nvtxs)); perm = idxwspacemalloc(ctrl, nvtxs); tperm = idxwspacemalloc(ctrl, nvtxs); degrees = idxwspacemalloc(ctrl, nvtxs); RandomPermute(nvtxs, tperm, 1); avgdegree = 0.7*(xadj[nvtxs]/nvtxs); for (i=0; i<nvtxs; i++) degrees[i] = (xadj[i+1]-xadj[i] > avgdegree ? avgdegree : xadj[i+1]-xadj[i]); BucketSortKeysInc(nvtxs, avgdegree, degrees, tperm, perm); cnvtxs = 0; /* Take care any islands. Islands are matched with non-islands due to coarsening */ for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; if (match[i] == UNMATCHED) { /* Unmatched */ if (xadj[i] < xadj[i+1]) break; maxidx = i; for (j=nvtxs-1; j>ii; j--) { k = perm[j]; if (match[k] == UNMATCHED && xadj[k] < xadj[k+1]) { maxidx = k; break; } } cmap[i] = cmap[maxidx] = cnvtxs++; match[i] = maxidx; match[maxidx] = i; } } /* Continue with normal matching */ for (; ii<nvtxs; ii++) { i = perm[ii]; if (match[i] == UNMATCHED) { /* Unmatched */ maxidx = i; maxwgt = 0; /* Find a heavy-edge matching, subject to maxvwgt constraints */ for (j=xadj[i]; j<xadj[i+1]; j++) { if (match[adjncy[j]] == UNMATCHED && maxwgt < adjwgt[j] && vwgt[i]+vwgt[adjncy[j]] <= ctrl->maxvwgt) { maxwgt = adjwgt[j]; maxidx = adjncy[j]; } } cmap[i] = cmap[maxidx] = cnvtxs++; match[i] = maxidx; match[maxidx] = i; } } IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr)); idxwspacefree(ctrl, nvtxs); /* degrees */ idxwspacefree(ctrl, nvtxs); /* tperm */ CreateCoarseGraph(ctrl, graph, cnvtxs, match, perm); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
void MLKKMRefine(CtrlType *ctrl, GraphType *orggraph, GraphType *graph, int nparts, int chain_length, float *tpwgts, float ubfactor) { int i, nlevels, mustfree=0, temp_cl; GraphType *ptr; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->UncoarsenTmr)); /* Compute the parameters of the coarsest graph */ ComputeKWayPartitionParams(ctrl, graph, nparts); temp_cl = chain_length; /* Determine how many levels are there */ for (ptr=graph, nlevels=0; ptr!=orggraph; ptr=ptr->finer, nlevels++); //printf("Number of levels is %d\n", nlevels); for (i=0; ;i++) { timer tmr; float result; cleartimer(tmr); starttimer(tmr); //pingpong(ctrl, graph, nparts, chain_length, tpwgts, ubfactor); //chain_length /= 1.5; //printf("Level: %d\n", i+1); if (graph == orggraph){ //chain_length = chain_length>0 ? chain_length : 1; pingpong(ctrl, graph, nparts, chain_length, tpwgts, ubfactor, 1); break; } else{ //pingpong(ctrl, graph, nparts, 0, tpwgts, ubfactor, 0); pingpong(ctrl, graph, nparts, chain_length, tpwgts, ubfactor, 0); //chain_length /= 2; } //pingpong(ctrl, graph, nparts, chain_length, tpwgts, ubfactor); // /* for time and quality each level stoptimer(tmr); //printf("Level %d: %7.3f", i+1, tmr); if (cutType == NCUT){ result = ComputeNCut(graph, graph->where, nparts); //printf(" %7f", result); } else{ result = ComputeRAsso(graph, graph->where, nparts); //printf(" %7f", result); } //printf(" (%d)\n\n", graph->nvtxs); //ends here*/ if (graph == orggraph) break; /* if(i>1) chain_length /= 10; */ GKfree((void **) &graph->gdata, LTERM); /* Deallocate the graph related arrays */ graph = graph->finer; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ProjectTmr)); if (graph->vwgt == NULL) { graph->vwgt = idxsmalloc(graph->nvtxs, 1, "RefineKWay: graph->vwgt"); graph->adjwgt = idxsmalloc(graph->nedges, 1, "RefineKWay: graph->adjwgt"); mustfree = 1; } ProjectKWayPartition(ctrl, graph, nparts); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ProjectTmr)); } if (mustfree) GKfree((void **) &graph->vwgt, (void **) &graph->adjwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->UncoarsenTmr)); }
/************************************************************************* * This function is the entry point of the initial partition algorithm * that does recursive bissection. * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ void InitPartition(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, ncon, mype, npes, gnvtxs, ngroups; idx_t *xadj, *adjncy, *adjwgt, *vwgt; idx_t *part, *gwhere0, *gwhere1; idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; graph_t *agraph; idx_t lnparts, fpart, fpe, lnpes; idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut; real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { double sum; int rank; } lpesum, gpesum; WCOREPUSH; ncon = graph->ncon; ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); lbvec = rwspacemalloc(ctrl, ncon); /* assemble the graph to all the processors */ agraph = AssembleAdaptiveGraph(ctrl, graph); gnvtxs = agraph->nvtxs; /* make a copy of the graph's structure for later */ xadj = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1)); vwgt = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon)); adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges)); adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges)); part = iwspacemalloc(ctrl, gnvtxs); /* create different processor groups */ gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); gkMPI_Comm_rank(ipcomm, &mype); gkMPI_Comm_size(ipcomm, &npes); /* Go into the recursive bisection */ METIS_SetDefaultOptions(moptions); moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1; tpwgts = ctrl->tpwgts; tpwgts2 = rwspacemalloc(ctrl, 2*ncon); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { /* determine the weights of the two partitions as a function of the weight of the target partition weights */ for (j=(lnparts>>1), i=0; i<ncon; i++) { tpwgts2[i] = rsum(j, tpwgts+fpart*ncon+i, ncon); tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon); wsum = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]); tpwgts2[i] *= wsum; tpwgts2[ncon+i] *= wsum; } METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, &edgecut, part); /* pick one of the branches */ if (mype < fpe+lnpes/2) { KeepPart(ctrl, agraph, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { KeepPart(ctrl, agraph, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs)); gwhere1 = iwspacemalloc(ctrl, gnvtxs); if (lnparts == 1) { /* Case npes is greater than or equal to nparts */ /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart; } } else { /* Case in which npes is smaller than nparts */ /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */ tpwgts = rwspacemalloc(ctrl, lnparts*ncon); for (j=0; j<ncon; j++) { for (wsum=0.0, i=0; i<lnparts; i++) { tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j]; wsum += tpwgts[i*ncon+j]; } for (wsum=1.0/wsum, i=0; i<lnparts; i++) tpwgts[i*ncon+j] *= wsum; } METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm); if (ngroups > 1) { tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; tmpvwgt = agraph->vwgt; tmpwhere = agraph->where; agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; agraph->vwgt = vwgt; agraph->where = gwhere1; agraph->vwgt = vwgt; agraph->nvtxs = gnvtxs; edgecut = ComputeSerialEdgeCut(agraph); ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); lbsum = rsum(ncon, lbvec, 1); gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ctrl->gcomm); gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; if (min_lbsum < UNBALANCE_FRACTION*ncon) { if (lbsum < UNBALANCE_FRACTION*ncon) lpesum.sum = edgecut; else lpesum.sum = max_cut; } lpesum.rank = ctrl->mype; gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT, MPI_MINLOC, ctrl->gcomm); gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm); agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; agraph->vwgt = tmpvwgt; agraph->where = tmpwhere; } icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); gkMPI_Comm_free(&ipcomm); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); WCOREPOP; }
void KWayNodeRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses, float ubfraction) { int i, ii, iii, j, k, pass, nvtxs, firstvtx, lastvtx, otherlastvtx, c, nmoves, nlupd, nsupd, nnbrs, nchanged, nsep; int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt; idxtype *where, *lpwgts, *gpwgts, *sepind; idxtype *peind, *recvptr, *sendptr; idxtype *update, *supdate, *rupdate, *pe_updates, *htable, *changed; idxtype *badminpwgt, *badmaxpwgt; KeyValueType *swchanges, *rwchanges; int *nupds_pe; NRInfoType *rinfo, *myrinfo; int from, to, me, other, oldcut; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); nvtxs = graph->nvtxs; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; vwgt = graph->vwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->nrinfo; lpwgts = graph->lpwgts; gpwgts = graph->gpwgts; nsep = graph->nsep; sepind = graph->sepind; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; changed = idxmalloc(nvtxs, "KWayRefine: changed"); rwchanges = wspace->pairs; swchanges = rwchanges + recvptr[nnbrs]; update = idxmalloc(nvtxs, "KWayRefine: update"); supdate = wspace->indices; rupdate = supdate + recvptr[nnbrs]; nupds_pe = imalloc(npes, "KWayRefine: nupds_pe"); htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWayRefine: lhtable"); badminpwgt = wspace->pv1; badmaxpwgt = wspace->pv2; for (i=0; i<nparts; i+=2) { badminpwgt[i] = badminpwgt[i+1] = (1.0/ubfraction)*(gpwgts[i]+gpwgts[i+1])/2; badmaxpwgt[i] = badmaxpwgt[i+1] = ubfraction*(gpwgts[i]+gpwgts[i+1])/2; } //myprintf(ctrl, "%6d %6d %6d %6d %6d %6d %6d\n", lpwgts[0], lpwgts[1], lpwgts[2], gpwgts[0], gpwgts[1], gpwgts[2], badmaxpwgt[0]); IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badminpwgt, badmaxpwgt, 1)); for (pass=0; pass<npasses; pass++) { oldcut = graph->mincut; for (c=0; c<2; c++) { for (i=0; i<nparts; i+=2) { badminpwgt[i] = badminpwgt[i+1] = (1.0/ubfraction)*(gpwgts[i]+gpwgts[i+1])/2; badmaxpwgt[i] = badmaxpwgt[i+1] = ubfraction*(gpwgts[i]+gpwgts[i+1])/2; } nlupd = nsupd = nmoves = nchanged = 0; for (ii=0; ii<nsep; ii++) { i = sepind[ii]; from = where[i]; ASSERT(ctrl, from >= nparts); /* Go through the loop to see if gain is possible for the separator vertex */ if (rinfo[i].edegrees[(c+1)%2] <= vwgt[i]) { /* It is a one-sded move so it will go to the other partition. Look at the comments in InitMultisection to understand the meaning of from%nparts */ to = from%nparts+c; if (gpwgts[to]+vwgt[i] > badmaxpwgt[to]) { /* printf("Skip because of weight! %d\n", vwgt[i]-rinfo[i].edegrees[(c+1)%2]); */ continue; /* We cannot move it there because it gets too heavy */ } /* Update the where information of the vertex you moved */ where[i] = to; /* Remove this vertex from the sepind. Note the trick for looking at the sepind[ii] again */ sepind[ii--] = sepind[--nsep]; /* myprintf(ctrl, "Vertex %d [%d %d] is moving to %d from %d [%d]\n", i+firstvtx, vwgt[i], rinfo[i].edegrees[(c+1)%2], to, from, where[i]); */ lpwgts[from] -= vwgt[i]; lpwgts[2*nparts-1] -= vwgt[i]; lpwgts[to] += vwgt[i]; gpwgts[to] += vwgt[i]; /* Put the vertices adjacent to i that belong to either the separator or the (c+1)%2 partition into the update array */ for (j=xadj[i]; j<xadj[i+1]; j++) { k = adjncy[j]; if (htable[k] == 0 && where[k] != to) { htable[k] = 1; if (k<nvtxs) update[nlupd++] = k; else supdate[nsupd++] = k; } } nmoves++; if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } } /* myprintf(ctrl, "nmoves: %d, nlupd: %d, nsupd: %d\n", nmoves, nlupd, nsupd); */ /* Tell everybody interested what the new where[] info is for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%d %d %d]\n", pass, c, GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*----------------------------------------------------------------------- / Time to communicate with processors to send the vertices whose degrees / need to be updated. /-----------------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; i<nsupd; i++) { htable[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } iidxsort(nsupd, supdate); for (j=i=0; i<nnbrs; i++) { otherlastvtx = vtxdist[peind[i]+1]; for (k=j; k<nsupd && supdate[k] < otherlastvtx; k++); MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; i<nnbrs; i++) MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the received to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; i<nnbrs; i++) { pe_updates = rupdate+sendptr[i]; for (j=0; j<nupds_pe[i]; j++) { k = pe_updates[j]; if (htable[k-firstvtx] == 0) { htable[k-firstvtx] = 1; update[nlupd++] = k-firstvtx; } } } /*------------------------------------------------------------- / Update the where information of the vertices that are pulled / into the separator. /-------------------------------------------------------------*/ nchanged = 0; for (ii=0; ii<nlupd; ii++) { i = update[ii]; me = where[i]; if (me < nparts && me%2 == (c+1)%2) { /* This vertex is pulled into the separator */ lpwgts[me] -= vwgt[i]; where[i] = nparts+me-(me%2); sepind[nsep++] = i; /* Put the vertex into the sepind array */ if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; lpwgts[where[i]] += vwgt[i]; lpwgts[2*nparts-1] += vwgt[i]; /* myprintf(ctrl, "Vertex %d moves into the separator from %d to %d\n", i+firstvtx, me, where[i]); */ } } /* Tell everybody interested what the new where[] info is for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); /*------------------------------------------------------------- / Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii<nlupd; ii++) { i = update[ii]; ASSERT(ctrl, htable[i] == 1); htable[i] = 0; me = where[i]; if (me >= nparts) { /* If it is a separator vertex */ /* myprintf(ctrl, "Updating %d %d\n", i+firstvtx, me); */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { other = where[adjncy[j]]; if (me != other) myrinfo->edegrees[other%2] += vwgt[adjncy[j]]; } } } /* Finally, sum-up the partition weights */ MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badminpwgt, badmaxpwgt, 0)); } if (graph->mincut == oldcut) break; } GKfree((void **)&update, &nupds_pe, &htable, &changed, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); }
/************************************************************************* * This function creates the coarser graph **************************************************************************/ void CreateCoarseGraph_NVW(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm) { int i, j, jj, k, kk, l, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, mask; idxtype *xadj, *adjncy, *adjwgtsum, *auxadj; idxtype *cmap, *htable; idxtype *cxadj, *cvwgt, *cadjncy, *cadjwgt, *cadjwgtsum; float *nvwgt, *cnvwgt; GraphType *cgraph; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr)); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgtsum = graph->adjwgtsum; cmap = graph->cmap; /* Initialize the coarser graph */ cgraph = SetUpCoarseGraph(graph, cnvtxs, 0); cxadj = cgraph->xadj; cvwgt = cgraph->vwgt; cnvwgt = cgraph->nvwgt; cadjwgtsum = cgraph->adjwgtsum; cadjncy = cgraph->adjncy; cadjwgt = cgraph->adjwgt; iend = xadj[nvtxs]; auxadj = ctrl->wspace.auxcore; memcpy(auxadj, adjncy, iend*sizeof(idxtype)); for (i=0; i<iend; i++) auxadj[i] = cmap[auxadj[i]]; mask = HTLENGTH; htable = idxset(mask+1, -1, idxwspacemalloc(ctrl, mask+1)); cxadj[0] = cnvtxs = cnedges = 0; for (i=0; i<nvtxs; i++) { v = perm[i]; if (cmap[v] != cnvtxs) continue; u = match[v]; cvwgt[cnvtxs] = 1; cadjwgtsum[cnvtxs] = adjwgtsum[v]; nedges = 0; istart = xadj[v]; iend = xadj[v+1]; for (j=istart; j<iend; j++) { k = auxadj[j]; kk = k&mask; if ((m = htable[kk]) == -1) { cadjncy[nedges] = k; cadjwgt[nedges] = 1; htable[kk] = nedges++; } else if (cadjncy[m] == k) { cadjwgt[m]++; } else { for (jj=0; jj<nedges; jj++) { if (cadjncy[jj] == k) { cadjwgt[jj]++; break; } } if (jj == nedges) { cadjncy[nedges] = k; cadjwgt[nedges++] = 1; } } } if (v != u) { cvwgt[cnvtxs]++; cadjwgtsum[cnvtxs] += adjwgtsum[u]; istart = xadj[u]; iend = xadj[u+1]; for (j=istart; j<iend; j++) { k = auxadj[j]; kk = k&mask; if ((m = htable[kk]) == -1) { cadjncy[nedges] = k; cadjwgt[nedges] = 1; htable[kk] = nedges++; } else if (cadjncy[m] == k) { cadjwgt[m]++; } else { for (jj=0; jj<nedges; jj++) { if (cadjncy[jj] == k) { cadjwgt[jj]++; break; } } if (jj == nedges) { cadjncy[nedges] = k; cadjwgt[nedges++] = 1; } } } /* Remove the contracted adjacency weight */ jj = htable[cnvtxs&mask]; if (jj >= 0 && cadjncy[jj] != cnvtxs) { for (jj=0; jj<nedges; jj++) { if (cadjncy[jj] == cnvtxs) break; } } if (jj >= 0 && cadjncy[jj] == cnvtxs) { /* This 2nd check is needed for non-adjacent matchings */ cadjwgtsum[cnvtxs] -= cadjwgt[jj]; cadjncy[jj] = cadjncy[--nedges]; cadjwgt[jj] = cadjwgt[nedges]; } } ASSERTP(cadjwgtsum[cnvtxs] == idxsum(nedges, cadjwgt), ("%d %d %d %d %d\n", cnvtxs, cadjwgtsum[cnvtxs], idxsum(nedges, cadjwgt), adjwgtsum[u], adjwgtsum[v])); for (j=0; j<nedges; j++) htable[cadjncy[j]&mask] = -1; /* Zero out the htable */ htable[cnvtxs&mask] = -1; cnedges += nedges; cxadj[++cnvtxs] = cnedges; cadjncy += nedges; cadjwgt += nedges; } cgraph->nedges = cnedges; ReAdjustMemory(graph, cgraph, 0); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr)); idxwspacefree(ctrl, mask+1); }
void ComputeNodePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j, nparts, nvtxs, nsep, firstvtx, lastvtx; idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt, *lpwgts, *gpwgts, *sepind, *hmarker; idxtype *where; NRInfoType *rinfo, *myrinfo; int me, other, otherwgt; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr)); nvtxs = graph->nvtxs; nparts = ctrl->nparts; vtxdist = graph->vtxdist; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; vwgt = graph->vwgt; where = graph->where; rinfo = graph->nrinfo; lpwgts = graph->lpwgts; gpwgts = graph->gpwgts; sepind = graph->sepind; hmarker = graph->hmarker; firstvtx = vtxdist[ctrl->mype]; lastvtx = vtxdist[ctrl->mype+1]; /* Reset refinement data structures */ idxset(2*nparts, 0, lpwgts); idxset(nvtxs, 0, hmarker); /* Send/Receive the where and vwgt information of interface vertices. */ CommInterfaceData(ctrl, graph, where, wspace->indices, where+nvtxs); CommInterfaceData(ctrl, graph, vwgt, wspace->indices, vwgt+nvtxs); /*------------------------------------------------------------ / Compute now the degrees /------------------------------------------------------------*/ for (nsep=i=0; i<nvtxs; i++) { me = where[i]; ASSERT(ctrl, me >= 0 && me < 2*nparts); lpwgts[me] += vwgt[i]; if (me >= nparts) { /* If it is a separator vertex */ sepind[nsep++] = i; lpwgts[2*nparts-1] += vwgt[i]; /* Keep track of total separator weight */ myrinfo = rinfo+i; myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { other = where[adjncy[j]]; if (me != other) myrinfo->edegrees[other%2] += vwgt[adjncy[j]]; } } } graph->nsep = nsep; /* Finally, sum-up the partition weights */ MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm); graph->mincut = gpwgts[2*nparts-1]; /* Mark the halo vertices by determining how many non-local vertices they are * connected to. */ idxset(nvtxs, 0, hmarker); for (i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (adjncy[j] >= nvtxs) { hmarker[i]++; } } } #ifdef XX /* Print Weight information */ if (ctrl->mype == 0) { for (i=0; i<nparts; i+=2) printf("[%5d %5d %5d] ", gpwgts[i], gpwgts[i+1], gpwgts[nparts+i]); printf("\n"); } #endif IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr)); }