/*************************************************************************
* Entry point for KMETIS with seed specification in options[7].
*
* Builds a uniform target-weight vector (every one of the *nparts
* partitions gets 1/(*nparts)) and forwards all arguments, together with
* that vector, to METIS_WPartGraphKway2.  The temporary vector is released
* before returning.
**************************************************************************/
void METIS_PartGraphKway2(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt,
                          idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts,
                          int *options, int *edgecut, idxtype *part)
{
  float *tpwgts;
  int p;

  /* One target weight per requested partition */
  tpwgts = fmalloc(*nparts, "KMETIS: tpwgts");

  p = 0;
  while (p < *nparts) {
    tpwgts[p] = 1.0/(1.0*(*nparts));
    p++;
  }

  /* The weighted entry point does the actual work */
  METIS_WPartGraphKway2(nvtxs, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag,
                        nparts, tpwgts, options, edgecut, part);

  GKfree((void **)&tpwgts, LTERM);
}
/*************************************************************************
* This function is the entry point of the initial partition algorithm
* that does recursive bisection.
* This algorithm assembles the graph to all the processors and proceeds
* by parallelizing the recursive bisection step.
*
* Outline, as visible in the code below:
*   1. Assemble the graph (Mc_AssembleAdaptiveGraph) and keep private
*      copies of its xadj/adjncy/adjwgt/vwgt arrays.
*   2. Split ctrl->gcomm into ngroups sub-communicators (ipcomm), using
*      ctrl->mype % ngroups as the color.
*   3. Inside each group, repeatedly bisect; each process follows the left
*      or the right half according to its rank in ipcomm, until either one
*      process or one partition remains on its branch.
*   4. Combine the per-process labels with an MPI_SUM reduction over ipcomm.
*   5. If there is more than one group, pick one group's result with an
*      MPI_MINLOC reduction over ctrl->gcomm and broadcast it.
*   6. Copy this process's slice of the result into graph->where.
*
* ctrl   - control structure (communicators, nparts, tpwgts, timers, ...)
* graph  - distributed graph; graph->where receives the partition vector
* wspace - workspace forwarded to the helpers
**************************************************************************/
void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j;
  int ncon, mype, npes, gnvtxs, ngroups;
  idxtype *xadj, *adjncy, *adjwgt, *vwgt;
  idxtype *part, *gwhere0, *gwhere1;
  idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
  GraphType *agraph;
  int lnparts, fpart, fpe, lnpes;
  int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
  float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum;
  MPI_Comm ipcomm;
  /* (value, rank) pair laid out for MPI_FLOAT_INT / MPI_MINLOC */
  struct {
    float sum;
    int rank;
  } lpesum, gpesum;

  ncon = graph->ncon;
  /* Number of groups: RIP_SPLIT_FACTOR limited by the process count, never below 1
     (presumably amin/amax are the usual min/max macros) */
  ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
  part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part");
  xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj");
  adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy");
  adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt");
  vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt");

  /* Private copies of the assembled graph arrays.  They are swapped into agraph
     further down (ngroups > 1) to evaluate the finished partition; presumably
     Mc_KeepPart modifies agraph's own arrays in between - TODO confirm */
  idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt);
  idxcopy(agraph->nvtxs+1, agraph->xadj, xadj);
  idxcopy(agraph->nedges, agraph->adjncy, adjncy);
  idxcopy(agraph->nedges, agraph->adjwgt, adjwgt);

  /* From here on mype/npes are the rank/size inside this process's group (ipcomm),
     not the global ctrl->mype/ctrl->npes */
  MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
  MPI_Comm_rank(ipcomm, &mype);
  MPI_Comm_size(ipcomm, &npes);

  /* Remember the vertex count of the assembled graph; agraph->nvtxs is reset to it later */
  gnvtxs = agraph->nvtxs;

  /* gwhere0: this process's contribution to the group result (allocated with fill
     value 0); gwhere1: receives the MPI_SUM reduction of all contributions */
  gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0");
  gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1");

  /* ADD: this assumes that tpwgts for all constraints is the same */
  /* ADD: this is necessary because serial metis does not support the general case */
  /* mytpwgts[i] = average over the ncon constraints of ctrl->tpwgts[i*ncon + j] */
  mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
  for (i=0; i<ctrl->nparts; i++)
    for (j=0; j<ncon; j++)
      mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
  for (i=0; i<ctrl->nparts; i++)
    mytpwgts[i] /= (float)ncon;

  /* Go into the recursive bisection */
  /* ADD: consider changing this to breadth-first type bisection */
  moptions[0] = 0;
  /* options[7] carries the seed for the "...2" METIS entry points; it differs per group */
  moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1;

  /* State of this process's branch:
       lnparts - number of partitions still to be produced on this branch
       fpart   - index of the first of those partitions
       lnpes   - number of group processes working on this branch
       fpe     - group rank of the first of those processes */
  lnparts = ctrl->nparts;
  fpart = fpe = 0;
  lnpes = npes;
  while (lnpes > 1 && lnparts > 1) {
    /* Determine the weights of the partitions */
    mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
    mytpwgts2[1] = 1.0-mytpwgts2[0];

    /* Two-way split of the current graph: single-constraint vs multi-constraint entry point */
    if (ncon == 1)
      METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                            agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                            moptions, &edgecut, part);
    else {
      METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                  &twoparts, mytpwgts2, moptions, &edgecut, part);
    }

    /* Rescale the target weights of the two halves by 1/wsum and 1/(1-wsum) */
    wsum = ssum(lnparts/2, mytpwgts+fpart);
    sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
    sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

    /* I'm picking the left branch */
    if (mype < fpe+lnpes/2) {
      Mc_KeepPart(agraph, wspace, part, 0);
      lnpes = lnpes/2;
      lnparts = lnparts/2;
    }
    /* Otherwise follow the right branch and advance the partition/process offsets */
    else {
      Mc_KeepPart(agraph, wspace, part, 1);
      fpart = fpart + lnparts/2;
      fpe = fpe + lnpes/2;
      lnpes = lnpes - lnpes/2;
      lnparts = lnparts - lnparts/2;
    }
  }

  /* In case npes is greater than or equal to nparts */
  if (lnparts == 1) {
    /* Only the first process will assign labels (for the reduction to work) */
    if (mype == fpe) {
      /* agraph->label[i] is used as the index into the gnvtxs-sized result vector */
      for (i=0; i<agraph->nvtxs; i++)
        gwhere0[agraph->label[i]] = fpart;
    }
  }
  /* In case npes is smaller than nparts */
  else {
    /* Split the remaining graph into the lnparts partitions of this branch */
    if (ncon == 1)
      METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                            agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                            moptions, &edgecut, part);
    else
      METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                  &lnparts, mytpwgts+fpart, moptions, &edgecut, part);

    /* Local partition numbers are offset by the branch's first partition index */
    for (i=0; i<agraph->nvtxs; i++)
      gwhere0[agraph->label[i]] = fpart + part[i];
  }

  /* Sum the contributions of all processes in the group into gwhere1 */
  MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm);

  /* With several groups there are several candidate partitionings: choose one */
  if (ngroups > 1) {
    /* Save agraph's current arrays and temporarily install the full-graph copies
       and the group result so the serial evaluation routines see the whole graph */
    tmpxadj = agraph->xadj;
    tmpadjncy = agraph->adjncy;
    tmpadjwgt = agraph->adjwgt;
    tmpvwgt = agraph->vwgt;
    tmpwhere = agraph->where;
    agraph->xadj = xadj;
    agraph->adjncy = adjncy;
    agraph->adjwgt = adjwgt;
    agraph->vwgt = vwgt;
    agraph->where = gwhere1;
    /* NOTE(review): vwgt was already assigned three lines above; this repeat is redundant */
    agraph->vwgt = vwgt;
    agraph->nvtxs = gnvtxs;

    /* lbsum = sum of the per-constraint balance values of this candidate */
    Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
    lbsum = ssum(ncon, lbvec);

    edgecut = ComputeSerialEdgeCut(agraph);
    MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm);
    MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm);

    /* Selection key for MPI_MINLOC:
         - nobody is under UNBALANCE_FRACTION*ncon: key is lbsum;
         - somebody is: candidates under the threshold use their edge cut, the others
           use the global maximum cut.
       NOTE(review): a candidate over the threshold can still tie with one under it
       whose cut equals max_cut - confirm whether that is intended */
    lpesum.sum = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
        lpesum.sum = (float) (edgecut);
      else
        lpesum.sum = (float) (max_cut);
    }

    /* The process holding the smallest key broadcasts its gwhere1 to everyone */
    MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank));
    MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm);
    MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm);

    /* Put agraph's own arrays back before it is freed below */
    agraph->xadj = tmpxadj;
    agraph->adjncy = tmpadjncy;
    agraph->adjwgt = tmpadjwgt;
    agraph->vwgt = tmpvwgt;
    agraph->where = tmpwhere;
  }

  /* Keep only this process's slice, starting at graph->vtxdist[ctrl->mype] */
  idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

  FreeGraph(agraph);
  MPI_Comm_free(&ipcomm);
  GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part,
         (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));
}
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph to all the processors and proceeds
* with the balancing step.
*
* Outline, as visible in the code below:
*   1. Assemble the graph and copy it into one contiguous buffer that also
*      holds the part / lwhere / home vectors (cgraph aliases that buffer).
*   2. Initialise part/home from graph->home (DISCOUPLED) or from the
*      owning process of each vertex, then fold them into [0, nparts).
*   3. Split the processes: even global ranks do scratch-remap, odd global
*      ranks do diffusion.  When graph->ncon > MAX_NCON_FOR_DIFFUSION or
*      there is only one process, everybody does scratch-remap.
*   4. Each group selects its best candidate and hands it to its
*      coordinator (sr_pe = 0, gd_pe = 1).
*   5. If both groups exist, the coordinators compare cost and balance and
*      sr_pe broadcasts the winner; otherwise sr_pe wins by default.
*   6. The winner broadcasts part, and each process copies its slice into
*      graph->where.
*
* ctrl   - control structure (communicators, nparts, tpwgts, factors, ...)
* graph  - distributed graph; graph->where receives the partition vector
* wspace - workspace forwarded to the helpers
**************************************************************************/
void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j, mype, npes, nvtxs, nedges, ncon;
  idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
  idxtype *part, *lwhere, *home;
  GraphType *agraph, cgraph;
  CtrlType myctrl;
  int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype;
  int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
  int sr_pe, gd_pe, sr, two_groups, who_wins, *rcounts, *rdispls;
  float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
  float rating, max_rating, your_cost = -1.0, your_balance = -1.0;
  float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2];
  MPI_Status status;
  MPI_Comm ipcomm, srcomm;
  /* (value, rank) pair laid out for MPI_FLOAT_INT / MPI_MINLOC */
  struct {
    float cost;
    int rank;
  } lpecost, gpecost;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  vtxdist = graph->vtxdist;
  agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
  nvtxs = cgraph.nvtxs = agraph->nvtxs;
  nedges = cgraph.nedges = agraph->nedges;
  ncon = cgraph.ncon = agraph->ncon;

  /* One allocation, carved up as:
       xadj   [nvtxs+1]
       vwgt   [nvtxs*ncon]
       vsize  [nvtxs]
       part   [nvtxs]   (also installed as cgraph.where and agraph->where)
       lwhere [nvtxs]
       home   [nvtxs]
       adjncy [nedges]
       adjwgt [nedges]
     Only xadj is ever passed to GKfree. */
  xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj");
  vwgt = cgraph.vwgt = xadj + nvtxs+1;
  vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1;
  cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1;
  lwhere = xadj + nvtxs*(3+ncon)+1;
  home = xadj + nvtxs*(4+ncon)+1;
  adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1;
  adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges;

  /* ADD: this assumes that tpwgts for all constraints is the same */
  /* ADD: this is necessary because serial metis does not support the general case */
  /* mytpwgts[i] = average over the ncon constraints of ctrl->tpwgts[i*ncon + j] */
  mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
  for (i=0; i<ctrl->nparts; i++)
    for (j=0; j<ncon; j++)
      mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
  for (i=0; i<ctrl->nparts; i++)
    mytpwgts[i] /= (float)ncon;

  /* cgraph keeps an untouched copy of the assembled graph */
  idxcopy(nvtxs+1, agraph->xadj, xadj);
  idxcopy(nvtxs*ncon, agraph->vwgt, vwgt);
  idxcopy(nvtxs, agraph->vsize, vsize);
  idxcopy(nedges, agraph->adjncy, adjncy);
  idxcopy(nedges, agraph->adjwgt, adjwgt);

  /****************************************/
  /* Set up the initial part/home vectors */
  /****************************************/
  if (ctrl->ps_relation == DISCOUPLED) {
    /* Gather every process's graph->home; counts and displacements come from vtxdist */
    rcounts = imalloc(ctrl->npes, "rcounts");
    rdispls = imalloc(ctrl->npes+1, "rdispls");

    for (i=0; i<ctrl->npes; i++) {
      rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
    }
    MAKECSR(i, ctrl->npes, rdispls);

    MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE,
                   (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm);

    for (i=0; i<agraph->nvtxs; i++)
      home[i] = part[i];

    GKfree((void **)&rcounts, (void **)&rdispls, LTERM);
  }
  else {
    /* Every vertex starts in the partition numbered like the process that owns it */
    for (i=0; i<ctrl->npes; i++)
      for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
        part[j] = home[j] = i;
  }

  /* Ensure that the initial partitioning is legal */
  for (i=0; i<agraph->nvtxs; i++) {
    if (part[i] >= ctrl->nparts)
      part[i] = home[i] = part[i] % ctrl->nparts;
    if (part[i] < 0)
      part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
  }

  /****************************************/
  /* Optional report of the input quality */
  /****************************************/
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph)));
  for (i=0; i<agraph->ncon; i++)
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i]));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

  /****************************************/
  /* Split the processors into two groups */
  /****************************************/
  /* A diffusion group exists only when the constraint count allows diffusion AND
     there is a second process to run it.  The same predicate guards the
     coordinator exchange at the end of the function: with a single process there
     is no gd_pe, so sr_pe must neither receive from it nor name it as the
     broadcast root. */
  two_groups = (graph->ncon <= MAX_NCON_FOR_DIFFUSION && ctrl->npes > 1);

  /* sr == 1: scratch-remap (even global ranks); sr == 0: diffusion (odd global ranks) */
  sr = (ctrl->mype % 2 == 0) ? 1 : 0;
  if (!two_groups)
    sr = 1;

  /* Global ranks of the two group coordinators */
  sr_pe = 0;
  gd_pe = 1;

  /* From here on mype/npes are the rank/size inside this process's group */
  MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
  MPI_Comm_rank(ipcomm, &mype);
  MPI_Comm_size(ipcomm, &npes);

  /* Group-local control structure handed to the diffusion routines */
  myctrl.dbglvl = 0;
  myctrl.mype = mype;
  myctrl.npes = npes;
  myctrl.comm = ipcomm;
  myctrl.sync = ctrl->sync;
  myctrl.seed = ctrl->seed;
  myctrl.nparts = ctrl->nparts;
  myctrl.ipc_factor = ctrl->ipc_factor;
  myctrl.redist_factor = ctrl->redist_base;
  myctrl.partType = ADAPTIVE_PARTITION;
  myctrl.ps_relation = DISCOUPLED;
  myctrl.tpwgts = ctrl->tpwgts;
  icopy(ncon, ctrl->tvwgts, myctrl.tvwgts);
  icopy(ncon, ctrl->ubvec, myctrl.ubvec);

  if (sr == 1) {
    /*******************************************/
    /* Half of the processors do scratch-remap */
    /*******************************************/
    /* Sub-split the scratch-remap group; each sub-group bisects with its own seed */
    ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1);
    MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
    MPI_Comm_rank(srcomm, &srmype);
    MPI_Comm_size(srcomm, &srnpes);

    moptions[0] = 0;
    /* options[7] carries the seed for the "...2" METIS entry points */
    moptions[7] = ctrl->sync + (mype % ngroups) + 1;

    /* lwhere collects this process's labels; it is summed into part further down */
    idxset(nvtxs, 0, lwhere);

    /* State of this process's branch:
         lnparts - number of partitions still to be produced on this branch
         fpart   - index of the first of those partitions
         lnpes   - number of sub-group processes working on this branch
         fpe     - sub-group rank of the first of those processes */
    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = srnpes;
    while (lnpes > 1 && lnparts > 1) {
      ASSERT(ctrl, agraph->nvtxs > 1);
      /* Determine the weights of the partitions */
      mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
      mytpwgts2[1] = 1.0-mytpwgts2[0];

      /* Two-way split: single-constraint vs multi-constraint entry point */
      if (agraph->ncon == 1) {
        METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                              agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                              moptions, &edgecut, part);
      }
      else {
        METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                    agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                    &twoparts, mytpwgts2, moptions, &edgecut, part);
      }

      /* Rescale the target weights of the two halves by 1/wsum and 1/(1-wsum) */
      wsum = ssum(lnparts/2, mytpwgts+fpart);
      sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
      sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

      /* I'm picking the left branch */
      if (srmype < fpe+lnpes/2) {
        Mc_KeepPart(agraph, wspace, part, 0);
        lnpes = lnpes/2;
        lnparts = lnparts/2;
      }
      /* Otherwise follow the right branch and advance the partition/process offsets */
      else {
        Mc_KeepPart(agraph, wspace, part, 1);
        fpart = fpart + lnparts/2;
        fpe = fpe + lnpes/2;
        lnpes = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }

    /* In case srnpes is greater than or equal to nparts */
    if (lnparts == 1) {
      /* Only the first process will assign labels (for the reduction to work) */
      if (srmype == fpe) {
        for (i=0; i<agraph->nvtxs; i++)
          lwhere[agraph->label[i]] = fpart;
      }
    }
    /* In case srnpes is smaller than nparts */
    else {
      if (ncon == 1)
        METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                              agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                              moptions, &edgecut, part);
      else
        METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                    agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                    &lnparts, mytpwgts+fpart, moptions, &edgecut, part);

      /* Local partition numbers are offset by the branch's first partition index */
      for (i=0; i<agraph->nvtxs; i++)
        lwhere[agraph->label[i]] = fpart + part[i];
    }

    /* Sum the contributions of the sub-group into part (which is cgraph.where) */
    MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm);

    /* Rate this sub-group's candidate on the untouched copy of the graph */
    edgecut = ComputeSerialEdgeCut(&cgraph);
    Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
    lbsum = ssum(ncon, lbvec);
    MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm);
    MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);

    /* Selection key for MPI_MINLOC (rank is the global rank):
         - nobody is under UNBALANCE_FRACTION*ncon: key is lbsum;
         - somebody is: candidates under the threshold use their edge cut, the
           others use max_cut + lbsum. */
    lpecost.rank = ctrl->mype;
    lpecost.cost = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
        lpecost.cost = (float)edgecut;
      else
        lpecost.cost = (float)max_cut + lbsum;
    }

    MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

    /* Ship the winning candidate to the scratch-remap coordinator unless it already has it */
    if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) {
      MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm);
    }

    if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) {
      MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);
    }

    /* The coordinator remaps the candidate (copied to lwhere) against home; result goes to part */
    if (ctrl->mype == sr_pe) {
      idxcopy(nvtxs, part, lwhere);
      SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
    }

    MPI_Comm_free(&srcomm);
  }
  /**************************************/
  /* The other half do global diffusion */
  /**************************************/
  else {
    /******************************************************************/
    /* The next stmt is required to balance out the sr MPI_Comm_split */
    /******************************************************************/
    MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm);

    if (ncon == 1) {
      rating = WavefrontDiffusion(&myctrl, agraph, home);
      Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
      lbsum = ssum(ncon, lbvec);

      /* Determine which PE computed the best partitioning */
      MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm);
      MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);

      /* Same selection scheme as in the scratch-remap group, with rating in place of the cut */
      lpecost.rank = ctrl->mype;
      lpecost.cost = lbsum;
      if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
        if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
          lpecost.cost = rating;
        else
          lpecost.cost = max_rating + lbsum;
      }

      MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

      /* Now send this to the coordinating processor */
      if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe)
        MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm);

      if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe)
        MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);

      if (ctrl->mype == gd_pe) {
        idxcopy(nvtxs, part, lwhere);
        SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
      }
    }
    else {
      Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES);
    }
  }

  if (two_groups) {
    if (ctrl->mype == sr_pe || ctrl->mype == gd_pe) {
      /********************************************************************/
      /* The coordinators from each group decide on the best partitioning */
      /********************************************************************/
      my_cut = (float) ComputeSerialEdgeCut(&cgraph);
      my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home);
      Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
      /* my_balance is the average of the per-constraint balance values */
      my_balance = ssum(cgraph.ncon, lbvec);
      my_balance /= (float) cgraph.ncon;
      my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv;

      IFSET(ctrl->dbglvl, DBG_REFINEINFO,
            printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n",
                   (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"),
                   my_cut, my_totalv, my_balance));

      /* gd_pe reports (cost, balance) to sr_pe, which takes the decision */
      if (ctrl->mype == gd_pe) {
        buffer[0] = my_cost;
        buffer[1] = my_balance;
        MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm);
      }
      else {
        MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status);
        your_cost = buffer[0];
        your_balance = buffer[1];
      }
    }

    if (ctrl->mype == sr_pe) {
      /* Diffusion wins unless scratch-remap is
           - the only one with balance below 1.1, or
           - cheaper while both are below 1.1, or
           - better balanced while both are above 1.1. */
      who_wins = gd_pe;
      if ((my_balance < 1.1 && your_balance > 1.1) ||
          (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) ||
          (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) {
        who_wins = sr_pe;
      }
    }

    MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm);
  }
  else {
    /* No diffusion group was formed, so the scratch-remap result is the only candidate */
    who_wins = sr_pe;
  }

  /* The winning coordinator distributes its partition; keep only the local slice */
  MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm);
  idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where);

  MPI_Comm_free(&ipcomm);
  GKfree((void **)&xadj, (void **)&mytpwgts, LTERM);

  /* For whatever reason, FreeGraph crashes here...so explicitly free the memory.
     FreeGraph(agraph);
     NOTE(review): agraph->where was pointed into the middle of the xadj buffer
     above; if FreeGraph releases graph->where, that would explain the crash -
     confirm against FreeGraph. */
  GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt,
         (void **)&agraph->nvwgt, LTERM);
  GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM);
  GKfree((void **)&agraph, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));
}