/************************************************************************* * This function is the entry point of the initial partition algorithm * that does recursive bissection. * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ void InitPartition(ctrl_t *ctrl, graph_t *graph) { idx_t i, j, ncon, mype, npes, gnvtxs, ngroups; idx_t *xadj, *adjncy, *adjwgt, *vwgt; idx_t *part, *gwhere0, *gwhere1; idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; graph_t *agraph; idx_t lnparts, fpart, fpe, lnpes; idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut; real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { double sum; int rank; } lpesum, gpesum; WCOREPUSH; ncon = graph->ncon; ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); lbvec = rwspacemalloc(ctrl, ncon); /* assemble the graph to all the processors */ agraph = AssembleAdaptiveGraph(ctrl, graph); gnvtxs = agraph->nvtxs; /* make a copy of the graph's structure for later */ xadj = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1)); vwgt = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon)); adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges)); adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges)); part = iwspacemalloc(ctrl, gnvtxs); /* create different processor groups */ gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); gkMPI_Comm_rank(ipcomm, &mype); gkMPI_Comm_size(ipcomm, &npes); /* Go into the recursive bisection */ METIS_SetDefaultOptions(moptions); moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1; tpwgts = ctrl->tpwgts; tpwgts2 = rwspacemalloc(ctrl, 2*ncon); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { /* determine the weights of the two partitions as a function of the weight of the target partition weights */ for (j=(lnparts>>1), i=0; i<ncon; i++) { tpwgts2[i] = rsum(j, tpwgts+fpart*ncon+i, ncon); tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon); wsum = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]); tpwgts2[i] *= wsum; tpwgts2[ncon+i] *= wsum; } METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, &edgecut, part); /* pick one of the branches */ if (mype < fpe+lnpes/2) { KeepPart(ctrl, agraph, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { KeepPart(ctrl, agraph, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs)); gwhere1 = iwspacemalloc(ctrl, gnvtxs); if (lnparts == 1) { /* Case npes is greater than or equal to nparts */ /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart; } } else { /* Case in which npes is smaller than nparts */ /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */ tpwgts = rwspacemalloc(ctrl, lnparts*ncon); for (j=0; j<ncon; j++) { for (wsum=0.0, i=0; i<lnparts; i++) { tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j]; wsum += tpwgts[i*ncon+j]; } for (wsum=1.0/wsum, i=0; i<lnparts; i++) tpwgts[i*ncon+j] *= wsum; } METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm); if (ngroups > 1) { tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; tmpvwgt = agraph->vwgt; tmpwhere = agraph->where; agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; agraph->vwgt = vwgt; agraph->where = gwhere1; agraph->vwgt = vwgt; agraph->nvtxs = gnvtxs; edgecut = ComputeSerialEdgeCut(agraph); ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); lbsum = rsum(ncon, lbvec, 1); gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ctrl->gcomm); gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; if (min_lbsum < UNBALANCE_FRACTION*ncon) { if (lbsum < UNBALANCE_FRACTION*ncon) lpesum.sum = edgecut; else lpesum.sum = max_cut; } lpesum.rank = ctrl->mype; gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT, MPI_MINLOC, ctrl->gcomm); gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm); agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; agraph->vwgt = tmpvwgt; agraph->where = tmpwhere; } icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); gkMPI_Comm_free(&ipcomm); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); WCOREPOP; }
/*********************************************************************************** * This function is the entry point of the serial ordering algorithm. ************************************************************************************/ int ParMETIS_SerialNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, MPI_Comm *comm) { idx_t i, npes, mype; ctrl_t *ctrl=NULL; graph_t *agraph=NULL; idx_t *perm=NULL, *iperm=NULL; idx_t *sendcount, *displs; /* Setup the ctrl */ ctrl = SetupCtrl(PARMETIS_OP_OMETIS, options, 1, 1, NULL, NULL, *comm); npes = ctrl->npes; mype = ctrl->mype; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); FreeCtrl(&ctrl); return METIS_ERROR; } if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); STARTTIMER(ctrl, ctrl->TotalTmr); STARTTIMER(ctrl, ctrl->MoveTmr); agraph = AssembleEntireGraph(ctrl, vtxdist, xadj, adjncy); STOPTIMER(ctrl, ctrl->MoveTmr); if (mype == 0) { perm = imalloc(agraph->nvtxs, "PAROMETISS: perm"); iperm = imalloc(agraph->nvtxs, "PAROMETISS: iperm"); METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, npes, NULL, perm, iperm, sizes); } STARTTIMER(ctrl, ctrl->MoveTmr); /* Broadcast the sizes array */ gkMPI_Bcast((void *)sizes, 2*npes, IDX_T, 0, ctrl->gcomm); /* Scatter the iperm */ sendcount = imalloc(npes, "PAROMETISS: sendcount"); displs = imalloc(npes, "PAROMETISS: displs"); for (i=0; i<npes; i++) { sendcount[i] = vtxdist[i+1]-vtxdist[i]; displs[i] = vtxdist[i]; } gkMPI_Scatterv((void *)iperm, sendcount, displs, IDX_T, (void *)order, vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, ctrl->gcomm); STOPTIMER(ctrl, ctrl->MoveTmr); STOPTIMER(ctrl, ctrl->TotalTmr); IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); gk_free((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm, &sendcount, &displs, &agraph, LTERM); if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); goto DONE; DONE: FreeCtrl(&ctrl); return METIS_OK; }