/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ int ParMETIS_V3_RefineKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm) { idx_t npes, mype, status; ctrl_t *ctrl=NULL; graph_t *graph=NULL; size_t curmem; /* Check the input parameters and return if an error */ status = CheckInputsPartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm); if (GlobalSEMinComm(*comm, status) == 0) return METIS_ERROR; status = METIS_OK; gk_malloc_init(); curmem = gk_GetCurMemoryUsed(); /* Setup ctrl */ ctrl = SetupCtrl(PARMETIS_OP_RMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); npes = ctrl->npes; mype = ctrl->mype; /* Take care the nparts == 1 case */ if (*nparts == 1) { iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; goto DONE; } /* setup the graph */ if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); if (ctrl->ps_relation == PARMETIS_PSR_COUPLED) iset(graph->nvtxs, mype, graph->home); else icopy(graph->nvtxs, part, graph->home); /* Allocate workspace */ AllocateWSpace(ctrl, 10*graph->nvtxs); /* Partition and Remap */ STARTTIMER(ctrl, ctrl->TotalTmr); ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 50*(*ncon)*gk_max(npes, *nparts)); Adaptive_Partition(ctrl, graph); ParallelReMapGraph(ctrl, graph); icopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; STOPTIMER(ctrl, ctrl->TotalTmr); /* Take care of output */ IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1)); FreeInitialGraphAndRemap(graph); if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); DONE: FreeCtrl(&ctrl); if (gk_GetCurMemoryUsed() - curmem > 0) { printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } gk_malloc_cleanup(0); return (int)status; }
void Adaptive_Partition(ctrl_t *ctrl, graph_t *graph) { idx_t i; idx_t tewgt, tvsize; real_t gtewgt, gtvsize; real_t ubavg, lbavg, *lbvec; WCOREPUSH; lbvec = rwspacemalloc(ctrl, graph->ncon); /************************************/ /* Set up important data structures */ /************************************/ CommSetup(ctrl, graph); ubavg = ravg(graph->ncon, ctrl->ubvec); tewgt = isum(graph->nedges, graph->adjwgt, 1); tvsize = isum(graph->nvtxs, graph->vsize, 1); gtewgt = (real_t) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs; /* The +1/graph->gnvtxs were added to remove any FPE */ gtvsize = (real_t) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs; ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio); IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"]\n", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo)); if (graph->gnvtxs < 1.3*ctrl->CoarsenTo || (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) { AllocateRefinementWorkSpace(ctrl, 2*graph->nedges); /***********************************************/ /* Balance the partition on the coarsest graph */ /***********************************************/ graph->where = ismalloc(graph->nvtxs+graph->nrecv, -1, "graph->where"); icopy(graph->nvtxs, graph->home, graph->where); ComputeParallelBalance(ctrl, graph, graph->where, lbvec); lbavg = ravg(graph->ncon, lbvec); if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION) Balance_Partition(ctrl, graph); if (ctrl->dbglvl&DBG_PROGRESS) { ComputePartitionParams(ctrl, graph); ComputeParallelBalance(ctrl, graph, graph->where, lbvec); rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", graph->gnvtxs, graph->mincut); for (i=0; i<graph->ncon; i++) rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); /* free memory allocated by ComputePartitionParams */ gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM); } /* check if no coarsening took place */ if (graph->finer == NULL) { ComputePartitionParams(ctrl, graph); KWayBalance(ctrl, graph, graph->ncon); KWayAdaptiveRefine(ctrl, graph, NGR_PASSES); } } else { /*******************************/ /* Coarsen it and partition it */ /*******************************/ switch (ctrl->ps_relation) { case PARMETIS_PSR_COUPLED: Match_Local(ctrl, graph); break; case PARMETIS_PSR_UNCOUPLED: default: Match_Global(ctrl, graph); break; } Adaptive_Partition(ctrl, graph->coarser); /********************************/ /* project partition and refine */ /********************************/ ProjectPartition(ctrl, graph); ComputePartitionParams(ctrl, graph); if (graph->ncon > 1 && graph->level < 4) { ComputeParallelBalance(ctrl, graph, graph->where, lbvec); lbavg = ravg(graph->ncon, lbvec); if (lbavg > ubavg + 0.025) { KWayBalance(ctrl, graph, graph->ncon); } } KWayAdaptiveRefine(ctrl, graph, NGR_PASSES); if (ctrl->dbglvl&DBG_PROGRESS) { ComputeParallelBalance(ctrl, graph, graph->where, lbvec); rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", graph->gnvtxs, graph->mincut); for (i=0; i<graph->ncon; i++) rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]); rprintf(ctrl, "\n"); } } WCOREPOP; }
/************************************************************************* * This function is the driver for the adaptive refinement mode of ParMETIS **************************************************************************/ void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i; int tewgt, tvsize; floattype gtewgt, gtvsize; floattype ubavg, lbavg, lbvec[MAXNCON]; /************************************/ /* Set up important data structures */ /************************************/ SetUp(ctrl, graph, wspace); ubavg = savg(graph->ncon, ctrl->ubvec); tewgt = idxsum(graph->nedges, graph->adjwgt); tvsize = idxsum(graph->nvtxs, graph->vsize); gtewgt = (floattype) GlobalSESum(ctrl, tewgt) + 1.0; /* The +1 were added to remove any FPE */ gtvsize = (floattype) GlobalSESum(ctrl, tvsize) + 1.0; ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio); IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d]\n", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo)); if (graph->gnvtxs < 1.3*ctrl->CoarsenTo || (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) { /***********************************************/ /* Balance the partition on the coarsest graph */ /***********************************************/ graph->where = idxsmalloc(graph->nvtxs+graph->nrecv, -1, "graph->where"); idxcopy(graph->nvtxs, graph->home, graph->where); Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); lbavg = savg(graph->ncon, lbvec); if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION) Balance_Partition(ctrl, graph, wspace); if (ctrl->dbglvl&DBG_PROGRESS) { Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs); for (i=0; i<graph->ncon; i++) rprintf(ctrl, "%.3f ", lbvec[i]); rprintf(ctrl, "\n"); } /* check if no coarsening took place */ if (graph->finer == NULL) { Moc_ComputePartitionParams(ctrl, graph, wspace); Moc_KWayBalance(ctrl, graph, wspace, graph->ncon); Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); } } else { /*******************************/ /* Coarsen it and partition it */ /*******************************/ switch (ctrl->ps_relation) { case COUPLED: Mc_LocalMatch_HEM(ctrl, graph, wspace); break; case DISCOUPLED: default: Moc_GlobalMatch_Balance(ctrl, graph, wspace); break; } Adaptive_Partition(ctrl, graph->coarser, wspace); /********************************/ /* project partition and refine */ /********************************/ Moc_ProjectPartition(ctrl, graph, wspace); Moc_ComputePartitionParams(ctrl, graph, wspace); if (graph->ncon > 1 && graph->level < 4) { Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); lbavg = savg(graph->ncon, lbvec); if (lbavg > ubavg + 0.025) { Moc_KWayBalance(ctrl, graph, wspace, graph->ncon); } } Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES); if (ctrl->dbglvl&DBG_PROGRESS) { Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut); for (i=0; i<graph->ncon; i++) rprintf(ctrl, "%.3f ", lbvec[i]); rprintf(ctrl, "\n"); } } }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i; int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; int tewgt, tvsize, nmoved, maxin, maxout; float gtewgt, gtvsize, avg, maximb; int ps_relation, seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /* ADD: take care of disconnected graph */ /* ADD: take care of highly unbalanced vtxdist */ /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /**************************/ /* Set up data structures */ /**************************/ if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED; } SetUpCtrl(&ctrl, inparts, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts)); ctrl.ipc_factor = 1000.0; ctrl.redist_factor = 1.0; ctrl.redist_base = 1.0; ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = REFINE_PARTITION; ctrl.ps_relation = ps_relation; ctrl.tpwgts = itpwgts; graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize"); graph->home = idxmalloc(graph->nvtxs, "home"); if (ctrl.ps_relation == COUPLED) idxset(graph->nvtxs, mype, graph->home); else idxcopy(graph->nvtxs, part, graph->home); tewgt = idxsum(graph->nedges, graph->adjwgt); tvsize = idxsum(graph->nvtxs, graph->vsize); gtewgt = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs; gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs; ctrl.edge_size_ratio = gtewgt/gtvsize; scopy(incon, iubvec, ctrl.ubvec); PreAllocateMemory(&ctrl, graph, &wspace); /***********************/ /* Partition and Remap */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Adaptive_Partition(&ctrl, graph, &wspace); ParallelReMapGraph(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); idxcopy(graph->nvtxs, graph->where, part); if (edgecut != NULL) *edgecut = graph->mincut; /***********************/ /* Take care of output */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout); rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout); } /*************************************/ /* Free memory, renumber, and return */ /*************************************/ GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM); GKfree((void **)&itpwgts, LTERM); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); return; }