/******************************************************************************
* This function takes a graph and its partition vector and creates a new
* graph corresponding to the one after the movement
*******************************************************************************/
void TestMoveGraph(graph_t *ograph, graph_t *omgraph, idx_t *part, MPI_Comm comm)
{
  idx_t npes, mype;
  ctrl_t *ctrl;
  graph_t *graph, *mgraph;
  idx_t options[5] = {0, 0, 1, 0, 0};

  gkMPI_Comm_size(comm, &npes);
  ctrl = SetupCtrl(PARMETIS_OP_KMETIS, NULL, 1, npes, NULL, NULL, comm);
  mype = ctrl->mype;

  ctrl->CoarsenTo = 1;  /* Needed by SetUpGraph, otherwise we can get FP errors */
  graph = TestSetUpGraph(ctrl, ograph->vtxdist, ograph->xadj, NULL, ograph->adjncy, NULL, 0);
  AllocateWSpace(ctrl, 0);

  CommSetup(ctrl, graph);
  graph->where = part;
  graph->ncon  = 1;
  mgraph = MoveGraph(ctrl, graph);

  omgraph->gnvtxs  = mgraph->gnvtxs;
  omgraph->nvtxs   = mgraph->nvtxs;
  omgraph->nedges  = mgraph->nedges;
  omgraph->vtxdist = mgraph->vtxdist;
  omgraph->xadj    = mgraph->xadj;
  omgraph->adjncy  = mgraph->adjncy;
  mgraph->vtxdist  = NULL;
  mgraph->xadj     = NULL;
  mgraph->adjncy   = NULL;
  FreeGraph(mgraph);

  graph->where = NULL;
  FreeInitialGraphAndRemap(graph);
  FreeCtrl(&ctrl);
}
/***********************************************************************************
* This function is the entry point of the parallel geometric partitioning
* algorithm. It partitions the vertices based solely on their coordinates,
* building a fake ring graph so that the rest of the code works unchanged.
************************************************************************************/
void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part,
    MPI_Comm *comm)
{
  int i, npes, mype, nvtxs, firstvtx, dbglvl;
  idxtype *xadj, *adjncy;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int zeroflg = 0;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  if (npes == 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    return;
  }

  /* Setup a fake graph to allow the rest of the code to work unchanged */
  dbglvl = 0;
  nvtxs = vtxdist[mype+1]-vtxdist[mype];
  firstvtx = vtxdist[mype];
  xadj = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj");
  adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy");
  for (i=0; i<nvtxs; i++) {
    xadj[i] = i;
    adjncy[i] = firstvtx + (i+1)%nvtxs;
  }
  xadj[nvtxs] = nvtxs;

  /* Proceed with the rest of the code */
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.seed = mype;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial geometric partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace);
  idxcopy(graph->nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);
  GKfree((void **)&xadj, (void **)&adjncy, LTERM);
}
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way
* refinement. This function utilizes local coarsening.
************************************************************************************/
void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
    idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options,
    int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  if (npes == 1) { /* Take care of the npes == 1 case */
    idxset(vtxdist[1], 0, part);
    *edgecut = 0;
    return;
  }

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  SetUpCtrl(&ctrl, npes, options, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes);

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize");

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK,
      printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n", mype));
  AdaptiveUndirected_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK,
      printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n", mype));
  ReMapGraph(&ctrl, graph, 0, &wspace);

  idxcopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  IMfree((void**)&graph->vsize, LTERM);
  FreeInitialGraphAndRemap(graph, *wgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}
/***********************************************************************************
* This function creates the fused-element-graph and returns the partition
************************************************************************************/
void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol,
    realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt,
    int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part,
    MPI_Comm *comm)
{
  int npes, mype, nvtxs;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  nvtxs = vtxdist[mype+1]-vtxdist[mype];

  /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n", mype, npes)); */

  SetUpCtrl(&ctrl, *nparts, options, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts));

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);
  graph->where = part;

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  CreateFusedElementGraph(&ctrl, graph, &wspace, numflag);

  idxcopy(nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  if (((*wgtflag)&2) == 0)
    IMfree((void**)&graph->vwgt, LTERM);

  IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy,
      &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr,
      &graph->imap, &graph->sendind, &graph, LTERM);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);
}
int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy,
          idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts,
          real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval,
          idx_t *part)
{
  int sigrval=0, renumber=0;
  graph_t *graph;
  ctrl_t *ctrl;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init())
    return METIS_ERROR_MEMORY;

  gk_sigtrap();

  if ((sigrval = gk_sigcatch()) != 0)
    goto SIGTHROW;

  /* set up the run parameters */
  ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec);
  if (!ctrl) {
    gk_siguntrap();
    return METIS_ERROR_INPUT;
  }

  /* if required, change the numbering to 0 */
  if (ctrl->numflag == 1) {
    Change2CNumbering(*nvtxs, xadj, adjncy);
    renumber = 1;
  }

  /* set up the graph */
  graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt);

  /* set up multipliers for making balance computations easier */
  SetupKWayBalMultipliers(ctrl, graph);

  /* set various run parameters that depend on the graph */
  if (ctrl->iptype == METIS_IPTYPE_METISRB) {
    ctrl->CoarsenTo = gk_max((*nvtxs)/(40*gk_log2(*nparts)), 30*(*nparts));
    ctrl->CoarsenTo = 10*(*nparts);  /* note: this assignment supersedes the one above */
    ctrl->nIparts   = (ctrl->CoarsenTo == 30*(*nparts) ? 4 : 5);
  }
  else {
    ctrl->CoarsenTo = 10*(*nparts);
    ctrl->nIparts   = 10;
  }

  /* take care of contiguity requests for disconnected graphs */
  if (ctrl->contig && !IsConnected(graph, 0))
    gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n");

  /* allocate workspace memory */
  AllocateWorkSpace(ctrl, graph);

  /* start the partitioning */
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->TotalTmr));

  *objval = MlevelKWayPartitioning(ctrl, graph, part);

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->TotalTmr));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));

  /* clean up */
  FreeCtrl(&ctrl);

SIGTHROW:
  /* if required, change the numbering back to 1 */
  if (renumber)
    Change2FNumbering(*nvtxs, xadj, adjncy, part);

  gk_siguntrap();
  gk_malloc_cleanup(0);

  return metis_rcode(sigrval);
}
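/******************************************************************************
* Usage sketch (illustrative, not part of the library): a minimal caller of
* METIS_PartGraphKway on a hard-coded 4-cycle. The graph data and function
* name below are hypothetical; only the METIS_PartGraphKway signature comes
* from the routine above. Passing NULL for the weight arrays and options
* selects unit weights and the library defaults.
*******************************************************************************/
#include <stdio.h>
#include <metis.h>

int example_partgraphkway(void)
{
  idx_t nvtxs = 4, ncon = 1, nparts = 2, objval;
  idx_t xadj[]   = {0, 2, 4, 6, 8};           /* CSR row pointers */
  idx_t adjncy[] = {1, 3, 0, 2, 1, 3, 0, 2};  /* 4-cycle adjacency */
  idx_t part[4];

  int status = METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, NULL, NULL,
                   NULL, &nparts, NULL, NULL, NULL, &objval, part);
  if (status == METIS_OK)
    printf("edgecut = %d\n", (int)objval);
  return status;
}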
int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
          idx_t *options, idx_t *perm, idx_t *iperm)
{
  int sigrval=0, renumber=0;
  idx_t i, ii, j, l, nnvtxs=0;
  graph_t *graph=NULL;
  ctrl_t *ctrl;
  idx_t *cptr, *cind, *piperm;
  int numflag = 0;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init())
    return METIS_ERROR_MEMORY;

  gk_sigtrap();

  if ((sigrval = gk_sigcatch()) != 0)
    goto SIGTHROW;

  /* set up the run time parameters */
  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
  if (!ctrl) {
    gk_siguntrap();
    return METIS_ERROR_INPUT;
  }

  /* if required, change the numbering to 0 */
  if (ctrl->numflag == 1) {
    Change2CNumbering(*nvtxs, xadj, adjncy);
    renumber = 1;
  }

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));

  /* prune the dense columns */
  if (ctrl->pfactor > 0.0) {
    piperm = imalloc(*nvtxs, "OMETIS: piperm");

    graph = PruneGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor);
    if (graph == NULL) {
      /* if there was no pruning, cleanup the pfactor */
      gk_free((void **)&piperm, LTERM);
      ctrl->pfactor = 0.0;
    }
    else {
      nnvtxs = graph->nvtxs;
      ctrl->compress = 0;  /* disable compression if pruning took place */
    }
  }

  /* compress the graph; note that compression only happens if no pruning
     has taken place. */
  if (ctrl->compress) {
    cptr = imalloc(*nvtxs+1, "OMETIS: cptr");
    cind = imalloc(*nvtxs, "OMETIS: cind");

    graph = CompressGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, cptr, cind);
    if (graph == NULL) {
      /* if there was no compression, cleanup the compress flag */
      gk_free((void **)&cptr, &cind, LTERM);
      ctrl->compress = 0;
    }
    else {
      nnvtxs = graph->nvtxs;
      ctrl->cfactor = 1.0*(*nvtxs)/nnvtxs;
      if (ctrl->cfactor > 1.5 && ctrl->nseps == 1)
        ctrl->nseps = 2;
      //ctrl->nseps = (idx_t)(ctrl->cfactor*ctrl->nseps);
    }
  }

  /* if no pruning and no compression, setup the graph in the normal way. */
  if (ctrl->pfactor == 0.0 && ctrl->compress == 0)
    graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);

  ASSERT(CheckGraph(graph, ctrl->numflag, 1));

  /* allocate workspace memory */
  AllocateWorkSpace(ctrl, graph);

  /* do the nested dissection ordering */
  if (ctrl->ccorder)
    MlevelNestedDissectionCC(ctrl, graph, iperm, graph->nvtxs);
  else
    MlevelNestedDissection(ctrl, graph, iperm, graph->nvtxs);

  if (ctrl->pfactor > 0.0) { /* Order any pruned vertices */
    icopy(nnvtxs, iperm, perm);  /* Use perm as an auxiliary array */
    for (i=0; i<nnvtxs; i++)
      iperm[piperm[i]] = perm[i];
    for (i=nnvtxs; i<*nvtxs; i++)
      iperm[piperm[i]] = i;

    gk_free((void **)&piperm, LTERM);
  }
  else if (ctrl->compress) { /* Uncompress the ordering */
    /* construct perm from iperm */
    for (i=0; i<nnvtxs; i++)
      perm[iperm[i]] = i;
    for (l=ii=0; ii<nnvtxs; ii++) {
      i = perm[ii];
      for (j=cptr[i]; j<cptr[i+1]; j++)
        iperm[cind[j]] = l++;
    }

    gk_free((void **)&cptr, &cind, LTERM);
  }

  for (i=0; i<*nvtxs; i++)
    perm[iperm[i]] = i;

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));

  /* clean up */
  FreeCtrl(&ctrl);

SIGTHROW:
  /* if required, change the numbering back to 1 */
  if (renumber)
    Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm);

  gk_siguntrap();
  gk_malloc_cleanup(0);

  return metis_rcode(sigrval);
}
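/******************************************************************************
* Usage sketch (illustrative): calling METIS_NodeND on a small path graph to
* obtain a fill-reducing ordering. The graph data and function name are
* hypothetical; the signature and the perm/iperm semantics come from the
* routine above (perm[i] is the i-th vertex of the ordering; iperm is its
* inverse).
*******************************************************************************/
#include <metis.h>

int example_nodend(void)
{
  idx_t nvtxs = 5;
  idx_t xadj[]   = {0, 1, 3, 5, 7, 8};        /* path 0-1-2-3-4 in CSR */
  idx_t adjncy[] = {1, 0, 2, 1, 3, 2, 4, 3};
  idx_t perm[5], iperm[5];

  return METIS_NodeND(&nvtxs, xadj, adjncy, NULL, NULL, perm, iperm);
}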
/***********************************************************************************
* This function is the entry point of the parallel ordering algorithm.
* This function assumes that the graph is already nicely partitioned among the
* processors and then proceeds to perform recursive bisection.
************************************************************************************/
void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
    int *numflag, int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm)
{
  int i, j;
  int ltvwgts[MAXNCON];
  int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  idxtype *morder;
  int minnvtxs;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  nparts = npes;

  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    return;
  }

  if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) {
    if (mype == 0)
      printf("Error: Too many processors for this many vertices.\n");
    return;
  }

  minnvtxs = vtxdist[1]-vtxdist[0];
  for (i=0; i<npes; i++)
    minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i];

  if (minnvtxs < (int)((float)npes*1.1)) {
    if (mype == 0)
      printf("Error: vertices are not distributed equally.\n");
    return;
  }

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts));  /* supersedes the line above */
  ctrl.seed = mype;
  ctrl.sync = seed;
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts");
  ctrl.ubvec[0] = 1.03;

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial k-way partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Moc_Global_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=======================================================
   * Move the graph according to the partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  MALLOC_CHECK(NULL);
  graph->ncon = 1;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  /*=======================================================
   * Now compute an ordering of the moved graph
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  ctrl.ipart = ISEP_NODE;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000));

  /* compute tvwgts */
  for (j=0; j<mgraph->ncon; j++)
    ltvwgts[j] = 0;
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j];
  for (j=0; j<mgraph->ncon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      mgraph->nvwgt[i*mgraph->ncon+j] =
          (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]);

  morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder");
  MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace);
  MALLOC_CHECK(NULL);

  /* Invert the ordering back to the original graph */
  ProjectInfoBack(&ctrl, graph, order, morder, &wspace);
  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  free(ctrl.tpwgts);
  free(morder);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);

  MALLOC_CHECK(NULL);
}
/***********************************************************************************
* This function is the entry point of the parallel kmetis algorithm that uses
* coordinates to compute an initial graph distribution.
************************************************************************************/
void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
    idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims,
    float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec,
    int *options, int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int h, i, j;
  int nvtxs = -1, npes, mype;
  int uwgtflag, cut, gcut, maxnvtxs;
  int ltvwgts[MAXNCON];
  int moptions[10];
  CtrlType ctrl;
  idxtype *uvwgt;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  float avg, maximb, balance, *mytpwgts;
  int seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /**********************************/
  /* Try to take care of bad inputs */
  /**********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
      ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec,
      NULL, NULL, options, ioptions, part, comm);

  /************************************/
  /* Take care of the nparts = 1 case */
  /************************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    *edgecut = 0;
    return;
  }

  /**********************************/
  /* Take care of the npes = 1 case */
  /**********************************/
  if (npes == 1 && inparts > 1) {
    moptions[0] = 0;
    nvtxs = vtxdist[1];

    if (incon == 1) {
      METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag,
          &inumflag, &inparts, itpwgts, moptions, edgecut, part);
    }
    else {
      /* ADD: this is because METIS does not support tpwgts for all constraints */
      mytpwgts = fmalloc(inparts, "mytpwgts");
      for (i=0; i<inparts; i++)
        mytpwgts[i] = itpwgts[i*incon];

      moptions[7] = -1;
      METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt,
          &iwgtflag, &inumflag, &inparts, mytpwgts, moptions, edgecut, part);

      free(mytpwgts);
    }

    return;
  }

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
  }
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts));
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = itpwgts;
  scopy(incon, iubvec, ctrl.ubvec);

  uwgtflag = iwgtflag|2;
  uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt");
  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag);
  free(graph->nvwgt);
  graph->nvwgt = NULL;

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=================================================
   * Compute the initial npes-way geometric partitioning
   =================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=================================================
   * Move the graph according to the partitioning
   =================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  free(uvwgt);
  graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt");
  graph->ncon = incon;
  j = ctrl.nparts;
  ctrl.nparts = ctrl.npes;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  ctrl.nparts = j;

  /**********************************************************/
  /* Do the same functionality as Moc_SetUpGraph for mgraph */
  /**********************************************************/
  /* compute tvwgts */
  for (j=0; j<incon; j++)
    ltvwgts[j] = 0;
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<incon; j++)
      ltvwgts[j] += mgraph->vwgt[i*incon+j];
  for (j=0; j<incon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  /* check for zero wgt constraints */
  for (i=0; i<incon; i++) {
    /* ADD: take care of the case in which tvwgts is zero */
    if (ctrl.tvwgts[i] == 0) {
      if (ctrl.mype == 0)
        printf("ERROR: sum weight for constraint %d is zero\n", i);
      MPI_Finalize();
      exit(-1);
    }
  }

  /* compute nvwgt */
  mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<incon; j++)
      mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  if (ctrl.dbglvl&DBG_INFO) {
    cut = 0;
    for (i=0; i<graph->nvtxs; i++)
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++)
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
    gcut = GlobalSESum(&ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs);
    balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes));
    rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n",
        gcut, balance, maxnvtxs, graph->gnvtxs, npes);
  }

  /*=================================================
   * Set up the newly moved graph
   =================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  ctrl.nparts = inparts;
  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  /*=======================================================
   * Now compute the partition of the moved graph
   =======================================================*/
  if (vtxdist[npes] < SMALLGRAPH ||
      vtxdist[npes] < npes*20 ||
      GlobalSESum(&ctrl, mgraph->nedges) == 0) {
    IFSET(ctrl.dbglvl, DBG_INFO,
        rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes]));
    PartitionSmallGraph(&ctrl, mgraph, &wspace);
  }
  else {
    Moc_Global_Partition(&ctrl, mgraph, &wspace);
  }
  ParallelReMapGraph(&ctrl, mgraph, &wspace);

  /* Invert the ordering back to the original graph */
  ctrl.nparts = npes;
  ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace);
  *edgecut = mgraph->mincut;

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  /*******************/
  /* Print out stats */
  /*******************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  if (ctrl.dbglvl&DBG_INFO) {
    rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon);
  }

  GKfree((void **)&itpwgts, LTERM);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}
/***********************************************************************************
* This function is the entry point of the parallel k-way partition refinement
* algorithm. It refines an existing partition using adaptive k-way refinement.
************************************************************************************/
int ParMETIS_V3_RefineKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
    idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon,
    idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options,
    idx_t *edgecut, idx_t *part, MPI_Comm *comm)
{
  idx_t npes, mype, status;
  ctrl_t *ctrl=NULL;
  graph_t *graph=NULL;
  size_t curmem;

  /* Check the input parameters and return if an error */
  status = CheckInputsPartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag,
               numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_RMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  /* Take care of the nparts == 1 case */
  if (*nparts == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part);
    *edgecut = 0;
    goto DONE;
  }

  /* setup the graph */
  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag);

  if (ctrl->ps_relation == PARMETIS_PSR_COUPLED)
    iset(graph->nvtxs, mype, graph->home);
  else
    icopy(graph->nvtxs, part, graph->home);

  /* Allocate workspace */
  AllocateWSpace(ctrl, 10*graph->nvtxs);

  /* Partition and Remap */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 50*(*ncon)*gk_max(npes, *nparts));

  Adaptive_Partition(ctrl, graph);
  ParallelReMapGraph(ctrl, graph);

  icopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Take care of output */
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));
  IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1));

  FreeInitialGraphAndRemap(graph);

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zd bytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
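/***********************************************************************************
* Usage sketch (illustrative, not part of the library): refining an existing
* coupled partition with ParMETIS_V3_RefineKway. Each process owns two vertices
* of a global ring; all arrays and the function name below are hypothetical test
* data, options[0] == 0 selects the library defaults, and the sketch assumes at
* least two MPI ranks.
************************************************************************************/
#include <mpi.h>
#include <stdlib.h>
#include <parmetis.h>

int example_refinekway(MPI_Comm comm)
{
  int npes, mype, i;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  idx_t n = 2*(idx_t)npes;               /* global ring of 2*npes vertices */
  idx_t *vtxdist = malloc((npes+1)*sizeof(idx_t));
  for (i=0; i<=npes; i++)
    vtxdist[i] = 2*(idx_t)i;

  idx_t v0 = 2*(idx_t)mype;              /* first locally owned vertex */
  idx_t xadj[3]   = {0, 2, 4};           /* both local vertices have degree 2 */
  idx_t adjncy[4] = {(v0+n-1)%n, v0+1,   /* neighbors of v0 */
                     v0, (v0+2)%n};      /* neighbors of v0+1 */

  idx_t part[2]   = {mype, mype};        /* current (coupled) assignment */
  idx_t wgtflag = 0, numflag = 0, ncon = 1, nparts = npes, edgecut;
  idx_t options[4] = {0};                /* use the library defaults */
  real_t ubvec[1] = {1.05};
  real_t *tpwgts = malloc(nparts*sizeof(real_t));
  for (i=0; i<npes; i++)
    tpwgts[i] = 1.0/(real_t)npes;

  int status = ParMETIS_V3_RefineKway(vtxdist, xadj, adjncy, NULL, NULL,
      &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options,
      &edgecut, part, &comm);

  free(tpwgts);
  free(vtxdist);
  return status;
}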
ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts,
            real_t *tpwgts, real_t *ubvec)
{
  idx_t i, j;
  ctrl_t *ctrl;

  ctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "SetupCtrl: ctrl");
  memset((void *)ctrl, 0, sizeof(ctrl_t));

  switch (optype) {
    case METIS_OP_PMETIS:
      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
      ctrl->ctype   = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM);
      ctrl->rtype   = METIS_RTYPE_FM;
      ctrl->ncuts   = GETOPTION(options, METIS_OPTION_NCUTS, 1);
      ctrl->niter   = GETOPTION(options, METIS_OPTION_NITER, 10);
      ctrl->seed    = GETOPTION(options, METIS_OPTION_SEED, -1);
      ctrl->dbglvl  = GETOPTION(options, METIS_OPTION_DBGLVL, 0);

      if (ncon == 1) {
        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_GROW);
        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, PMETIS_DEFAULT_UFACTOR);
        ctrl->CoarsenTo = 20;
      }
      else {
        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_RANDOM);
        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, MCPMETIS_DEFAULT_UFACTOR);
        ctrl->CoarsenTo = 100;
      }
      break;

    case METIS_OP_KMETIS:
      ctrl->objtype = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
      ctrl->ctype   = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM);
      ctrl->iptype  = METIS_IPTYPE_METISRB;
      ctrl->rtype   = METIS_RTYPE_GREEDY;
      ctrl->ncuts   = GETOPTION(options, METIS_OPTION_NCUTS, 1);
      ctrl->niter   = GETOPTION(options, METIS_OPTION_NITER, 10);
      ctrl->ufactor = GETOPTION(options, METIS_OPTION_UFACTOR, KMETIS_DEFAULT_UFACTOR);
      ctrl->minconn = GETOPTION(options, METIS_OPTION_MINCONN, 0);
      ctrl->contig  = GETOPTION(options, METIS_OPTION_CONTIG, 0);
      ctrl->seed    = GETOPTION(options, METIS_OPTION_SEED, -1);
      ctrl->dbglvl  = GETOPTION(options, METIS_OPTION_DBGLVL, 0);
      break;

    case METIS_OP_OMETIS:
      ctrl->objtype  = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_NODE);
      ctrl->ctype    = GETOPTION(options, METIS_OPTION_CTYPE, METIS_CTYPE_SHEM);
      ctrl->rtype    = GETOPTION(options, METIS_OPTION_RTYPE, METIS_RTYPE_SEP1SIDED);
      ctrl->iptype   = GETOPTION(options, METIS_OPTION_IPTYPE, METIS_IPTYPE_EDGE);
      ctrl->nseps    = GETOPTION(options, METIS_OPTION_NSEPS, 1);
      ctrl->niter    = GETOPTION(options, METIS_OPTION_NITER, 10);
      ctrl->ufactor  = GETOPTION(options, METIS_OPTION_UFACTOR, OMETIS_DEFAULT_UFACTOR);
      ctrl->compress = GETOPTION(options, METIS_OPTION_COMPRESS, 1);
      ctrl->ccorder  = GETOPTION(options, METIS_OPTION_CCORDER, 0);
      ctrl->seed     = GETOPTION(options, METIS_OPTION_SEED, -1);
      ctrl->dbglvl   = GETOPTION(options, METIS_OPTION_DBGLVL, 0);
      ctrl->pfactor  = 0.1*GETOPTION(options, METIS_OPTION_PFACTOR, 0);

      ctrl->CoarsenTo = 100;
      break;

    default:
      gk_errexit(SIGERR, "Unknown optype of %d\n", optype);
  }

  ctrl->numflag = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
  ctrl->optype  = optype;
  ctrl->ncon    = ncon;
  ctrl->nparts  = nparts;
  ctrl->maxvwgt = ismalloc(ncon, 0, "SetupCtrl: maxvwgt");

  /* setup the target partition weights */
  if (ctrl->optype != METIS_OP_OMETIS) {
    ctrl->tpwgts = rmalloc(nparts*ncon, "SetupCtrl: ctrl->tpwgts");
    if (tpwgts) {
      rcopy(nparts*ncon, tpwgts, ctrl->tpwgts);
    }
    else {
      for (i=0; i<nparts; i++) {
        for (j=0; j<ncon; j++)
          ctrl->tpwgts[i*ncon+j] = 1.0/nparts;
      }
    }
  }
  else {  /* METIS_OP_OMETIS */
    /* this is required to allow the pijbm to be defined properly for
       the edge-based refinement during initial partitioning */
    ctrl->tpwgts = rsmalloc(2, .5, "SetupCtrl: ctrl->tpwgts");
  }

  /* setup the ubfactors */
  ctrl->ubfactors = rsmalloc(ctrl->ncon, I2RUBFACTOR(ctrl->ufactor), "SetupCtrl: ubfactors");
  if (ubvec)
    rcopy(ctrl->ncon, ubvec, ctrl->ubfactors);
  for (i=0; i<ctrl->ncon; i++)
    ctrl->ubfactors[i] += 0.0000499;

  /* Allocate memory for balance multipliers.
     Note that for PMETIS/OMETIS routines the memory allocated is more
     than required as balance multipliers for 2 parts is sufficient. */
  ctrl->pijbm = rmalloc(nparts*ncon, "SetupCtrl: ctrl->pijbm");

  InitRandom(ctrl->seed);

  IFSET(ctrl->dbglvl, METIS_DBG_INFO, PrintCtrl(ctrl));

  if (!CheckParams(ctrl)) {
    FreeCtrl(&ctrl);
    return NULL;
  }
  else {
    return ctrl;
  }
}
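/******************************************************************************
* Usage sketch (illustrative): how a caller drives the GETOPTION defaults in
* SetupCtrl above. METIS_SetDefaultOptions sets every entry to -1, and any
* entry left at -1 falls back to the per-optype default chosen in SetupCtrl.
* The function name is hypothetical.
*******************************************************************************/
#include <metis.h>

void example_options(void)
{
  idx_t options[METIS_NOPTIONS];

  METIS_SetDefaultOptions(options);        /* every entry becomes -1 */
  options[METIS_OPTION_SEED]   = 42;       /* overrides the default of -1 */
  options[METIS_OPTION_NITER]  = 20;       /* overrides the default of 10 */
  options[METIS_OPTION_DBGLVL] = METIS_DBG_INFO;

  /* options is then passed to METIS_PartGraphKway et al., which forward
     it to SetupCtrl */
}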
/***********************************************************************************
* This function is the entry point of the parallel kmetis algorithm that uses
* coordinates to compute an initial graph distribution.
************************************************************************************/
int ParMETIS_V3_PartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
    idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims,
    real_t *xyz, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec,
    idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm)
{
  idx_t h, i, j, npes, mype, status, nvtxs, seed, dbglvl;
  idx_t cut, gcut, maxnvtxs;
  idx_t moptions[METIS_NOPTIONS];
  ctrl_t *ctrl;
  graph_t *graph, *mgraph;
  real_t balance;
  size_t curmem;

  /* Check the input parameters and return if an error */
  status = CheckInputsPartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag,
               numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec, options,
               edgecut, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_GKMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  /* Take care of the nparts == 1 case */
  if (*nparts == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part);
    *edgecut = 0;
    goto DONE;
  }

  /* Take care of the npes == 1 case */
  if (npes == 1) {
    nvtxs = vtxdist[1] - vtxdist[0];  /* subtraction is required when numflag==1 */

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_NUMBERING] = *numflag;

    status = METIS_PartGraphKway(&nvtxs, ncon, xadj, adjncy, vwgt, NULL, adjwgt,
                 nparts, tpwgts, ubvec, moptions, edgecut, part);

    goto DONE;
  }

  /* Setup the graph */
  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag);
  gk_free((void **)&graph->nvwgt, LTERM);

  /* Allocate the workspace */
  AllocateWSpace(ctrl, 10*graph->nvtxs);

  /* Compute the initial npes-way geometric partitioning */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  Coordinate_Partition(ctrl, graph, *ndims, xyz, 1);

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Move the graph according to the partitioning */
  STARTTIMER(ctrl, ctrl->MoveTmr);

  ctrl->nparts = npes;
  mgraph = MoveGraph(ctrl, graph);
  ctrl->nparts = *nparts;

  SetupGraph_nvwgts(ctrl, mgraph);  /* compute nvwgts for the moved graph */

  if (ctrl->dbglvl&DBG_INFO) {
    CommInterfaceData(ctrl, graph, graph->where, graph->where+graph->nvtxs);
    for (cut=0, i=0; i<graph->nvtxs; i++) {
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
      }
    }
    gcut     = GlobalSESum(ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(ctrl, mgraph->nvtxs);
    balance  = (real_t)(maxnvtxs)/((real_t)(graph->gnvtxs)/(real_t)(npes));
    rprintf(ctrl, "XYZ Cut: %6"PRIDX" \tBalance: %6.3"PRREAL" [%"PRIDX" %"PRIDX" %"PRIDX"]\n",
        gcut, balance, maxnvtxs, graph->gnvtxs, npes);
  }

  STOPTIMER(ctrl, ctrl->MoveTmr);

  /* Compute the partition of the moved graph */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts));

  if (vtxdist[npes] < SMALLGRAPH ||
      vtxdist[npes] < npes*20 ||
      GlobalSESum(ctrl, mgraph->nedges) == 0) { /* serially */
    IFSET(ctrl->dbglvl, DBG_INFO,
        rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes]));
    PartitionSmallGraph(ctrl, mgraph);
  }
  else { /* in parallel */
    Global_Partition(ctrl, mgraph);
  }

  ParallelReMapGraph(ctrl, mgraph);

  /* Invert the ordering back to the original graph */
  ctrl->nparts = npes;
  ProjectInfoBack(ctrl, graph, part, mgraph->where);
  ctrl->nparts = *nparts;

  *edgecut = mgraph->mincut;

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Print some stats */
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));
  IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, mgraph, 0));

  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph);

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zd bytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
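/***********************************************************************************
* Usage sketch (illustrative): the xyz layout expected by the geometric routines.
* For *ndims == 2, vertex i on this process stores its coordinates at xyz[2*i]
* and xyz[2*i+1]. The wrapper below assumes pre-built distributed CSR arrays;
* its name and the chosen ubvec/options values are hypothetical.
************************************************************************************/
#include <stdlib.h>
#include <parmetis.h>

int example_partgeomkway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
        real_t *xyz, idx_t nparts_in, idx_t *part, MPI_Comm comm)
{
  idx_t wgtflag = 0, numflag = 0, ndims = 2, ncon = 1;
  idx_t nparts = nparts_in, options[4] = {0}, edgecut;
  real_t ubvec[1] = {1.05};
  real_t *tpwgts = malloc(nparts*sizeof(real_t));
  for (idx_t i=0; i<nparts; i++)
    tpwgts[i] = 1.0/(real_t)nparts;      /* uniform target part weights */

  int status = ParMETIS_V3_PartGeomKway(vtxdist, xadj, adjncy, NULL, NULL,
      &wgtflag, &numflag, &ndims, xyz, &ncon, &nparts, tpwgts, ubvec,
      options, &edgecut, part, &comm);

  free(tpwgts);
  return status;
}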
/***********************************************************************************
* This function is the entry point of the parallel geometric partitioning
* algorithm. It partitions the vertices based solely on their coordinates,
* building a fake ring graph so that the rest of the code works unchanged.
************************************************************************************/
int ParMETIS_V3_PartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz,
        idx_t *part, MPI_Comm *comm)
{
  idx_t i, nvtxs, firstvtx, npes, mype, status;
  idx_t *xadj, *adjncy;
  ctrl_t *ctrl=NULL;
  graph_t *graph=NULL;
  size_t curmem;

  /* Check the input parameters and return if an error */
  status = CheckInputsPartGeom(vtxdist, ndims, xyz, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_GMETIS, NULL, 1, 1, NULL, NULL, *comm);
  /*ctrl->dbglvl=15;*/
  npes = ctrl->npes;
  mype = ctrl->mype;

  /* Trivial case when npes == 1 */
  if (npes == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    goto DONE;
  }

  /* Setup a fake graph to allow the rest of the code to work unchanged */
  nvtxs = vtxdist[mype+1]-vtxdist[mype];
  firstvtx = vtxdist[mype];
  xadj = imalloc(nvtxs+1, "ParMETIS_PartGeom: xadj");
  adjncy = imalloc(nvtxs, "ParMETIS_PartGeom: adjncy");
  for (i=0; i<nvtxs; i++) {
    xadj[i] = i;
    adjncy[i] = firstvtx + (i+1)%nvtxs;
  }
  xadj[nvtxs] = nvtxs;

  graph = SetupGraph(ctrl, 1, vtxdist, xadj, NULL, NULL, adjncy, NULL, 0);

  /* Allocate workspace memory */
  AllocateWSpace(ctrl, 5*graph->nvtxs);

  /* Compute the initial geometric partitioning */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  Coordinate_Partition(ctrl, graph, *ndims, xyz, 0);
  icopy(graph->nvtxs, graph->where, part);

  STOPTIMER(ctrl, ctrl->TotalTmr);
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));

  gk_free((void **)&xadj, (void **)&adjncy, LTERM);
  FreeInitialGraphAndRemap(graph);

DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zd bytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
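/***********************************************************************************
* Usage sketch (illustrative): purely coordinate-based partitioning with
* ParMETIS_V3_PartGeom. Each process owns two vertices whose 2-D coordinates
* are derived from its rank; the data and function name below are hypothetical.
* The routine partitions into npes parts and writes the result into part.
************************************************************************************/
#include <mpi.h>
#include <stdlib.h>
#include <parmetis.h>

void example_partgeom(MPI_Comm comm)
{
  int npes, mype, i;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  idx_t *vtxdist = malloc((npes+1)*sizeof(idx_t));
  for (i=0; i<=npes; i++)
    vtxdist[i] = 2*(idx_t)i;              /* two vertices per process */

  idx_t ndims = 2;
  real_t xyz[4] = {(real_t)mype, 0.0,     /* vertex 0: (mype, 0) */
                   (real_t)mype, 1.0};    /* vertex 1: (mype, 1) */
  idx_t part[2];

  ParMETIS_V3_PartGeom(vtxdist, &ndims, xyz, part, &comm);

  free(vtxdist);
}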
/**************************************************************************
* mexFunction: gateway routine for MATLAB interface.
***************************************************************************/
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
  // Argument checking
  if (nrhs != 5)
    mexErrMsgIdAndTxt(FUNC_NAME, "Wrong input.");
  if (nlhs != 3)
    mexErrMsgIdAndTxt(FUNC_NAME, "Wrong output.");

  // Input and output variables
  idx_t nvtxs = (idx_t) mxGetScalar(nvtxs_in);
  idx_t *xadj;   GetIdxArray(xadj_in, &xadj);
  idx_t *adjncy; GetIdxArray(adjncy_in, &adjncy);
  idx_t *vwgt;   GetIdxArray(vwgt_in, &vwgt);
  idx_t options[METIS_NOPTIONS];
  GetOptions(options_in, options);
  idx_t *sepidx;
  idx_t *lgraphidx;
  idx_t *rgraphidx;

  // Metis main function
  idx_t i, nnvtxs=0;
  idx_t ptlgraph, ptrgraph, ptsep;
  graph_t *graph=NULL;
  ctrl_t *ctrl;
  idx_t *piperm;
  idx_t snvtxs[3];
  idx_t *where;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init())
    CheckReturn(METIS_ERROR_MEMORY, FUNC_NAME);

  // set up the run time parameters
  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);

  // prune the dense columns
  if (ctrl->pfactor > 0.0) {
    piperm = imalloc(nvtxs, "OMETIS: piperm");

    graph = PruneGraph(ctrl, nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor);
    if (graph == NULL) {
      // if there was no pruning, cleanup the pfactor
      gk_free((void **)&piperm, LTERM);
      ctrl->pfactor = 0.0;
    }
    else {
      nnvtxs = graph->nvtxs;
      // disable compression if pruning took place
      ctrl->compress = 0;
    }
  }

  // graph compression is disabled in this gateway
  if (ctrl->compress)
    ctrl->compress = 0;

  // if no pruning and no compression, setup the graph in the normal way.
  if (ctrl->pfactor == 0.0 && ctrl->compress == 0)
    graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);

  ASSERT(CheckGraph(graph, ctrl->numflag, 1));

  /* allocate workspace memory */
  AllocateWorkSpace(ctrl, graph);

  MlevelNodeBisectionMultiple(ctrl, graph);

  // count the vertices on each side of the separator
  snvtxs[0] = 0;
  snvtxs[1] = 0;
  snvtxs[2] = 0;

  if (ctrl->pfactor > 0.0)
    snvtxs[2] += nvtxs-nnvtxs;

  where = graph->where;
  for (i=0; i<graph->nvtxs; i++)
    snvtxs[where[i]]++;

  lgraphidx = (idx_t*) mxCalloc(snvtxs[0], sizeof(idx_t));
  rgraphidx = (idx_t*) mxCalloc(snvtxs[1], sizeof(idx_t));
  sepidx    = (idx_t*) mxCalloc(snvtxs[2], sizeof(idx_t));

  ptlgraph = 0;
  ptrgraph = 0;
  ptsep    = 0;

  if (ctrl->pfactor > 0.0) {
    for (i=0; i<graph->nvtxs; i++)
      if (where[i] == 0)
        lgraphidx[ptlgraph++] = piperm[i];
      else if (where[i] == 1)
        rgraphidx[ptrgraph++] = piperm[i];
      else
        sepidx[ptsep++] = piperm[i];

    for (i=nnvtxs; i<nvtxs; i++)
      sepidx[ptsep++] = piperm[i];

    gk_free((void **)&piperm, LTERM);
  }
  else {
    for (i=0; i<graph->nvtxs; i++)
      if (where[i] == 0)
        lgraphidx[ptlgraph++] = i;
      else if (where[i] == 1)
        rgraphidx[ptrgraph++] = i;
      else
        sepidx[ptsep++] = i;
  }

  /* clean up */
  FreeCtrl(&ctrl);

  // Output
  lgraphidx_out = mxCreateDoubleMatrix(1, ptlgraph, mxREAL);
  mxSetData(lgraphidx_out, mxMalloc(sizeof(double)*ptlgraph));
  double *lgraphidx_out_pr = mxGetPr(lgraphidx_out);
  for (idx_t i=0; i<ptlgraph; i++)
    lgraphidx_out_pr[i] = (double) lgraphidx[i];

  rgraphidx_out = mxCreateDoubleMatrix(1, ptrgraph, mxREAL);
  mxSetData(rgraphidx_out, mxMalloc(sizeof(double)*ptrgraph));
  double *rgraphidx_out_pr = mxGetPr(rgraphidx_out);
  for (idx_t i=0; i<ptrgraph; i++)
    rgraphidx_out_pr[i] = (double) rgraphidx[i];

  sepidx_out = mxCreateDoubleMatrix(1, ptsep, mxREAL);
  mxSetData(sepidx_out, mxMalloc(sizeof(double)*ptsep));
  double *sepidx_out_pr = mxGetPr(sepidx_out);
  for (idx_t i=0; i<ptsep; i++)
    sepidx_out_pr[i] = (double) sepidx[i];
}
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind,
    int *numflag, int *ncommonnodes, idxtype **xadj, idxtype **adjncy,
    MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;

  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1)
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);

  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0, j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;

  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i;  /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }
  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE,
      (void *)recvbuffer, rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);

  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE,
      (void *)rbuffer, rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i)
            continue;

          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) {
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
                }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
            }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0)
            myxadj[i]++;
          else
            myadjncy[myxadj[i]++] = ind[j];
        }
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1)
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer,
      (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr,
      (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
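/***********************************************************************************
* Usage sketch (illustrative): the eptr/eind mesh format consumed by
* ParMETIS_V3_Mesh2Dual, shown for two triangles sharing an edge on a single
* MPI process. All data and the function name are hypothetical; the signature
* is the idxtype API above.
*
*   0---1        element 0: nodes {0, 1, 2}
*   |  /|        element 1: nodes {1, 3, 2}
*   | / |
*   2---3        shared nodes {1, 2}; with ncommonnodes == 2 the dual
*                graph connects the two elements.
************************************************************************************/
#include <mpi.h>
#include <parmetis.h>

void example_mesh2dual(MPI_Comm comm)   /* intended for a single process */
{
  idxtype elmdist[2] = {0, 2};          /* one PE owning both elements */
  idxtype eptr[3]    = {0, 3, 6};       /* element i uses eind[eptr[i]..eptr[i+1]-1] */
  idxtype eind[6]    = {0, 1, 2,  1, 3, 2};
  int numflag = 0, ncommonnodes = 2;    /* >= 2 shared nodes => dual edge */
  idxtype *xadj, *adjncy;

  ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, &numflag, &ncommonnodes,
      &xadj, &adjncy, &comm);
  /* expected dual graph: xadj = {0, 1, 2}, adjncy = {1, 0} */
}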
/***********************************************************************************
* This function is the entry point of the serial ordering algorithm.
************************************************************************************/
int ParMETIS_SerialNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
        idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes,
        MPI_Comm *comm)
{
  idx_t i, npes, mype;
  ctrl_t *ctrl=NULL;
  graph_t *agraph=NULL;
  idx_t *perm=NULL, *iperm=NULL;
  idx_t *sendcount, *displs;

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_OMETIS, options, 1, 1, NULL, NULL, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    FreeCtrl(&ctrl);
    return METIS_ERROR;
  }

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  STARTTIMER(ctrl, ctrl->TotalTmr);
  STARTTIMER(ctrl, ctrl->MoveTmr);

  agraph = AssembleEntireGraph(ctrl, vtxdist, xadj, adjncy);

  STOPTIMER(ctrl, ctrl->MoveTmr);

  if (mype == 0) {
    perm  = imalloc(agraph->nvtxs, "PAROMETISS: perm");
    iperm = imalloc(agraph->nvtxs, "PAROMETISS: iperm");

    METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
        npes, NULL, perm, iperm, sizes);
  }

  STARTTIMER(ctrl, ctrl->MoveTmr);

  /* Broadcast the sizes array */
  gkMPI_Bcast((void *)sizes, 2*npes, IDX_T, 0, ctrl->gcomm);

  /* Scatter the iperm */
  sendcount = imalloc(npes, "PAROMETISS: sendcount");
  displs    = imalloc(npes, "PAROMETISS: displs");
  for (i=0; i<npes; i++) {
    sendcount[i] = vtxdist[i+1]-vtxdist[i];
    displs[i]    = vtxdist[i];
  }

  gkMPI_Scatterv((void *)iperm, sendcount, displs, IDX_T, (void *)order,
      vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, ctrl->gcomm);

  STOPTIMER(ctrl, ctrl->MoveTmr);
  STOPTIMER(ctrl, ctrl->TotalTmr);

  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));

  gk_free((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm,
      &sendcount, &displs, &agraph, LTERM);

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);

  FreeCtrl(&ctrl);

  return METIS_OK;
}
/***********************************************************************************
* This function is the entry point of the parallel k-way partition refinement
* algorithm. It refines an existing partition using adaptive k-way refinement.
************************************************************************************/
void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
    idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon,
    int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut,
    idxtype *part, MPI_Comm *comm)
{
  int h, i;
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int tewgt, tvsize, nmoved, maxin, maxout;
  float gtewgt, gtvsize, avg, maximb;
  int ps_relation, seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /**********************************/
  /* Try to take care of bad inputs */
  /**********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
      ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec,
      NULL, NULL, options, ioptions, part, comm);

  /* ADD: take care of disconnected graph */
  /* ADD: take care of highly unbalanced vtxdist */

  /************************************/
  /* Take care of the nparts = 1 case */
  /************************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    *edgecut = 0;
    return;
  }

  /**************************/
  /* Set up data structures */
  /**************************/
  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
    ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED;
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
    ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED;
  }

  SetUpCtrl(&ctrl, inparts, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts));
  ctrl.ipc_factor = 1000.0;
  ctrl.redist_factor = 1.0;
  ctrl.redist_base = 1.0;
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = REFINE_PARTITION;
  ctrl.ps_relation = ps_relation;
  ctrl.tpwgts = itpwgts;

  graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize");

  graph->home = idxmalloc(graph->nvtxs, "home");
  if (ctrl.ps_relation == COUPLED)
    idxset(graph->nvtxs, mype, graph->home);
  else
    idxcopy(graph->nvtxs, part, graph->home);

  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs;
  gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl.edge_size_ratio = gtewgt/gtvsize;
  scopy(incon, iubvec, ctrl.ubvec);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /***********************/
  /* Partition and Remap */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Adaptive_Partition(&ctrl, graph, &wspace);
  ParallelReMapGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  idxcopy(graph->nvtxs, graph->where, part);
  if (edgecut != NULL)
    *edgecut = graph->mincut;

  /***********************/
  /* Take care of output */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  if (ctrl.dbglvl&DBG_INFO) {
    Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout);
    rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout);
  }

  /*************************************/
  /* Free memory, renumber, and return */
  /*************************************/
  GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt,
      (void **)(&graph->home), (void **)(&graph->vsize), LTERM);

  GKfree((void **)&itpwgts, LTERM);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

  return;
}