/************************************************************************* * This function is my wrapper around realloc **************************************************************************/ void *gk_realloc(void *oldptr, size_t nbytes, char *msg) { void *ptr=NULL; if (nbytes == 0) nbytes++; /* Force mallocs to actually allocate some memory */ /* remove this memory de-allocation */ if (gkmcore != NULL && oldptr != NULL) gk_gkmcoreDel(gkmcore, oldptr); ptr = (void *)realloc(oldptr, nbytes); if (ptr == NULL) { fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); gk_errexit(SIGMEM, "***Memory realloc failed for %s. " "Requested size: %zu bytes", msg, nbytes); return NULL; } /* add this memory allocation */ if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); return ptr; }
void *gk_malloc(size_t nbytes, char *msg) { void *ptr=NULL; if (nbytes == 0) nbytes++; /* Force mallocs to actually allocate some memory */ ptr = (void *)malloc(nbytes); if (ptr == NULL) { fprintf(stderr, " Current memory used: %10zu bytes\n", gk_GetCurMemoryUsed()); fprintf(stderr, " Maximum memory used: %10zu bytes\n", gk_GetMaxMemoryUsed()); gk_errexit(SIGMEM, "***Memory allocation failed for %s. Requested size: %zu bytes", msg, nbytes); return NULL; } /* add this memory allocation */ if (gkmcore != NULL) gk_gkmcoreAdd(gkmcore, GK_MOPT_HEAP, nbytes, ptr); /* zero-out the allocated space */ #ifndef NDEBUG memset(ptr, 0, nbytes); #endif return ptr; }
void print_final_info(params_t *params) { printf("\n"); printf("Memory Usage Information -----------------------------------------------------\n"); printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed()); printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed()); printf("********************************************************************************\n"); }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ int ParMETIS_V3_RefineKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm) { idx_t npes, mype, status; ctrl_t *ctrl=NULL; graph_t *graph=NULL; size_t curmem; /* Check the input parameters and return if an error */ status = CheckInputsPartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm); if (GlobalSEMinComm(*comm, status) == 0) return METIS_ERROR; status = METIS_OK; gk_malloc_init(); curmem = gk_GetCurMemoryUsed(); /* Setup ctrl */ ctrl = SetupCtrl(PARMETIS_OP_RMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); npes = ctrl->npes; mype = ctrl->mype; /* Take care the nparts == 1 case */ if (*nparts == 1) { iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; goto DONE; } /* setup the graph */ if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); if (ctrl->ps_relation == PARMETIS_PSR_COUPLED) iset(graph->nvtxs, mype, graph->home); else icopy(graph->nvtxs, part, graph->home); /* Allocate workspace */ AllocateWSpace(ctrl, 10*graph->nvtxs); /* Partition and Remap */ STARTTIMER(ctrl, ctrl->TotalTmr); ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 50*(*ncon)*gk_max(npes, *nparts)); Adaptive_Partition(ctrl, graph); ParallelReMapGraph(ctrl, graph); icopy(graph->nvtxs, graph->where, part); *edgecut = graph->mincut; STOPTIMER(ctrl, ctrl->TotalTmr); /* Take care of output */ IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1)); FreeInitialGraphAndRemap(graph); if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); DONE: FreeCtrl(&ctrl); if (gk_GetCurMemoryUsed() - curmem > 0) { printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } gk_malloc_cleanup(0); return (int)status; }
int main(int argc, char *argv[]) { idx_t options[METIS_NOPTIONS]; bigraph_t *bigraph; idx_t *perm, *iperm; params_t *params; int status, i, j; /* rdiags[i][0] and cdiags[i][0] saves the length of each array * excluding the first value */ idx_t **rdiags, **cdiags; idx_t ndiags; params = parse_cmdline(argc, argv); gk_startcputimer(params->iotimer); bigraph = ReadBiGraph(params); gk_stopcputimer(params->iotimer); if(bigraph == NULL){ printf("Input Error : nrows + ncols != nvtxs\n"); printf("\n***Metis returned with an error.\n"); return -1; } BDFPrintInfo(params, bigraph); METIS_SetDefaultOptions(options); /*User specific parameters*/ options[METIS_OPTION_CTYPE] = params->ctype; options[METIS_OPTION_IPTYPE] = params->iptype; options[METIS_OPTION_RTYPE] = params->rtype; options[METIS_OPTION_CCORDER] = params->ccorder; options[METIS_OPTION_SEED] = params->seed; options[METIS_OPTION_DBGLVL] = params->dbglvl; options[METIS_OPTION_DENSITY] = params->density * DIVIDER; options[METIS_OPTION_NROWS] = params->nrows; options[METIS_OPTION_NCOLS] = params->ncols; options[METIS_OPTION_KAPPA] = params->kappa; options[METIS_OPTION_NDIAGS] = params->ndiags; /*Inner parameters*/ options[METIS_OPTION_COMPRESS] = params->compress; options[METIS_OPTION_UFACTOR] = params->ufactor; options[METIS_OPTION_PFACTOR] = params->pfactor; options[METIS_OPTION_NCUTS] = params->ncuts; options[METIS_OPTION_NSEPS] = params->nseps; options[METIS_OPTION_NITER] = params->niter; options[METIS_OPTION_OBJTYPE] = params->objtype; perm = imalloc(bigraph->super->nvtxs, "main: perm"); iperm = imalloc(bigraph->super->nvtxs, "main: iperm"); gk_malloc_init(); gk_startcputimer(params->parttimer); /* Initialize my global paramters */ gk_clearcputimer(_parttimer); gk_clearcputimer(_nztimer); _totalcheck = 0; _firsthit = 0; _maxarea = -1; _maxnz = -1; _minarea = 700000000000; _minnz = 300000000; _avgarea = 0; _avgnz = 0; _maxdense = 0; _mindense = 0; /* All the memory that is not allocated in this file should be allocated after * gk_malloc_init() and be freed before gk_GetCurMemoryUsed(). * Memory that is allocated in this file should be free in the end of main()*/ status = METIS_NodeBDF(&bigraph->super->nvtxs, bigraph->super->xadj, bigraph->super->adjncy, bigraph->super->vwgt, bigraph->nrows, bigraph->ncols, options, bigraph->rlabel->label, bigraph->rlabel->ref, bigraph->clabel->label, bigraph->clabel->ref, &rdiags, &cdiags, &ndiags, perm, iperm); gk_stopcputimer(params->parttimer); if (gk_GetCurMemoryUsed() != 0) printf("***It seems that Metis did not free all of its memory!\n"); params->maxmemory = gk_GetMaxMemoryUsed(); gk_malloc_cleanup(0); if (status != METIS_OK) { printf("\n***Metis returned with an error.\n"); } else { if (! params->nooutput) { /* Write the permutation */ gk_startcputimer(params->iotimer); WritePermutation(params->filename, iperm, bigraph->super->nvtxs); WriteDiags(params->filename, rdiags, cdiags, ndiags); gk_stopcputimer(params->iotimer); } BDFReportResults(params, bigraph); } /* free inner function memory */ for (i = 0; i < ndiags; i++) { free((void*)rdiags[i]); free((void*)cdiags[i]); } free((void*)rdiags); free((void*)cdiags); /* free memroy allocated in this function */ FreeBiGraph((ctrl_t*)NULL, &bigraph); gk_free((void **)&perm, &iperm, LTERM); gk_free((void **)¶ms->filename, ¶ms->tpwgtsfile, ¶ms->tpwgts, ¶ms->ubvec, ¶ms, LTERM); return status; }
/*********************************************************************************** * This function is the entry point of the parallel kmetis algorithm that uses * coordinates to compute an initial graph distribution. ************************************************************************************/ int ParMETIS_V3_PartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, real_t *xyz, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm) { idx_t h, i, j, npes, mype, status, nvtxs, seed, dbglvl; idx_t cut, gcut, maxnvtxs; idx_t moptions[METIS_NOPTIONS]; ctrl_t *ctrl; graph_t *graph, *mgraph; real_t balance; size_t curmem; /* Check the input parameters and return if an error */ status = CheckInputsPartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm); if (GlobalSEMinComm(*comm, status) == 0) return METIS_ERROR; status = METIS_OK; gk_malloc_init(); curmem = gk_GetCurMemoryUsed(); /* Setup the ctrl */ ctrl = SetupCtrl(PARMETIS_OP_GKMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm); npes = ctrl->npes; mype = ctrl->mype; /* Take care the nparts == 1 case */ if (*nparts == 1) { iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); *edgecut = 0; goto DONE; } /* Take care of npes == 1 case */ if (npes == 1) { nvtxs = vtxdist[1] - vtxdist[0]; /* subtraction is required when numflag==1 */ METIS_SetDefaultOptions(moptions); moptions[METIS_OPTION_NUMBERING] = *numflag; status = METIS_PartGraphKway(&nvtxs, ncon, xadj, adjncy, vwgt, NULL, adjwgt, nparts, tpwgts, ubvec, moptions, edgecut, part); goto DONE; } /* Setup the graph */ if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag); gk_free((void **)&graph->nvwgt, LTERM); /* Allocate the workspace */ AllocateWSpace(ctrl, 10*graph->nvtxs); /* Compute the initial npes-way partitioning geometric partitioning */ STARTTIMER(ctrl, ctrl->TotalTmr); Coordinate_Partition(ctrl, graph, *ndims, xyz, 1); STOPTIMER(ctrl, ctrl->TotalTmr); /* Move the graph according to the partitioning */ STARTTIMER(ctrl, ctrl->MoveTmr); ctrl->nparts = npes; mgraph = MoveGraph(ctrl, graph); ctrl->nparts = *nparts; SetupGraph_nvwgts(ctrl, mgraph); /* compute nvwgts for the moved graph */ if (ctrl->dbglvl&DBG_INFO) { CommInterfaceData(ctrl, graph, graph->where, graph->where+graph->nvtxs); for (cut=0, i=0; i<graph->nvtxs; i++) { for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) { if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; } } gcut = GlobalSESum(ctrl, cut)/2; maxnvtxs = GlobalSEMax(ctrl, mgraph->nvtxs); balance = (real_t)(maxnvtxs)/((real_t)(graph->gnvtxs)/(real_t)(npes)); rprintf(ctrl, "XYZ Cut: %6"PRIDX" \tBalance: %6.3"PRREAL" [%"PRIDX" %"PRIDX" %"PRIDX"]\n", gcut, balance, maxnvtxs, graph->gnvtxs, npes); } STOPTIMER(ctrl, ctrl->MoveTmr); /* Compute the partition of the moved graph */ STARTTIMER(ctrl, ctrl->TotalTmr); ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts)); if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(ctrl, mgraph->nedges) == 0) { /* serially */ IFSET(ctrl->dbglvl, DBG_INFO, rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes])); PartitionSmallGraph(ctrl, mgraph); } else { /* in parallel */ Global_Partition(ctrl, mgraph); } ParallelReMapGraph(ctrl, mgraph); /* Invert the ordering back to the original graph */ ctrl->nparts = npes; ProjectInfoBack(ctrl, graph, part, mgraph->where); ctrl->nparts = *nparts; *edgecut = mgraph->mincut; STOPTIMER(ctrl, ctrl->TotalTmr); /* Print some stats */ IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm)); IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, mgraph, 0)); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph); if (*numflag > 0) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); DONE: FreeCtrl(&ctrl); if (gk_GetCurMemoryUsed() - curmem > 0) { printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } gk_malloc_cleanup(0); return (int)status; }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ int ParMETIS_V3_PartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz, idx_t *part, MPI_Comm *comm) { idx_t i, nvtxs, firstvtx, npes, mype, status; idx_t *xadj, *adjncy; ctrl_t *ctrl=NULL; graph_t *graph=NULL; size_t curmem; /* Check the input parameters and return if an error */ status = CheckInputsPartGeom(vtxdist, ndims, xyz, part, comm); if (GlobalSEMinComm(*comm, status) == 0) return METIS_ERROR; status = METIS_OK; gk_malloc_init(); curmem = gk_GetCurMemoryUsed(); /* Setup the ctrl */ ctrl = SetupCtrl(PARMETIS_OP_GMETIS, NULL, 1, 1, NULL, NULL, *comm); /*ctrl->dbglvl=15;*/ npes = ctrl->npes; mype = ctrl->mype; /* Trivial case when npes == 1 */ if (npes == 1) { iset(vtxdist[mype+1]-vtxdist[mype], 0, part); goto DONE; } /* Setup a fake graph to allow the rest of the code to work unchanged */ nvtxs = vtxdist[mype+1]-vtxdist[mype]; firstvtx = vtxdist[mype]; xadj = imalloc(nvtxs+1, "ParMETIS_PartGeom: xadj"); adjncy = imalloc(nvtxs, "ParMETIS_PartGeom: adjncy"); for (i=0; i<nvtxs; i++) { xadj[i] = i; adjncy[i] = firstvtx + (i+1)%nvtxs; } xadj[nvtxs] = nvtxs; graph = SetupGraph(ctrl, 1, vtxdist, xadj, NULL, NULL, adjncy, NULL, 0); /* Allocate workspace memory */ AllocateWSpace(ctrl, 5*graph->nvtxs); /* Compute the initial geometric partitioning */ STARTTIMER(ctrl, ctrl->TotalTmr); Coordinate_Partition(ctrl, graph, *ndims, xyz, 0); icopy(graph->nvtxs, graph->where, part); STOPTIMER(ctrl, ctrl->TotalTmr); IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl)); gk_free((void **)&xadj, (void **)&adjncy, LTERM); FreeInitialGraphAndRemap(graph); DONE: FreeCtrl(&ctrl); if (gk_GetCurMemoryUsed() - curmem > 0) { printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } gk_malloc_cleanup(0); return (int)status; }
/************************************************************************* * This function converts a mesh into a dual graph **************************************************************************/ int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *numflag, idx_t *ncommon, idx_t **r_xadj, idx_t **r_adjncy, MPI_Comm *comm) { idx_t i, j, jj, k, kk, m; idx_t npes, mype, pe, count, mask, pass; idx_t nelms, lnns, my_nns, node; idx_t firstelm, firstnode, lnode, nrecv, nsend; idx_t *scounts, *rcounts, *sdispl, *rdispl; idx_t *nodedist, *nmap, *auxarray; idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL; idx_t *sbuffer, *rbuffer, *htable; ikv_t *nodelist, *recvbuffer; idx_t maxcount, *ind, *wgt; idx_t gmaxnode, gminnode; size_t curmem; gk_malloc_init(); curmem = gk_GetCurMemoryUsed(); /* Get basic comm info */ gkMPI_Comm_size(*comm, &npes); gkMPI_Comm_rank(*comm, &mype); nelms = elmdist[mype+1]-elmdist[mype]; if (*numflag > 0) ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1); mask = (1<<11)-1; /*****************************/ /* Determine number of nodes */ /*****************************/ gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind)); for (i=0; i<eptr[nelms]; i++) eind[i] -= gminnode; gmaxnode = GlobalSEMaxComm(*comm, imax(eptr[nelms], eind)); /**************************/ /* Check for input errors */ /**************************/ ASSERT(nelms > 0); /* construct node distribution array */ nodedist = ismalloc(npes+1, 0, "nodedist"); for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) { k = j/(npes-i); nodedist[i+1] = nodedist[i]+k; j -= k; } my_nns = nodedist[mype+1]-nodedist[mype]; firstnode = nodedist[mype]; nodelist = ikvmalloc(eptr[nelms], "nodelist"); auxarray = imalloc(eptr[nelms], "auxarray"); htable = ismalloc(gk_max(my_nns, mask+1), -1, "htable"); scounts = imalloc(npes, "scounts"); rcounts = imalloc(npes, "rcounts"); sdispl = imalloc(npes+1, "sdispl"); rdispl = imalloc(npes+1, "rdispl"); /*********************************************/ /* first find a local numbering of the nodes */ /*********************************************/ for (i=0; i<nelms; i++) { for (j=eptr[i]; j<eptr[i+1]; j++) { nodelist[j].key = eind[j]; nodelist[j].val = j; auxarray[j] = i; /* remember the local element ID that uses this node */ } } ikvsorti(eptr[nelms], nodelist); for (count=1, i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) count++; } lnns = count; nmap = imalloc(lnns, "nmap"); /* renumber the nodes of the elements array */ count = 1; nmap[0] = nodelist[0].key; eind[nodelist[0].val] = 0; nodelist[0].val = auxarray[nodelist[0].val]; /* Store the local element ID */ for (i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) { nmap[count] = nodelist[i].key; count++; } eind[nodelist[i].val] = count-1; nodelist[i].val = auxarray[nodelist[i].val]; /* Store the local element ID */ } gkMPI_Barrier(*comm); /**********************************************************/ /* perform comms necessary to construct node-element list */ /**********************************************************/ iset(npes, 0, scounts); for (pe=i=0; i<eptr[nelms]; i++) { while (nodelist[i].key >= nodedist[pe+1]) pe++; scounts[pe] += 2; } ASSERT(pe < npes); gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); ASSERT(sdispl[npes] == eptr[nelms]*2); nrecv = rdispl[npes]/2; recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer"); gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer, rcounts, rdispl, IDX_T, *comm); /**************************************/ /* construct global node-element list */ /**************************************/ gnptr = ismalloc(my_nns+1, 0, "gnptr"); for (i=0; i<npes; i++) { for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; ASSERT(lnode >= 0 && lnode < my_nns) gnptr[lnode]++; } } MAKECSR(i, my_nns, gnptr); gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind"); for (pe=0; pe<npes; pe++) { firstelm = elmdist[pe]; for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm; } } SHIFTCSR(i, my_nns, gnptr); /*********************************************************/ /* send the node-element info to the relevant processors */ /*********************************************************/ iset(npes, 0, scounts); /* use a hash table to ensure that each node is sent to a proc only once */ for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { scounts[pe] += gnptr[lnode+1]-gnptr[lnode]; htable[lnode] = 1; } } /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); /* create the send buffer */ nsend = sdispl[npes]; sbuffer = imalloc(gk_max(1, nsend), "sbuffer"); count = 0; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) { if (k == gnptr[lnode]) sbuffer[count++] = -1*(gnind[k]+1); else sbuffer[count++] = gnind[k]; } htable[lnode] = 1; } } ASSERT(count == sdispl[pe+1]); /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); nrecv = rdispl[npes]; rbuffer = imalloc(gk_max(1, nrecv), "rbuffer"); gkMPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_T, (void *)rbuffer, rcounts, rdispl, IDX_T, *comm); k = -1; nptr = ismalloc(lnns+1, 0, "nptr"); nind = rbuffer; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]; j<rdispl[pe+1]; j++) { if (nind[j] < 0) { k++; nind[j] = (-1*nind[j])-1; } nptr[k]++; } } MAKECSR(i, lnns, nptr); ASSERT(k+1 == lnns); ASSERT(nptr[lnns] == nrecv) myxadj = *r_xadj = (idx_t *)malloc(sizeof(idx_t)*(nelms+1)); if (myxadj == NULL) gk_errexit(SIGMEM, "Failed to allocate memory for the dual graph's xadj array.\n"); iset(nelms+1, 0, myxadj); iset(mask+1, -1, htable); firstelm = elmdist[mype]; /* Two passes -- in first pass, simply find out the memory requirements */ maxcount = 200; ind = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: ind"); wgt = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: wgt"); for (pass=0; pass<2; pass++) { for (i=0; i<nelms; i++) { for (count=0, j=eptr[i]; j<eptr[i+1]; j++) { node = eind[j]; for (k=nptr[node]; k<nptr[node+1]; k++) { if ((kk=nind[k]) == firstelm+i) continue; m = htable[(kk&mask)]; if (m == -1) { ind[count] = kk; wgt[count] = 1; htable[(kk&mask)] = count++; } else { if (ind[m] == kk) { wgt[m]++; } else { for (jj=0; jj<count; jj++) { if (ind[jj] == kk) { wgt[jj]++; break; } } if (jj == count) { ind[count] = kk; wgt[count++] = 1; } } } /* Adjust the memory. This will be replaced by a idxrealloc() when GKlib will be incorporated */ if (count == maxcount-1) { maxcount *= 2; ind = irealloc(ind, maxcount, "ParMETIS_V3_Mesh2Dual: ind"); wgt = irealloc(wgt, maxcount, "ParMETIS_V3_Mesh2Dual: wgt"); } } } for (j=0; j<count; j++) { htable[(ind[j]&mask)] = -1; if (wgt[j] >= *ncommon) { if (pass == 0) myxadj[i]++; else myadjncy[myxadj[i]++] = ind[j]; } } } if (pass == 0) { MAKECSR(i, nelms, myxadj); myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]); if (myadjncy == NULL) gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n"); } else { SHIFTCSR(i, nelms, myxadj); } } /*****************************************/ /* correctly renumber the elements array */ /*****************************************/ for (i=0; i<eptr[nelms]; i++) eind[i] = nmap[eind[i]] + gminnode; if (*numflag == 1) ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0); /* do not free nodelist, recvbuffer, rbuffer */ gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts, &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer, &nptr, &ind, &wgt, LTERM); if (gk_GetCurMemoryUsed() - curmem > 0) { printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n", (ssize_t)(gk_GetCurMemoryUsed() - curmem)); } gk_malloc_cleanup(0); return METIS_OK; }
idx_t* gpmetis( int argc, char **argv ) /*************************************************************************/ /*! Let the game begin! */ /*************************************************************************/ //int main(int argc, char *argv[]) { idx_t i; char *curptr, *newptr; idx_t options[METIS_NOPTIONS]; graph_t *graph; idx_t *part; idx_t objval; params_t *params; int status=0; gk_optind = 0; //printf( "argc: %d\n", argc ); //printf( "gk_optind %d\n", gk_optind ); fflush( stdout ); for( i = 0; i < argc; i++ ) { //printf( "%s*\n", argv[ i ] ); } params = parse_cmdline(argc, argv); //printf( "gk_optind %d\n", gk_optind ); //fflush( stdout ); //return NULL; gk_startcputimer(params->iotimer); graph = ReadGraph(params); ReadTPwgts(params, graph->ncon); gk_stopcputimer(params->iotimer); /* Check if the graph is contiguous */ if (params->contig && !IsConnected(graph, 0)) { printf("***The input graph is not contiguous.\n" "***The specified -contig option will be ignored.\n"); params->contig = 0; } /* Get ubvec if supplied */ if (params->ubvecstr) { params->ubvec = rmalloc(graph->ncon, "main"); curptr = params->ubvecstr; for (i=0; i<graph->ncon; i++) { params->ubvec[i] = strtoreal(curptr, &newptr); if (curptr == newptr) errexit("Error parsing entry #%"PRIDX" of ubvec [%s] (possibly missing).\n", i, params->ubvecstr); curptr = newptr; } } /* Setup iptype */ if (params->iptype == -1) { if (params->ptype == METIS_PTYPE_RB) { if (graph->ncon == 1) params->iptype = METIS_IPTYPE_GROW; else params->iptype = METIS_IPTYPE_RANDOM; } } GPPrintInfo(params, graph); part = imalloc(graph->nvtxs, "main: part"); METIS_SetDefaultOptions(options); options[METIS_OPTION_OBJTYPE] = params->objtype; options[METIS_OPTION_CTYPE] = params->ctype; options[METIS_OPTION_IPTYPE] = params->iptype; options[METIS_OPTION_RTYPE] = params->rtype; options[METIS_OPTION_MINCONN] = params->minconn; options[METIS_OPTION_CONTIG] = params->contig; options[METIS_OPTION_SEED] = params->seed; options[METIS_OPTION_NITER] = params->niter; options[METIS_OPTION_NCUTS] = params->ncuts; options[METIS_OPTION_UFACTOR] = params->ufactor; options[METIS_OPTION_DBGLVL] = params->dbglvl; gk_malloc_init(); gk_startcputimer(params->parttimer); switch (params->ptype) { case METIS_PTYPE_RB: status = METIS_PartGraphRecursive(&graph->nvtxs, &graph->ncon, graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, graph->adjwgt, ¶ms->nparts, params->tpwgts, params->ubvec, options, &objval, part); break; case METIS_PTYPE_KWAY: status = METIS_PartGraphKway(&graph->nvtxs, &graph->ncon, graph->xadj, graph->adjncy, graph->vwgt, graph->vsize, graph->adjwgt, ¶ms->nparts, params->tpwgts, params->ubvec, options, &objval, part); break; } gk_stopcputimer(params->parttimer); if (gk_GetCurMemoryUsed() != 0) printf("***It seems that Metis did not free all of its memory! Report this.\n"); params->maxmemory = gk_GetMaxMemoryUsed(); gk_malloc_cleanup(0); if (status != METIS_OK) { printf("\n***Metis returned with an error.\n"); } else { if (!params->nooutput) { /* Write the solution */ gk_startcputimer(params->iotimer); WritePartition(params->filename, part, graph->nvtxs, params->nparts); gk_stopcputimer(params->iotimer); } GPReportResults(params, graph, part, objval); } idx_t *r_part = ( idx_t* ) calloc( graph->nvtxs, sizeof( idx_t ) ); for( i = 0; i < graph->nvtxs; i++ ) { r_part[ i ] = part[ i ]; } FreeGraph(&graph); gk_free((void **)&part, LTERM); gk_free((void **)¶ms->filename, ¶ms->tpwgtsfile, ¶ms->tpwgts, ¶ms->ubvecstr, ¶ms->ubvec, ¶ms, LTERM); return r_part; }