double amax2D(double **M, int len){ double max; int i, j; max = amax(M[0], len); for(i=1;i<=len;i++){ if(amax(M[i], len) > max){ max = amax(M[i], len); } else { continue; } } return(max); }
template<class T> bool next_combination(T first1, T last1, T first2, T last2) { // maximal element in set of all available elements T max_all = amax(first1, last1); T it = last2, tmp; it--; bool b = false; T before; int cnt = 0; for(;;before = it--) { if(it == first2 && *before == *get_next_bigger(first1, last1, it)) return false; if(cnt++) if(*get_next_bigger(first1, last1, it) == *before) continue; if(*it < *max_all) break; } tmp = it; do{ *it = *get_next_bigger(first1, last1, tmp); } while((tmp = it++) != last2); return true; }
int main(void) { /* CPUINFO info; GetCPUInfo(&info); printf("v_name:\t\t%s\n", info.vendorName); printf("model:\t\t%s\n", info.modelName); printf("family:\t\t%d\n", info.Family); printf("model:\t\t%d\n", info.Model); printf("stepping:\t%d\n", info.Stepping); printf("feature:\t%08x\n", info.Feature); expand(info.Feature, info.Checks); printf("os_support:\t%08x\n", info.OS_Support); expand(info.OS_Support, info.Checks); printf("checks:\t\t%08x\n", info.Checks); */ printf("test %d",amax(5,2,7) ); printf("\n\nPress ENTER to exit. "); getchar(); return 1; }
TYPE nrminf( // const ArrayListV<TYPE>& ax //[in] ) { int n = ax.size(); const TYPE* x = ax.getPointer(); return amax(n, x, 1); }
// **************************************************************************** // Method: avtAxisRestrictionToolInterface::ResetNumberOfAxes // // Purpose: // Sets the number of available axes. // // Arguments: // n the number of axes // // Programmer: Jeremy Meredith // Creation: February 1, 2008 // // Modifications: // Jeremy Meredith, Fri Feb 15 13:21:20 EST 2008 // Added axis names to the axis restriction attributes. // // **************************************************************************** void avtAxisRestrictionToolInterface::ResetNumberOfAxes(int n) { AxisRestrictionAttributes *a = (AxisRestrictionAttributes *)atts; stringVector aname(n, ""); doubleVector amin(n, -1e+37); doubleVector amax(n, +1e+37); a->SetNames(aname); a->SetMinima(amin); a->SetMaxima(amax); }
/************************************************************************* * This function checks if the pairwise balance of the between the two * partitions will improve by moving the vertex v from pfrom to pto, * subject to the target partition weights of tfrom, and tto respectively **************************************************************************/ int IsHBalanceBetterFT(int ncon, int nparts, float *pfrom, float *pto, float *vwgt, float *ubvec) { int i/*, j, k*/; float blb1=0.0, alb1=0.0, sblb=0.0, salb=0.0; float blb2=0.0, alb2=0.0; float temp; for (i=0; i<ncon; i++) { temp = amax(pfrom[i], pto[i])*nparts/ubvec[i]; if (blb1 < temp) { blb2 = blb1; blb1 = temp; } else if (blb2 < temp) blb2 = temp; sblb += temp; temp = amax(pfrom[i]-vwgt[i], pto[i]+vwgt[i])*nparts/ubvec[i]; if (alb1 < temp) { alb2 = alb1; alb1 = temp; } else if (alb2 < temp) alb2 = temp; salb += temp; } if (alb1 < blb1) return 1; if (blb1 < alb1) return 0; if (alb2 < blb2) return 1; if (blb2 < alb2) return 0; return salb < sblb; }
/************************************************************************* * This function checks if the pairwise balance of the between the two * partitions will improve by moving the vertex v from pfrom to pto, * subject to the target partition weights of tfrom, and tto respectively **************************************************************************/ int IsHBalanceBetterFT(int ncon, floattype *pfrom, floattype *pto, floattype *nvwgt, floattype *ubvec) { int i; floattype blb1=0.0, alb1=0.0, sblb=0.0, salb=0.0; floattype blb2=0.0, alb2=0.0; floattype temp; for (i=0; i<ncon; i++) { temp = amax(pfrom[i], pto[i])/ubvec[i]; if (blb1 < temp) { blb2 = blb1; blb1 = temp; } else if (blb2 < temp) blb2 = temp; sblb += temp; temp = amax(pfrom[i]-nvwgt[i], pto[i]+nvwgt[i])/ubvec[i]; if (alb1 < temp) { alb2 = alb1; alb1 = temp; } else if (alb2 < temp) alb2 = temp; salb += temp; } if (alb1 < blb1) return 1; if (blb1 < alb1) return 0; if (alb2 < blb2) return 1; if (blb2 < alb2) return 0; return salb < sblb; }
/************************************************************************* * This function is the entry point for KWMETIS **************************************************************************/ void METIS_mCPartGraphKway(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, floattype *rubvec, int *options, int *edgecut, idxtype *part) { int i, j; GraphType graph; CtrlType ctrl; if (*numflag == 1) Change2CNumbering(*nvtxs, xadj, adjncy); SetUpGraph(&graph, OP_KMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = McKMETIS_CTYPE; ctrl.IType = McKMETIS_ITYPE; ctrl.RType = McKMETIS_RTYPE; ctrl.dbglvl = McKMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.optype = OP_KMETIS; ctrl.CoarsenTo = amax((*nvtxs)/(20*log2Int(*nparts)), 30*(*nparts)); ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo); InitRandom(-1); AllocateWorkSpace(&ctrl, &graph, *nparts); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); ASSERT(CheckGraph(&graph)); *edgecut = MCMlevelKWayPartitioning(&ctrl, &graph, *nparts, part, rubvec); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); FreeWorkSpace(&ctrl, &graph); if (*numflag == 1) Change2FNumbering(*nvtxs, xadj, adjncy, part); }
/************************************************************************* * This function is the entry point for KWMETIS **************************************************************************/ void METIS_WPartGraphVKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *vsize, int *wgtflag, int *numflag, int *nparts, float *tpwgts, int *options, int *volume, idxtype *part) { int i, j; GraphType graph; CtrlType ctrl; if (*numflag == 1) Change2CNumbering(*nvtxs, xadj, adjncy); VolSetUpGraph(&graph, OP_KVMETIS, *nvtxs, 1, xadj, adjncy, vwgt, vsize, *wgtflag); if (options[0] == 0) { /* Use the default parameters */ ctrl.CType = KVMETIS_CTYPE; ctrl.IType = KVMETIS_ITYPE; ctrl.RType = KVMETIS_RTYPE; ctrl.dbglvl = KVMETIS_DBGLVL; } else { ctrl.CType = options[OPTION_CTYPE]; ctrl.IType = options[OPTION_ITYPE]; ctrl.RType = options[OPTION_RTYPE]; ctrl.dbglvl = options[OPTION_DBGLVL]; } ctrl.optype = OP_KVMETIS; ctrl.CoarsenTo = amax((*nvtxs)/(40*log2Int(*nparts)), 20*(*nparts)); ctrl.maxvwgt = 1.5*((graph.vwgt ? idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo); InitRandom(-1); AllocateWorkSpace(&ctrl, &graph, *nparts); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); *volume = MlevelVolKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.03); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl)); FreeWorkSpace(&ctrl, &graph); if (*numflag == 1) Change2FNumbering(*nvtxs, xadj, adjncy, part); }
/*********************************************************************************** * This function creates the fused-element-graph and returns the partition ************************************************************************************/ void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol, realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *nparts, int *options, idxtype *part, MPI_Comm *comm) { int npes, mype, nvtxs; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nvtxs = vtxdist[mype+1]-vtxdist[mype]; /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */ SetUpCtrl(&ctrl, *nparts, options, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts)); graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag); graph->where = part; PreAllocateMemory(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); CreateFusedElementGraph(&ctrl, graph, &wspace, numflag); idxcopy(nvtxs, graph->where, part); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); if (((*wgtflag)&2) == 0) IMfree((void**)&graph->vwgt, LTERM); IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy, &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr, &graph->imap, &graph->sendind, &graph, LTERM); FreeWSpace(&wspace); FreeCtrl(&ctrl); }
/*********************************************************************************** * This function is the entry point of the parallel ordering algorithm. * This function assumes that the graph is already nice partitioned among the * processors and then proceeds to perform recursive bisection. ************************************************************************************/ void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag, int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm) { int i, j; int ltvwgts[MAXNCON]; int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph, *mgraph; idxtype *morder; int minnvtxs; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); nparts = npes; if (!ispow2(npes)) { if (mype == 0) printf("Error: The number of processors must be a power of 2!\n"); return; } if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) { if (mype == 0) printf("Error: Too many processors for this many vertices.\n"); return; } minnvtxs = vtxdist[1]-vtxdist[0]; for (i=0; i<npes; i++) minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i]; if (minnvtxs < (int)((float)npes*1.1)) { if (mype == 0) printf("Error: vertices are not distributed equally.\n"); return; } if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1); SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts)); ctrl.seed = mype; ctrl.sync = seed; ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts"); ctrl.ubvec[0] = 1.03; graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag); PreAllocateMemory(&ctrl, graph, &wspace); /*======================================================= * Compute the initial k-way partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Moc_Global_Partition(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*======================================================= * Move the graph according to the partitioning =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); MALLOC_CHECK(NULL); graph->ncon = 1; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); /*======================================================= * Now compute an ordering of the moved graph =======================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); ctrl.ipart = ISEP_NODE; ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000)); /* compute tvwgts */ for (j=0; j<mgraph->ncon; j++) ltvwgts[j] = 0; for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j]; for (j=0; j<mgraph->ncon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<mgraph->ncon; j++) mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]); morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder"); MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace); MALLOC_CHECK(NULL); /* Invert the ordering back to the original graph */ ProjectInfoBack(&ctrl, graph, order, morder, &wspace); MALLOC_CHECK(NULL); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); free(ctrl.tpwgts); free(morder); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, 0); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (*numflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0); MALLOC_CHECK(NULL); }
/************************************************************************* * This function induces a DT that satisfied the given size requirements **************************************************************************/ idxtype InduceDecissionTree(idxtype nvtxs, DKeyValueType **xyzcand, idxtype *sflag, idxtype nparts, idxtype *part, idxtype maxnvtxs, idxtype minnvtxs, float minfrac, idxtype *r_nnodes, idxtype *r_nlnodes, DTreeNodeType *dtree, idxtype *dtpart, idxtype *dtipart, idxtype *r_nclean, idxtype *r_naclean, idxtype *r_ndirty, idxtype *r_maxdepth, idxtype *marker) { idxtype i, nr, nl, j, k, mynodeID, dim, pid, bestdim, nleft, nright, lmaxdepth, rmaxdepth, isclean, isleaf, cleanmarked, nsvtxs; idxtype *tpwgts, *pwgts[2], lnvtxs[3]; double points[3], scores[3], sum2[2], newscore, bestscore, bestpoint; DKeyValueType *lxyzcand[3], *rxyzcand[3]; *r_maxdepth = 1; mynodeID = (*r_nnodes)++; dtree[mynodeID].nvtxs = nvtxs; dtree[mynodeID].nsvtxs = 0; dtree[mynodeID].leafid = -1; /* Determine overall pwgts */ tpwgts = idxsmalloc(nparts, 0, "InduceDecissionTree: tpwgts"); for (i=0; i<nvtxs; i++) { dtree[mynodeID].nsvtxs += (sflag[xyzcand[0][i].val] ? 1 : 0); tpwgts[part[xyzcand[0][i].val]]++; } pid = idxargmax(nparts, tpwgts); /*----------------------------------------------------------------------- * Check the exit conditions *-----------------------------------------------------------------------*/ isclean = (tpwgts[pid] == nvtxs); isleaf = 0; if (nvtxs <= maxnvtxs && isclean) { /* Determine if single class */ //mprintf("LEAF1: %5D Pure node!\n", nvtxs); *r_nclean += nvtxs; isleaf = 1; } else if (nvtxs < minnvtxs) { /* Determine if too small to continue */ for (k=0, i=0; i<nparts; i++) k += (tpwgts[i] > 0 ? 1 : 0); //mprintf("LEAF3: %5D %5D Skipping small node!\n", nvtxs, k); *r_ndirty += nvtxs*k; isleaf = 1; } else if (nvtxs < maxnvtxs && tpwgts[pid] >= (int)(minfrac*nvtxs)) { /* Determine if mostly one class */ //mprintf("LEAF2: %5D %5D %4D Almost pure node!\n", nvtxs, tpwgts[idxargmax(nparts, tpwgts)], idxargmax(nparts, tpwgts)); *r_naclean += nvtxs; isleaf = 1; } else { /* Check if all coordinates are the same */ for (dim=0; dim<3; dim++) { for (i=1; i<nvtxs; i++) { if (fabs(xyzcand[dim][0].key - xyzcand[dim][i].key) > DEPSILON) break; } if (i != nvtxs) break; } if (dim == 3) { /* All coordinates matched! Treat it as a dirty node! */ for (k=0, i=0; i<nparts; i++) k += (tpwgts[i] > 0 ? 1 : 0); mprintf("LEAF4: %5D %5D Skipping same coord-nodes! (%D %D)\n", nvtxs, k, isclean, part[xyzcand[0][0].val]); *r_ndirty += nvtxs*k; isleaf = 1; } } if (isleaf) { for (i=0; i<nvtxs; i++) { dtpart[xyzcand[0][i].val] = *r_nlnodes; dtipart[xyzcand[0][i].val] = pid; } dtree[mynodeID].leafid = (*r_nlnodes)++; dtree[mynodeID].partid = pid; dtree[mynodeID].left = dtree[mynodeID].right = -1; gk_free((void **)&tpwgts, LTERM); return mynodeID; } /*----------------------------------------------------------------------- * Find the best splitting point *-----------------------------------------------------------------------*/ pwgts[0] = idxmalloc(nparts, "InduceDecissionTree: pwgts[0]"); pwgts[1] = idxmalloc(nparts, "InduceDecissionTree: pwgts[1]"); /* Go and scan each dimension */ for (dim=0; dim<3; dim++) { /* Establish initial conditions for the scan */ idxcopy(nparts, tpwgts, pwgts[1]); idxset(nparts, 0, pwgts[0]); sum2[0] = sum2[1] = 0.0; for (j=0; j<nparts; j++) sum2[1] += (double)pwgts[1][j]*pwgts[1][j]; scores[dim] = -1.0; nleft = 0; /* Scan until you find a well-separated vertex */ for (i=0; i<nvtxs-1; i++) { pid = part[xyzcand[dim][i].val]; sum2[0] += 1.0 + (double)2*pwgts[0][pid]; sum2[1] += 1.0 - (double)2*pwgts[1][pid]; pwgts[0][pid]++; pwgts[1][pid]--; nleft++; if (fabs(xyzcand[dim][i].key < xyzcand[dim][i+1].key) > DEPSILON) { scores[dim] = sqrt(sum2[0])+sqrt(sum2[1]); points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0; lnvtxs[dim] = nleft; break; } } #ifdef PRINTSTAT if (i == nvtxs-1) mprintf("DTree: Initial Scan Along dim %D failed!\n", dim); #endif /* Continue with the rest */ for (i++; i<nvtxs-1; i++) { pid = part[xyzcand[dim][i].val]; sum2[0] += 1.0 + (double)2*pwgts[0][pid]; sum2[1] += 1.0 - (double)2*pwgts[1][pid]; pwgts[0][pid]++; pwgts[1][pid]--; nleft++; newscore = sqrt(sum2[0])+sqrt(sum2[1]); if (isclean) { if (i >= nvtxs/2) { if (fabs(xyzcand[dim][i].key - xyzcand[dim][i+1].key) < DEPSILON) continue; scores[dim] = xyzcand[dim][nvtxs-1].key - xyzcand[dim][0].key; /* Use the axis span as the score */ points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0; lnvtxs[dim] = nleft; break; } } else { if (newscore > scores[dim]) { if (fabs(xyzcand[dim][i].key - xyzcand[dim][i+1].key) < DEPSILON) continue; scores[dim] = newscore; points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0; lnvtxs[dim] = nleft; } } //mprintf("%5D %f %f %f\n", nleft, newscore, sum2[0], sum2[1]); } #ifdef PRINTSTAT /* Print some Stats */ if (scores[dim] >= 0) { mprintf("Dim: %3D, Score: %f, Point: %f [%5D %5D] [%f %f]\n", dim, scores[dim], points[dim], lnvtxs[dim], nvtxs-lnvtxs[dim], xyzcand[dim][0].key, xyzcand[dim][nvtxs-1].key); idxcopy(nparts, tpwgts, pwgts[1]); idxset(nparts, 0, pwgts[0]); for (i=0; i<lnvtxs[dim]; i++) { pid = part[xyzcand[dim][i].val]; pwgts[0][pid]++; pwgts[1][pid]--; } for (j=0; j<nparts; j++) if (pwgts[0][j]+pwgts[1][j] > 0) mprintf("%5D => %5D %5D\n", j, pwgts[0][j], pwgts[1][j]); } #endif } /* Determine the best overall score */ bestdim = 0; bestscore = scores[0]; bestpoint = points[0]; for (dim=1; dim<3; dim++) { if (scores[dim] > bestscore) { bestscore = scores[dim]; bestpoint = points[dim]; bestdim = dim; } } if (bestscore <= 0.0) errexit("Major Failure... Non-seperable! %4d nodes. Needs to be fixed!\n", nvtxs); dtree[mynodeID].dim = bestdim; dtree[mynodeID].value = bestpoint; //mprintf("BestDim: %D!\n", bestdim); /*----------------------------------------------------------------------- * Ok, now go and do the split *-----------------------------------------------------------------------*/ nleft = lnvtxs[bestdim]; nright = nvtxs - nleft; for (dim=0; dim<3; dim++) { lxyzcand[dim] = (DKeyValueType *)gk_malloc(sizeof(DKeyValueType)*nleft, "InduceDecissionTree: lxyzcand[dim]"); rxyzcand[dim] = (DKeyValueType *)gk_malloc(sizeof(DKeyValueType)*nright, "InduceDecissionTree: rxyzcand[dim]"); } /* Mark the left vertices */ for (i=0; i<nleft; i++) marker[xyzcand[bestdim][i].val] = 1; for (dim=0; dim<3; dim++) { for (nl=nr=0, i=0; i<nvtxs; i++) { if (marker[xyzcand[dim][i].val]) lxyzcand[dim][nl++] = xyzcand[dim][i]; else rxyzcand[dim][nr++] = xyzcand[dim][i]; } } /* Reset the marking */ for (i=0; i<nleft; i++) marker[xyzcand[bestdim][i].val] = 0; gk_free((void **)&tpwgts, &pwgts[0], &pwgts[1], LTERM); dtree[mynodeID].left = InduceDecissionTree(nleft, lxyzcand, sflag, nparts, part, maxnvtxs, minnvtxs, minfrac, r_nnodes, r_nlnodes, dtree, dtpart, dtipart, r_nclean, r_naclean, r_ndirty, &lmaxdepth, marker); dtree[mynodeID].right = InduceDecissionTree(nright, rxyzcand, sflag, nparts, part, maxnvtxs, minnvtxs, minfrac, r_nnodes, r_nlnodes, dtree, dtpart, dtipart, r_nclean, r_naclean, r_ndirty, &rmaxdepth, marker); *r_maxdepth += amax(lmaxdepth, rmaxdepth); gk_free((void **)&lxyzcand[0], &lxyzcand[1], &lxyzcand[2], &rxyzcand[0], &rxyzcand[1], &rxyzcand[2], LTERM); return mynodeID; }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocGeneral2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; int qsizes[MAXNCON][2]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); qsizes[i][0] = qsizes[i][1] = 0; } for (i=0; i<nvtxs; i++) { qnum[i] = samax(ncon, nvwgt+i*ncon); qsizes[qnum[i]][where[i]]++; } /* printf("Weight Distribution: \t"); for (i=0; i<ncon; i++) printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); printf("\n"); */ for (from=0; from<2; from++) { for (j=0; j<ncon; j++) { if (qsizes[j][from] == 0) { for (i=0; i<nvtxs; i++) { if (where[i] != from) continue; k = samax2(ncon, nvwgt+i*ncon); if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } /* printf("Weight Distribution (after):\t "); for (i=0; i<ncon; i++) printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); printf("\n"); */ for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); minbal = origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); newcut = mincut = graph->mincut; mincutorder = -1; if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal); } idxset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert all nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nvtxs, perm, 1); for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (minbal < lbfactor) break; SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if (newbal < minbal || (newbal == minbal && (newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf(", %.3f LB: %.3f\n", minbal, newbal); } /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts)); } graph->mincut = mincut; graph->nbnd = nbnd; for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor) { int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; int qsizes[MAXNCON][2]; KeyValueType *cand; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 15), 100); /* Initialize the queues */ for (i=0; i<ncon; i++) { FPQueueInit(&parts[i][0], nvtxs); FPQueueInit(&parts[i][1], nvtxs); qsizes[i][0] = qsizes[i][1] = 0; } for (i=0; i<nvtxs; i++) { qnum[i] = samax(ncon, nvwgt+i*ncon); qsizes[qnum[i]][where[i]]++; } for (from=0; from<2; from++) { for (j=0; j<ncon; j++) { if (qsizes[j][from] == 0) { for (i=0; i<nvtxs; i++) { if (where[i] != from) continue; k = samax2(ncon, nvwgt+i*ncon); if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) { qsizes[qnum[i]][from]--; qsizes[j][from]++; qnum[i] = j; } } } } } for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); newcut = mincut = graph->mincut; mincutorder = -1; idxset(nvtxs, -1, moved); /* Insert all nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nvtxs; i++) { cand[i].key = id[i]-ed[i]; cand[i].val = i; } ikeysort(nvtxs, cand); for (ii=0; ii<nvtxs; ii++) { i = cand[ii].val; FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (minbal < lbfactor) break; Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1) break; saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if (newbal < minbal || (newbal == minbal && (newcut < mincut || (newcut == mincut && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(oldgain), (float)(ed[k]-id[k])); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; for (i=0; i<ncon; i++) { FPQueueFree(&parts[i][0]); FPQueueFree(&parts[i][1]); } GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM); return; }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses) { int i, ii, j, k; int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; FPQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[MAXNCON*2]; KeyValueType *cand; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->sendind; ed = graph->recvind; npwgts = graph->gnpwgts; bndptr = graph->sendptr; bndind = graph->recvptr; moved = idxmalloc(nvtxs, "moved"); swaps = idxmalloc(nvtxs, "swaps"); qnum = idxmalloc(nvtxs, "qnum"); cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { FPQueueInit(&parts[i][0], nvtxs); FPQueueInit(&parts[i][1], nvtxs); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); for (i=0; i<ncon; i++) { rtpwgts[i] = origbal*tpwgts[i]; rtpwgts[ncon+i] = origbal*tpwgts[ncon+i]; } idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { FPQueueReset(&parts[i][0]); FPQueueReset(&parts[i][1]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); /* Insert boundary nodes in the priority queues */ nbnd = graph->gnvtxs; for (i=0; i<nbnd; i++) { cand[i].key = id[i]-ed[i]; cand[i].val = i; } ikeysort(nbnd, cand); for (ii=0; ii<nbnd; ii++) { i = bndind[cand[ii].val]; FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i])); } for (nswaps=0; nswaps<nvtxs; nswaps++) { Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1) break; saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[i]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ FPQueueDelete(&parts[qnum[k]][where[k]], k); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k])); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k])); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } graph->mincut = mincut; graph->gnvtxs = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { FPQueueFree(&parts[i][0]); FPQueueFree(&parts[i][1]); } GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM); return; }
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void Mc_SerialKWayAdaptRefine(GraphType *graph, int nparts, idxtype *home, float *orgubvec, int npasses) { int i, ii, iii, j, k; int nvtxs, ncon, pass, nmoves, myndegrees; int from, me, myhome, to, oldcut, gain, tmp; idxtype *xadj, *adjncy, *adjwgt; idxtype *where; EdgeType *mydegrees; RInfoType *rinfo, *myrinfo; float *npwgts, *nvwgt, *minwgt, *maxwgt, ubvec[MAXNCON]; int gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home; int zero_gain, better_balance_ft, better_balance_tt; KeyValueType *cand; int mype; MPI_Comm_rank(MPI_COMM_WORLD, &mype); nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; rinfo = graph->rinfo; npwgts = graph->gnpwgts; /* Setup the weight intervals of the various subdomains */ cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand"); minwgt = fmalloc(nparts*ncon, "minwgt"); maxwgt = fmalloc(nparts*ncon, "maxwgt"); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; i<ncon; i++) ubvec[i] = amax(ubvec[i], orgubvec[i]); for (i=0; i<nparts; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = ubvec[j]/(float)nparts; minwgt[i*ncon+j] = ubvec[j]*(float)nparts; } } for (pass=0; pass<npasses; pass++) { oldcut = graph->mincut; for (i=0; i<nvtxs; i++) { cand[i].key = rinfo[i].id-rinfo[i].ed; cand[i].val = i; } ikeysort(nvtxs, cand); nmoves = 0; for (iii=0; iii<nvtxs; iii++) { i = cand[iii].val; myrinfo = rinfo+i; if (myrinfo->ed >= myrinfo->id) { from = where[i]; myhome = home[i]; nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; mydegrees = myrinfo->degrees; myndegrees = myrinfo->ndegrees; for (k=0; k<myndegrees; k++) { to = mydegrees[k].edge; gain = mydegrees[k].ewgt - myrinfo->id; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) { break; } } /* break out if you did not find a candidate */ if (k == myndegrees) continue; for (j=k+1; j<myndegrees; j++) { to = mydegrees[j].edge; going_home = (myhome == to); gain_is_same = (mydegrees[j].ewgt == mydegrees[k].ewgt); gain_is_greater = (mydegrees[j].ewgt > mydegrees[k].ewgt); fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon); better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, npwgts+to*ncon,nvwgt,ubvec); better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon, npwgts+to*ncon,nvwgt,ubvec); if ( (gain_is_greater && (fit_in_to || better_balance_ft) ) || (gain_is_same && ( (fit_in_to && going_home) || better_balance_tt ) ) ) { k = j; } } to = mydegrees[k].edge; going_home = (myhome == to); zero_gain = (mydegrees[k].ewgt == myrinfo->id); fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon, maxwgt+from*ncon); better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon, npwgts+to*ncon,nvwgt,ubvec); if (zero_gain && !going_home && !better_balance_ft && fit_in_from) continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ graph->mincut -= mydegrees[k].ewgt-myrinfo->id; /* Update where, weight, and ID/ED information of the vertex you moved */ saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; myrinfo->ed += myrinfo->id-mydegrees[k].ewgt; SWAP(myrinfo->id, mydegrees[k].ewgt, tmp); if (mydegrees[k].ewgt == 0) { myrinfo->ndegrees--; mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; } else mydegrees[k].edge = from; /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { ii = adjncy[j]; me = where[ii]; myrinfo = rinfo+ii; mydegrees = myrinfo->degrees; if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (me == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution of the ed from 'from' */ if (me != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (mydegrees[k].edge == from) { if (mydegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge; mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt; } else mydegrees[k].ewgt -= adjwgt[j]; break; } } } /* Add contribution of the ed to 'to' */ if (me != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (mydegrees[k].edge == to) { mydegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { mydegrees[myrinfo->ndegrees].edge = to; mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } nmoves++; } } if (graph->mincut == oldcut) break; } GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM); return; }
/************************************************************************* * This function computes the assignment using the the objective the * minimization of the total volume of data that needs to move **************************************************************************/ void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, WorkSpaceType *wspace, int npasses, int ncon) { int i, ii, j, k, nparts, mype; int pass, maxipwgt, nmapped, oldwgt, newwgt, done; idxtype *rowmap, *mylpwgts; KeyValueType *recv, send; int nsaved, gnsaved; mype = ctrl->mype; nparts = ctrl->nparts; recv = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv"); mylpwgts = idxmalloc(nparts, "mylpwgts"); done = nmapped = 0; idxset(nparts, -1, map); rowmap = idxset(nparts, -1, wspace->pv3); idxcopy(nparts, lpwgts, mylpwgts); for (pass=0; pass<npasses; pass++) { maxipwgt = idxamax(nparts, mylpwgts); if (mylpwgts[maxipwgt] > 0 && !done) { send.key = -mylpwgts[maxipwgt]; send.val = mype*nparts+maxipwgt; } else { send.key = 0; send.val = -1; } /* each processor sends its selection */ MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm); ikeysort(nparts, recv); if (recv[0].key == 0) break; /* now make as many assignments as possible */ for (ii=0; ii<nparts; ii++) { i = recv[ii].val; if (i == -1) continue; j = i % nparts; k = i / nparts; if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; mylpwgts[j] = 0; if (mype == k) done = 1; } if (nmapped == nparts) break; } if (nmapped == nparts) break; } /* Map unmapped partitions */ if (nmapped < nparts) { for (i=j=0; j<nparts && nmapped<nparts; j++) { if (map[j] == -1) { for (; i<nparts; i++) { if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) { for (i=0; i<nparts; i++) map[i] = i; IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %0\n")); } else { /* check for a savings */ oldwgt = lpwgts[mype]; newwgt = lpwgts[rowmap[mype]]; nsaved = newwgt - oldwgt; gnsaved = GlobalSESum(ctrl, nsaved); /* undo everything if we don't see a savings */ if (gnsaved <= 0) { for (i=0; i<nparts; i++) map[i] = i; } IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved))); } GKfree((void **)&recv, (void **)&mylpwgts, LTERM); }
/************************************************************************* * This function is the entry point of the initial partition algorithm * that does recursive bissection. * This algorithm assembles the graph to all the processors and preceeds * by parallelizing the recursive bisection step. **************************************************************************/ void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j; int ncon, mype, npes, gnvtxs, ngroups; idxtype *xadj, *adjncy, *adjwgt, *vwgt; idxtype *part, *gwhere0, *gwhere1; idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt; GraphType *agraph; int lnparts, fpart, fpe, lnpes; int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum; MPI_Comm ipcomm; struct { float sum; int rank; } lpesum, gpesum; ncon = graph->ncon; ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part"); xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj"); adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy"); adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt"); vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt"); idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt); idxcopy(agraph->nvtxs+1, agraph->xadj, xadj); idxcopy(agraph->nedges, agraph->adjncy, adjncy); idxcopy(agraph->nedges, agraph->adjwgt, adjwgt); MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm); MPI_Comm_rank(ipcomm, &mype); MPI_Comm_size(ipcomm, &npes); gnvtxs = agraph->nvtxs; gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0"); gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1"); /* ADD: this assumes that tpwgts for all constraints is the same */ /* ADD: this is necessary because serial metis does not support the general case */ mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); for (i=0; i<ctrl->nparts; i++) for (j=0; j<ncon; j++) mytpwgts[i] += ctrl->tpwgts[i*ncon+j]; for (i=0; i<ctrl->nparts; i++) mytpwgts[i] /= (float)ncon; /* Go into the recursive bisection */ /* ADD: consider changing this to breadth-first type bisection */ moptions[0] = 0; moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1; lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = npes; while (lnpes > 1 && lnparts > 1) { /* Determine the weights of the partitions */ mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); mytpwgts2[1] = 1.0-mytpwgts2[0]; if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); else { METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } wsum = ssum(lnparts/2, mytpwgts+fpart); sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart); sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2); /* I'm picking the left branch */ if (mype < fpe+lnpes/2) { Mc_KeepPart(agraph, wspace, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { Mc_KeepPart(agraph, wspace, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } /* In case npes is greater than or equal to nparts */ if (lnparts == 1) { /* Only the first process will assign labels (for the reduction to work) */ if (mype == fpe) { for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart; } } /* In case npes is smaller than nparts */ else { if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); else METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) gwhere0[agraph->label[i]] = fpart + part[i]; } MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm); if (ngroups > 1) { tmpxadj = agraph->xadj; tmpadjncy = agraph->adjncy; tmpadjwgt = agraph->adjwgt; tmpvwgt = agraph->vwgt; tmpwhere = agraph->where; agraph->xadj = xadj; agraph->adjncy = adjncy; agraph->adjwgt = adjwgt; agraph->vwgt = vwgt; agraph->where = gwhere1; agraph->vwgt = vwgt; agraph->nvtxs = gnvtxs; Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec); lbsum = ssum(ncon, lbvec); edgecut = ComputeSerialEdgeCut(agraph); MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm); lpesum.sum = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpesum.sum = (float) (edgecut); else lpesum.sum = (float) (max_cut); } MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank)); MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm); MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm); agraph->xadj = tmpxadj; agraph->adjncy = tmpadjncy; agraph->adjwgt = tmpadjwgt; agraph->vwgt = tmpvwgt; agraph->where = tmpwhere; } idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where); FreeGraph(agraph); MPI_Comm_free(&ipcomm); GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm)); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); }
/*********************************************************************************** * This function is the entry point of the parallel multilevel local diffusion * algorithm. It uses parallel undirected diffusion followed by adaptive k-way * refinement. This function utilizes local coarsening. ************************************************************************************/ void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i; int npes, mype; CtrlType ctrl; WorkSpaceType wspace; GraphType *graph; int tewgt, tvsize, nmoved, maxin, maxout; float gtewgt, gtvsize, avg, maximb; int ps_relation, seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /* ADD: take care of disconnected graph */ /* ADD: take care of highly unbalanced vtxdist */ /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /**************************/ /* Set up data structures */ /**************************/ if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED; } SetUpCtrl(&ctrl, inparts, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts)); ctrl.ipc_factor = 1000.0; ctrl.redist_factor = 1.0; ctrl.redist_base = 1.0; ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = REFINE_PARTITION; ctrl.ps_relation = ps_relation; ctrl.tpwgts = itpwgts; graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag); graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize"); graph->home = idxmalloc(graph->nvtxs, "home"); if (ctrl.ps_relation == COUPLED) idxset(graph->nvtxs, mype, graph->home); else idxcopy(graph->nvtxs, part, graph->home); tewgt = idxsum(graph->nedges, graph->adjwgt); tvsize = idxsum(graph->nvtxs, graph->vsize); gtewgt = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs; gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs; ctrl.edge_size_ratio = gtewgt/gtvsize; scopy(incon, iubvec, ctrl.ubvec); PreAllocateMemory(&ctrl, graph, &wspace); /***********************/ /* Partition and Remap */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Adaptive_Partition(&ctrl, graph, &wspace); ParallelReMapGraph(&ctrl, graph, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); idxcopy(graph->nvtxs, graph->where, part); if (edgecut != NULL) *edgecut = graph->mincut; /***********************/ /* Take care of output */ /***********************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout); rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout); } /*************************************/ /* Free memory, renumber, and return */ /*************************************/ GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM); GKfree((void **)&itpwgts, LTERM); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); return; }
DWORD WINAPI netGameLoop(void *param) { int len, c, x, y; Psquare p; TnetGameSettings *b; char *m, *u, *a; hostent *h; char buf[256]; if(!param){ buf[0]=(BYTE)C_INIT1; buf[1]=NETGAME_VERSION; wr(buf, 2); c=rd1(); if(c!=C_INIT2) goto le; c=rd1(); if(c<1) goto le; netGameVersion=c; SetForegroundWindow(hWin); h= gethostbyaddr((char*)&netGameIP.sin_addr, 4, netGameIP.sin_family); a= inet_ntoa(netGameIP.sin_addr); if(msg1(MB_YESNO|MB_ICONQUESTION, lng(868, "Do you want to play with %s [%s] ?"), (h && h->h_name) ? h->h_name : "???", a ? a : "???")!=IDYES){ wr1(C_INIT_DENY); goto le; } buf[0]=(BYTE)C_INFO; buf[1]=sizeof(TnetGameSettings); b= (TnetGameSettings*)(buf+2); b->width=(char)width; b->height=(char)height; b->begin= (player==1); b->rule5=(char)ruleFive; b->cont=(char)continuous; if(netGameVersion<2) b->rule5=b->cont=0; wr(buf, 2+sizeof(TnetGameSettings)); if(rd1()!=C_INFO_OK) goto le; b->begin= !b->begin; } else{ buf[0]=(BYTE)C_INIT2; buf[1]=NETGAME_VERSION; wr(buf, 2); c=rd1(); if(c==C_BUSY) wrLog(lng(874, "The other player is already playing with someone else")); if(c!=C_INIT1) goto le; c=rd1(); if(c<1) goto le; netGameVersion=c; wrLog(lng(871, "Connection established. Waiting for response...")); c=rd1(); if(c==C_INIT_DENY) wrLog(lng(870, "The other player doesn't want to play with you !")); if(c!=C_INFO) goto le; len=rd1(); b= (TnetGameSettings*)buf; memset(buf, 0, sizeof(TnetGameSettings)); if(len<2 || rd(buf, len)<0) goto le; wr1(C_INFO_OK); } SendMessage(hWin, WM_COMMAND, 992, (LPARAM)b); undoRequest=0; for(;;){ x=rd1(); switch(x){ case C_MSG: //message len=rd2(); if(len<=0) goto le; u=new char[2*len]; if(rd(u, 2*len)>=0){ m=new char[len+1]; WideCharToMultiByte(CP_ACP, 0, (WCHAR*)u, len, m, len, 0, 0); m[len]='\0'; wrLog("---> %s", m); delete[] m; SetWindowPos(logDlg, HWND_TOP, 0, 0, 0, 0, SWP_NOMOVE|SWP_NOSIZE|SWP_ASYNCWINDOWPOS|SWP_NOACTIVATE|SWP_SHOWWINDOW); } delete[] u; break; case C_UNDO_REQUEST: y=rd2(); if(y<=0 || y>moves) goto le; if(undoRequest){ //both players pressed undo simultaneously if(undoRequest<0) undoRequest=y; amax(undoRequest, y); postUndo(); } else{ c=msg1(MB_YESNO|MB_ICONQUESTION, lng(876, "The other player wants to UNDO the last move.\r\nDo you agree ?")); if(c==IDYES){ undoRequest=y; wr1(C_UNDO_YES); postUndo(); } else{ wr1(C_UNDO_NO); } } break; case C_NEW_REQUEST: if(undoRequest){ if(undoRequest<0){ //both players pressed NewGame simultaneously postNewGame(); } else{ postUndo(); } } else{ c=msg1(MB_YESNO|MB_ICONQUESTION, lng(877, "The other player wants to start a NEW game.\r\nDo you agree ?")); if(c==IDYES){ undoRequest=-1; wr1(C_NEW_YES); postNewGame(); } else{ wr1(C_NEW_NO); } } break; case C_UNDO_YES: if(undoRequest<=0) goto le; postUndo(); break; case C_UNDO_NO: if(undoRequest>0){ msglng(878, "Sorry, the other player does not allow to UNDO your move."); undoRequest=0; } break; case C_NEW_YES: if(undoRequest>=0) goto le; postNewGame(); break; case C_NEW_NO: if(undoRequest<0){ msglng(879, "Sorry, the other player does not allow to start a NEW game."); undoRequest=0; } break; default: //move or error if(x<0 || x>=width){ show(logDlg); goto le; } while(finished && (getTickCount()-lastTick<5000 || saveLock)){ Sleep(200); } if(finished) SendMessage(hWin, WM_COMMAND, 101, 0); y=rd1(); if(y<0 || y>=height) goto le; p=Square(x, y); p->time=rd4(); PostMessage(hWin, WM_COMMAND, 991, (LPARAM)p); } } le: EnterCriticalSection(&netLock); netGameDone(); LeaveCriticalSection(&netLock); return 0; }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON]; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, newcut, mincutorder; float *maxwgt, *minwgt, tvec[MAXNCON]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 15), 100); /* Setup the weight intervals of the two subdomains */ minwgt = fwspacemalloc(ctrl, 2*ncon); maxwgt = fwspacemalloc(ctrl, 2*ncon); for (i=0; i<2; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j]; minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]); } } /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal); for (i=0; i<ncon; i++) minbal[i] = origbal[i]; newcut = mincut = graph->mincut; mincutorder = -1; if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut); for (i=0; i<ncon; i++) printf("%.3f ", origbal[i]); printf("[B]\n"); } idxset(nvtxs, -1, moved); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert all nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nvtxs, perm, 1); for (ii=0; ii<nvtxs; ii++) { i = perm[ii]; PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { if (AreAllBelow(ncon, minbal, ubvec)) break; SelectQueue3(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, newbal); if (IsBetter2wayBalance(ncon, newbal, minbal, ubvec) || (IsBetter2wayBalance(ncon, newbal, origbal, ubvec) && newcut < mincut)) { mincut = newcut; for (i=0; i<ncon; i++) minbal[i] = newbal[i]; mincutorder = nswaps; } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (i=0; i<ncon; i++) printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec); printf(", LB: "); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); if (mincutorder == nswaps) printf(" *\n"); else printf("\n"); } /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update the queue position */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); /* Update its boundary information */ if (ed[k] == 0 && bndptr[k] != -1) BNDDelete(nbnd, bndind, bndptr, k); else if (ed[k] > 0 && bndptr[k] == -1) BNDInsert(nbnd, bndind, bndptr, k); } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (i=0; i<ncon; i++) printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]); printf("], LB: "); Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); printf("\n"); } graph->mincut = mincut; graph->nbnd = nbnd; for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); fwspacefree(ctrl, 2*ncon); fwspacefree(ctrl, 2*ncon); }
void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, idxtype *hmarker, float ubfactor, int npasses) { int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize; idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; idxtype *mptr, *mind, *swaps, *perm, *inqueue; PQueueType parts; NRInfoType *rinfo; int higain, oldgain, mincut, initcut, mincutorder; int pass, from, to, limit; int badmaxpwgt, mindiff, newdiff; ASSERT(graph->mincut == graph->pwgts[2]); nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; where = graph->where; pwgts = graph->pwgts; rinfo = graph->nrinfo; PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt)); perm = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); mptr = idxwspacemalloc(ctrl, nvtxs+1); mind = idxwspacemalloc(ctrl, nvtxs); inqueue = idxwspacemalloc(ctrl, nvtxs); idxset(nvtxs, -1, inqueue); badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1])); IFSET(ctrl->dbglvl, DBG_REFINE, printf("Partitions-N1: [%6d %6d] Nv-Nb[%6d %6d] MaxPwgt[%6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt, graph->mincut)); to = (pwgts[0] < pwgts[1] ? 1 : 0); for (pass=0; pass<npasses; pass++) { from = to; to = (from+1)%2; PQueueReset(&parts); mincutorder = -1; initcut = mincut = graph->mincut; nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(where[i] == 2); if (hmarker[i] == -1 || hmarker[i] == to) { PQueueInsert(&parts, i, vwgt[i]-rinfo[i].edegrees[from]); inqueue[i] = pass; } } qsize = parts.nnodes; ASSERT(CheckNodeBnd(graph, nbnd)); ASSERT(CheckNodePartitionParams(graph)); limit = nbnd; /****************************************************** * Get into the FM loop *******************************************************/ mptr[0] = nmind = nbad = 0; mindiff = abs(pwgts[0]-pwgts[1]); for (nswaps=0; nswaps<nvtxs; nswaps++) { if ((higain = PQueueGetMax(&parts)) == -1) break; inqueue[higain] = -1; ASSERT(bndptr[higain] != -1); if (pwgts[to]+vwgt[higain] > badmaxpwgt) { /* Skip this vertex */ if (nbad++ > limit) break; else { nswaps--; continue; } } pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[from]); newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[from]-rinfo[higain].edegrees[from])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { mincut = pwgts[2]; mincutorder = nswaps; mindiff = newdiff; nbad = 0; } else { if (nbad++ > limit) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[from]); break; /* No further improvement, break out */ } } BNDDelete(nbnd, bndind, bndptr, higain); pwgts[to] += vwgt[higain]; where[higain] = to; swaps[nswaps] = higain; /********************************************************** * Update the degrees of the affected nodes ***********************************************************/ for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ rinfo[k].edegrees[to] += vwgt[higain]; } else if (where[k] == from) { /* This vertex is pulled into the separator */ ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ where[k] = 2; pwgts[from] -= vwgt[k]; edegrees = rinfo[k].edegrees; edegrees[0] = edegrees[1] = 0; for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] != 2) edegrees[where[kk]] += vwgt[kk]; else { oldgain = vwgt[kk]-rinfo[kk].edegrees[from]; rinfo[kk].edegrees[from] -= vwgt[k]; /* Update the gain of this node if it was skipped */ if (inqueue[kk] == pass) PQueueUpdateUp(&parts, kk, oldgain, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue. Safe due to one-sided moves */ if (hmarker[k] == -1 || hmarker[k] == to) { PQueueInsert(&parts, k, vwgt[k]-edegrees[from]); inqueue[k] = pass; } } } mptr[nswaps+1] = nmind; IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("Moved %6d to %3d, Gain: %5d [%5d] \t[%5d %5d %5d] [%3d %2d]\n", higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit)); } /**************************************************************** * Roll back computation *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; ASSERT(CheckNodePartitionParams(graph)); ASSERT(where[higain] == to); INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); where[higain] = 2; BNDInsert(nbnd, bndind, bndptr, higain); edegrees = rinfo[higain].edegrees; edegrees[0] = edegrees[1] = 0; for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) rinfo[k].edegrees[to] -= vwgt[higain]; else edegrees[where[k]] += vwgt[k]; } /* Push nodes out of the separator */ for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { k = mind[j]; ASSERT(where[k] == 2); where[k] = from; INC_DEC(pwgts[from], pwgts[2], vwgt[k]); BNDDelete(nbnd, bndind, bndptr, k); for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] == 2) rinfo[kk].edegrees[from] += vwgt[k]; } } } ASSERT(mincut == pwgts[2]); IFSET(ctrl->dbglvl, DBG_REFINE, printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d, QSIZE: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd, qsize)); graph->mincut = mincut; graph->nbnd = nbnd; if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut)) break; } PQueueFree(ctrl, &parts); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs+1); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function is the entry point of the initial balancing algorithm. * This algorithm assembles the graph to all the processors and preceeds * with the balancing step. **************************************************************************/ void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace) { int i, j, mype, npes, nvtxs, nedges, ncon; idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize; idxtype *part, *lwhere, *home; GraphType *agraph, cgraph; CtrlType myctrl; int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype; int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut; int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls; float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum; float rating, max_rating, your_cost = -1.0, your_balance = -1.0; float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2]; MPI_Status status; MPI_Comm ipcomm, srcomm; struct { float cost; int rank; } lpecost, gpecost; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr)); vtxdist = graph->vtxdist; agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace); nvtxs = cgraph.nvtxs = agraph->nvtxs; nedges = cgraph.nedges = agraph->nedges; ncon = cgraph.ncon = agraph->ncon; xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj"); vwgt = cgraph.vwgt = xadj + nvtxs+1; vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1; cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1; lwhere = xadj + nvtxs*(3+ncon)+1; home = xadj + nvtxs*(4+ncon)+1; adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1; adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges; /* ADD: this assumes that tpwgts for all constraints is the same */ /* ADD: this is necessary because serial metis does not support the general case */ mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts"); for (i=0; i<ctrl->nparts; i++) for (j=0; j<ncon; j++) mytpwgts[i] += ctrl->tpwgts[i*ncon+j]; for (i=0; i<ctrl->nparts; i++) mytpwgts[i] /= (float)ncon; idxcopy(nvtxs+1, agraph->xadj, xadj); idxcopy(nvtxs*ncon, agraph->vwgt, vwgt); idxcopy(nvtxs, agraph->vsize, vsize); idxcopy(nedges, agraph->adjncy, adjncy); idxcopy(nedges, agraph->adjwgt, adjwgt); /****************************************/ /****************************************/ if (ctrl->ps_relation == DISCOUPLED) { rcounts = imalloc(ctrl->npes, "rcounts"); rdispls = imalloc(ctrl->npes+1, "rdispls"); for (i=0; i<ctrl->npes; i++) { rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i]; } MAKECSR(i, ctrl->npes, rdispls); MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE, (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm); for (i=0; i<agraph->nvtxs; i++) home[i] = part[i]; GKfree((void **)&rcounts, (void **)&rdispls, LTERM); } else { for (i=0; i<ctrl->npes; i++) for (j=vtxdist[i]; j<vtxdist[i+1]; j++) part[j] = home[j] = i; } /* Ensure that the initial partitioning is legal */ for (i=0; i<agraph->nvtxs; i++) { if (part[i] >= ctrl->nparts) part[i] = home[i] = part[i] % ctrl->nparts; if (part[i] < 0) part[i] = home[i] = (-1*part[i]) % ctrl->nparts; } /****************************************/ /****************************************/ IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec)); IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph))); for (i=0; i<agraph->ncon; i++) IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i])); IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n")); /****************************************/ /* Split the processors into two groups */ /****************************************/ sr = (ctrl->mype % 2 == 0) ? 1 : 0; gd = (ctrl->mype % 2 == 1) ? 1 : 0; if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) { sr = 1; gd = 0; } sr_pe = 0; gd_pe = 1; MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm); MPI_Comm_rank(ipcomm, &mype); MPI_Comm_size(ipcomm, &npes); myctrl.dbglvl = 0; myctrl.mype = mype; myctrl.npes = npes; myctrl.comm = ipcomm; myctrl.sync = ctrl->sync; myctrl.seed = ctrl->seed; myctrl.nparts = ctrl->nparts; myctrl.ipc_factor = ctrl->ipc_factor; myctrl.redist_factor = ctrl->redist_base; myctrl.partType = ADAPTIVE_PARTITION; myctrl.ps_relation = DISCOUPLED; myctrl.tpwgts = ctrl->tpwgts; icopy(ncon, ctrl->tvwgts, myctrl.tvwgts); icopy(ncon, ctrl->ubvec, myctrl.ubvec); if (sr == 1) { /*******************************************/ /* Half of the processors do scratch-remap */ /*******************************************/ ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1); MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm); MPI_Comm_rank(srcomm, &srmype); MPI_Comm_size(srcomm, &srnpes); moptions[0] = 0; moptions[7] = ctrl->sync + (mype % ngroups) + 1; idxset(nvtxs, 0, lwhere); lnparts = ctrl->nparts; fpart = fpe = 0; lnpes = srnpes; while (lnpes > 1 && lnparts > 1) { ASSERT(ctrl, agraph->nvtxs > 1); /* Determine the weights of the partitions */ mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart); mytpwgts2[1] = 1.0-mytpwgts2[0]; if (agraph->ncon == 1) { METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } else { METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut, part); } wsum = ssum(lnparts/2, mytpwgts+fpart); sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart); sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2); /* I'm picking the left branch */ if (srmype < fpe+lnpes/2) { Mc_KeepPart(agraph, wspace, part, 0); lnpes = lnpes/2; lnparts = lnparts/2; } else { Mc_KeepPart(agraph, wspace, part, 1); fpart = fpart + lnparts/2; fpe = fpe + lnpes/2; lnpes = lnpes - lnpes/2; lnparts = lnparts - lnparts/2; } } /* In case srnpes is greater than or equal to nparts */ if (lnparts == 1) { /* Only the first process will assign labels (for the reduction to work) */ if (srmype == fpe) { for (i=0; i<agraph->nvtxs; i++) lwhere[agraph->label[i]] = fpart; } } /* In case srnpes is smaller than nparts */ else { if (ncon == 1) METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); else METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions, &edgecut, part); for (i=0; i<agraph->nvtxs; i++) lwhere[agraph->label[i]] = fpart + part[i]; } MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm); edgecut = ComputeSerialEdgeCut(&cgraph); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); lbsum = ssum(ncon, lbvec); MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpecost.cost = (float)edgecut; else lpecost.cost = (float)max_cut + lbsum; } MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) { MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm); } if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) { MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); } if (ctrl->mype == sr_pe) { idxcopy(nvtxs, part, lwhere); SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } MPI_Comm_free(&srcomm); } /**************************************/ /* The other half do global diffusion */ /**************************************/ else { /******************************************************************/ /* The next stmt is required to balance out the sr MPI_Comm_split */ /******************************************************************/ MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm); if (ncon == 1) { rating = WavefrontDiffusion(&myctrl, agraph, home); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); lbsum = ssum(ncon, lbvec); /* Determine which PE computed the best partitioning */ MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm); MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm); lpecost.rank = ctrl->mype; lpecost.cost = lbsum; if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) { if (lbsum < UNBALANCE_FRACTION * (float)(ncon)) lpecost.cost = rating; else lpecost.cost = max_rating + lbsum; } MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm); /* Now send this to the coordinating processor */ if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe) MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm); if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe) MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status); if (ctrl->mype == gd_pe) { idxcopy(nvtxs, part, lwhere); SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts); } } else { Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES); } } if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) { if (ctrl->mype == sr_pe || ctrl->mype == gd_pe) { /********************************************************************/ /* The coordinators from each group decide on the best partitioning */ /********************************************************************/ my_cut = (float) ComputeSerialEdgeCut(&cgraph); my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home); Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec); my_balance = ssum(cgraph.ncon, lbvec); my_balance /= (float) cgraph.ncon; my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv; IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n", (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance)); if (ctrl->mype == gd_pe) { buffer[0] = my_cost; buffer[1] = my_balance; MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm); } else { MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status); your_cost = buffer[0]; your_balance = buffer[1]; } } if (ctrl->mype == sr_pe) { who_wins = gd_pe; if ((my_balance < 1.1 && your_balance > 1.1) || (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) || (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) { who_wins = sr_pe; } } MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm); } else { who_wins = sr_pe; } MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm); idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where); MPI_Comm_free(&ipcomm); GKfree((void **)&xadj, (void **)&mytpwgts, LTERM); /* For whatever reason, FreeGraph crashes here...so explicitly free the memory. FreeGraph(agraph); */ GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, (void **)&agraph->nvwgt, LTERM); GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM); GKfree((void **)&agraph, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr)); }
/*********************************************************************************** * This function is the entry point of the parallel kmetis algorithm that uses * coordinates to compute an initial graph distribution. ************************************************************************************/ void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, idxtype *part, MPI_Comm *comm) { int h, i, j; int nvtxs = -1, npes, mype; int uwgtflag, cut, gcut, maxnvtxs; int ltvwgts[MAXNCON]; int moptions[10]; CtrlType ctrl; idxtype *uvwgt; WorkSpaceType wspace; GraphType *graph, *mgraph; float avg, maximb, balance, *mytpwgts; int seed, dbglvl = 0; int iwgtflag, inumflag, incon, inparts, ioptions[10]; float *itpwgts, iubvec[MAXNCON]; MPI_Comm_size(*comm, &npes); MPI_Comm_rank(*comm, &mype); /********************************/ /* Try and take care bad inputs */ /********************************/ if (options != NULL && options[0] == 1) dbglvl = options[PMV3_OPTION_DBGLVL]; CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag, ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, NULL, NULL, options, ioptions, part, comm); /*********************************/ /* Take care the nparts = 1 case */ /*********************************/ if (inparts <= 1) { idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); *edgecut = 0; return; } /******************************/ /* Take care of npes = 1 case */ /******************************/ if (npes == 1 && inparts > 1) { moptions[0] = 0; nvtxs = vtxdist[1]; if (incon == 1) { METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, itpwgts, moptions, edgecut, part); } else { /* ADD: this is because METIS does not support tpwgts for all constraints */ mytpwgts = fmalloc(inparts, "mytpwgts"); for (i=0; i<inparts; i++) mytpwgts[i] = itpwgts[i*incon]; moptions[7] = -1; METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, &inparts, mytpwgts, moptions, edgecut, part); free(mytpwgts); } return; } if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1); /*****************************/ /* Set up control structures */ /*****************************/ if (ioptions[0] == 1) { dbglvl = ioptions[PMV3_OPTION_DBGLVL]; seed = ioptions[PMV3_OPTION_SEED]; } else { dbglvl = GLOBAL_DBGLVL; seed = GLOBAL_SEED; } SetUpCtrl(&ctrl, npes, dbglvl, *comm); ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts)); ctrl.seed = (seed == 0) ? mype : seed*mype; ctrl.sync = GlobalSEMax(&ctrl, seed); ctrl.partType = STATIC_PARTITION; ctrl.ps_relation = -1; ctrl.tpwgts = itpwgts; scopy(incon, iubvec, ctrl.ubvec); uwgtflag = iwgtflag|2; uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt"); graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag); free(graph->nvwgt); graph->nvwgt = NULL; PreAllocateMemory(&ctrl, graph, &wspace); /*================================================================= * Compute the initial npes-way partitioning geometric partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); /*================================================================= * Move the graph according to the partitioning =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr)); free(uvwgt); graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt"); graph->ncon = incon; j = ctrl.nparts; ctrl.nparts = ctrl.npes; mgraph = Moc_MoveGraph(&ctrl, graph, &wspace); ctrl.nparts = j; /**********************************************************/ /* Do the same functionality as Moc_SetUpGraph for mgraph */ /**********************************************************/ /* compute tvwgts */ for (j=0; j<incon; j++) ltvwgts[j] = 0; for (i=0; i<graph->nvtxs; i++) for (j=0; j<incon; j++) ltvwgts[j] += mgraph->vwgt[i*incon+j]; for (j=0; j<incon; j++) ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]); /* check for zero wgt constraints */ for (i=0; i<incon; i++) { /* ADD: take care of the case in which tvwgts is zero */ if (ctrl.tvwgts[i] == 0) { if (ctrl.mype == 0) printf("ERROR: sum weight for constraint %d is zero\n", i); MPI_Finalize(); exit(-1); } } /* compute nvwgt */ mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt"); for (i=0; i<mgraph->nvtxs; i++) for (j=0; j<incon; j++) mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr)); if (ctrl.dbglvl&DBG_INFO) { cut = 0; for (i=0; i<graph->nvtxs; i++) for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) if (graph->where[i] != graph->where[graph->adjncy[j]]) cut += graph->adjwgt[j]; gcut = GlobalSESum(&ctrl, cut)/2; maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs); balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes)); rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n", gcut, balance, maxnvtxs, graph->gnvtxs, npes); } /*================================================================= * Set up the newly moved graph =================================================================*/ IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr)); ctrl.nparts = inparts; FreeWSpace(&wspace); PreAllocateMemory(&ctrl, mgraph, &wspace); /*======================================================= * Now compute the partition of the moved graph =======================================================*/ if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) { IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes])); PartitionSmallGraph(&ctrl, mgraph, &wspace); } else { Moc_Global_Partition(&ctrl, mgraph, &wspace); } ParallelReMapGraph(&ctrl, mgraph, &wspace); /* Invert the ordering back to the original graph */ ctrl.nparts = npes; ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace); *edgecut = mgraph->mincut; IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr)); /*******************/ /* Print out stats */ /*******************/ IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl)); IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm)); if (ctrl.dbglvl&DBG_INFO) { rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut); avg = 0.0; for (h=0; h<incon; h++) { maximb = 0.0; for (i=0; i<inparts; i++) maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]); avg += maximb; rprintf(&ctrl, "%.3f ", maximb); } rprintf(&ctrl, " avg: %.3f\n", avg/(float)incon); } GKfree((void **)&itpwgts, LTERM); FreeGraph(mgraph); FreeInitialGraphAndRemap(graph, iwgtflag); FreeWSpace(&wspace); FreeCtrl(&ctrl); if (inumflag == 1) ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0); }
void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, idxtype *hmarker, float ubfactor, int npasses) { int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind; idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr; idxtype *mptr, *mind, *moved, *swaps, *perm; PQueueType parts[2]; NRInfoType *rinfo; int higain, oldgain, mincut, initcut, mincutorder; int pass, to, other, limit; int badmaxpwgt, mindiff, newdiff; int u[2], g[2]; nvtxs = graph->nvtxs; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; bndind = graph->bndind; bndptr = graph->bndptr; where = graph->where; pwgts = graph->pwgts; rinfo = graph->nrinfo; i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt); PQueueInit(ctrl, &parts[0], nvtxs, i); PQueueInit(ctrl, &parts[1], nvtxs, i); moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); mptr = idxwspacemalloc(ctrl, nvtxs+1); mind = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); IFSET(ctrl->dbglvl, DBG_REFINE, printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1])); for (pass=0; pass<npasses; pass++) { idxset(nvtxs, -1, moved); PQueueReset(&parts[0]); PQueueReset(&parts[1]); mincutorder = -1; initcut = mincut = graph->mincut; nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(where[i] == 2); if (hmarker[i] == -1) { PQueueInsert(&parts[0], i, vwgt[i]-rinfo[i].edegrees[1]); PQueueInsert(&parts[1], i, vwgt[i]-rinfo[i].edegrees[0]); moved[i] = -5; } else if (hmarker[i] != 2) { PQueueInsert(&parts[hmarker[i]], i, vwgt[i]-rinfo[i].edegrees[(hmarker[i]+1)%2]); moved[i] = -(10+hmarker[i]); } } ASSERT(CheckNodeBnd(graph, nbnd)); ASSERT(CheckNodePartitionParams(graph)); limit = nbnd; /****************************************************** * Get into the FM loop *******************************************************/ mptr[0] = nmind = 0; mindiff = abs(pwgts[0]-pwgts[1]); to = (pwgts[0] < pwgts[1] ? 0 : 1); for (nswaps=0; nswaps<nvtxs; nswaps++) { u[0] = PQueueSeeMax(&parts[0]); u[1] = PQueueSeeMax(&parts[1]); if (u[0] != -1 && u[1] != -1) { g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1]; g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0]; to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) to = (to+1)%2; } else if (u[0] == -1 && u[1] == -1) { break; } else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) { to = 0; } else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) { to = 1; } else break; other = (to+1)%2; higain = PQueueGetMax(&parts[to]); /* Delete its matching entry in the other queue */ if (moved[higain] == -5) PQueueDelete(&parts[other], higain, vwgt[higain]-rinfo[higain].edegrees[to]); ASSERT(bndptr[higain] != -1); pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]); newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other])); if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) { mincut = pwgts[2]; mincutorder = nswaps; mindiff = newdiff; } else { if (nswaps - mincutorder > limit) { pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]); break; /* No further improvement, break out */ } } BNDDelete(nbnd, bndind, bndptr, higain); pwgts[to] += vwgt[higain]; where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /********************************************************** * Update the degrees of the affected nodes ***********************************************************/ for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */ oldgain = vwgt[k]-rinfo[k].edegrees[to]; rinfo[k].edegrees[to] += vwgt[higain]; if (moved[k] == -5 || moved[k] == -(10+other)) PQueueUpdate(&parts[other], k, oldgain, oldgain-vwgt[higain]); } else if (where[k] == other) { /* This vertex is pulled into the separator */ ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k])); BNDInsert(nbnd, bndind, bndptr, k); mind[nmind++] = k; /* Keep track for rollback */ where[k] = 2; pwgts[other] -= vwgt[k]; edegrees = rinfo[k].edegrees; edegrees[0] = edegrees[1] = 0; for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] != 2) edegrees[where[kk]] += vwgt[kk]; else { oldgain = vwgt[kk]-rinfo[kk].edegrees[other]; rinfo[kk].edegrees[other] -= vwgt[k]; if (moved[kk] == -5 || moved[kk] == -(10+to)) PQueueUpdate(&parts[to], kk, oldgain, oldgain+vwgt[k]); } } /* Insert the new vertex into the priority queue (if it has not been moved). */ if (moved[k] == -1 && (hmarker[k] == -1 || hmarker[k] == to)) { PQueueInsert(&parts[to], k, vwgt[k]-edegrees[other]); moved[k] = -(10+to); } #ifdef FULLMOVES /* this does not work as well as the above partial one */ if (moved[k] == -1) { if (hmarker[k] == -1) { PQueueInsert(&parts[0], k, vwgt[k]-edegrees[1]); PQueueInsert(&parts[1], k, vwgt[k]-edegrees[0]); moved[k] = -5; } else if (hmarker[k] != 2) { PQueueInsert(&parts[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]); moved[k] = -(10+hmarker[k]); } } #endif } } mptr[nswaps+1] = nmind; IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("Moved %6d to %3d, Gain: %5d [%5d] [%4d %4d] \t[%5d %5d %5d]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2])); } /**************************************************************** * Roll back computation *****************************************************************/ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; ASSERT(CheckNodePartitionParams(graph)); to = where[higain]; other = (to+1)%2; INC_DEC(pwgts[2], pwgts[to], vwgt[higain]); where[higain] = 2; BNDInsert(nbnd, bndind, bndptr, higain); edegrees = rinfo[higain].edegrees; edegrees[0] = edegrees[1] = 0; for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; if (where[k] == 2) rinfo[k].edegrees[to] -= vwgt[higain]; else edegrees[where[k]] += vwgt[k]; } /* Push nodes out of the separator */ for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) { k = mind[j]; ASSERT(where[k] == 2); where[k] = other; INC_DEC(pwgts[other], pwgts[2], vwgt[k]); BNDDelete(nbnd, bndind, bndptr, k); for (jj=xadj[k]; jj<xadj[k+1]; jj++) { kk = adjncy[jj]; if (where[kk] == 2) rinfo[kk].edegrees[other] += vwgt[k]; } } } ASSERT(mincut == pwgts[2]); IFSET(ctrl->dbglvl, DBG_REFINE, printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut >= initcut) break; } PQueueFree(ctrl, &parts[0]); PQueueFree(ctrl, &parts[1]); idxwspacefree(ctrl, nvtxs+1); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function converts a mesh into a dual graph **************************************************************************/ void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, int *numflag, int *ncommonnodes, idxtype **xadj, idxtype **adjncy, MPI_Comm *comm) { int i, j, jj, k, kk, m; int npes, mype, pe, count, mask, pass; int nelms, lnns, my_nns, node; int firstelm, firstnode, lnode, nrecv, nsend; int *scounts, *rcounts, *sdispl, *rdispl; idxtype *nodedist, *nmap, *auxarray; idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL; idxtype *sbuffer, *rbuffer, *htable; KeyValueType *nodelist, *recvbuffer; idxtype ind[200], wgt[200]; int gmaxnode, gminnode; CtrlType ctrl; SetUpCtrl(&ctrl, -1, 0, *comm); npes = ctrl.npes; mype = ctrl.mype; nelms = elmdist[mype+1]-elmdist[mype]; if (*numflag == 1) ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1); mask = (1<<11)-1; /*****************************/ /* Determine number of nodes */ /*****************************/ gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]); for (i=0; i<eptr[nelms]; i++) eind[i] -= gminnode; gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]); /**************************/ /* Check for input errors */ /**************************/ ASSERTS(nelms > 0); /* construct node distribution array */ nodedist = idxsmalloc(npes+1, 0, "nodedist"); for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) { k = j/(npes-i); nodedist[i+1] = nodedist[i]+k; j -= k; } my_nns = nodedist[mype+1]-nodedist[mype]; firstnode = nodedist[mype]; nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist"); auxarray = idxmalloc(eptr[nelms], "auxarray"); htable = idxsmalloc(amax(my_nns, mask+1), -1, "htable"); scounts = imalloc(4*npes+2, "scounts"); rcounts = scounts+npes; sdispl = scounts+2*npes; rdispl = scounts+3*npes+1; /*********************************************/ /* first find a local numbering of the nodes */ /*********************************************/ for (i=0; i<nelms; i++) { for (j=eptr[i]; j<eptr[i+1]; j++) { nodelist[j].key = eind[j]; nodelist[j].val = j; auxarray[j] = i; /* remember the local element ID that uses this node */ } } ikeysort(eptr[nelms], nodelist); for (count=1, i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) count++; } lnns = count; nmap = idxmalloc(lnns, "nmap"); /* renumber the nodes of the elements array */ count = 1; nmap[0] = nodelist[0].key; eind[nodelist[0].val] = 0; nodelist[0].val = auxarray[nodelist[0].val]; /* Store the local element ID */ for (i=1; i<eptr[nelms]; i++) { if (nodelist[i].key > nodelist[i-1].key) { nmap[count] = nodelist[i].key; count++; } eind[nodelist[i].val] = count-1; nodelist[i].val = auxarray[nodelist[i].val]; /* Store the local element ID */ } MPI_Barrier(*comm); /**********************************************************/ /* perform comms necessary to construct node-element list */ /**********************************************************/ iset(npes, 0, scounts); for (pe=i=0; i<eptr[nelms]; i++) { while (nodelist[i].key >= nodedist[pe+1]) pe++; scounts[pe] += 2; } ASSERTS(pe < npes); MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); ASSERTS(sdispl[npes] == eptr[nelms]*2); nrecv = rdispl[npes]/2; recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer"); MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, rcounts, rdispl, IDX_DATATYPE, *comm); /**************************************/ /* construct global node-element list */ /**************************************/ gnptr = idxsmalloc(my_nns+1, 0, "gnptr"); for (i=0; i<npes; i++) { for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; ASSERTS(lnode >= 0 && lnode < my_nns) gnptr[lnode]++; } } MAKECSR(i, my_nns, gnptr); gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind"); for (pe=0; pe<npes; pe++) { firstelm = elmdist[pe]; for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm; } } SHIFTCSR(i, my_nns, gnptr); /*********************************************************/ /* send the node-element info to the relevant processors */ /*********************************************************/ iset(npes, 0, scounts); /* use a hash table to ensure that each node is sent to a proc only once */ for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { scounts[pe] += gnptr[lnode+1]-gnptr[lnode]; htable[lnode] = 1; } } /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm); icopy(npes, scounts, sdispl); MAKECSR(i, npes, sdispl); /* create the send buffer */ nsend = sdispl[npes]; sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend)); count = 0; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; if (htable[lnode] == -1) { for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) { if (k == gnptr[lnode]) sbuffer[count++] = -1*(gnind[k]+1); else sbuffer[count++] = gnind[k]; } htable[lnode] = 1; } } ASSERTS(count == sdispl[pe+1]); /* now reset the hash table */ for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) { lnode = recvbuffer[j].key-firstnode; htable[lnode] = -1; } } icopy(npes, rcounts, rdispl); MAKECSR(i, npes, rdispl); nrecv = rdispl[npes]; rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv)); MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, rcounts, rdispl, IDX_DATATYPE, *comm); k = -1; nptr = idxsmalloc(lnns+1, 0, "nptr"); nind = rbuffer; for (pe=0; pe<npes; pe++) { for (j=rdispl[pe]; j<rdispl[pe+1]; j++) { if (nind[j] < 0) { k++; nind[j] = (-1*nind[j])-1; } nptr[k]++; } } MAKECSR(i, lnns, nptr); ASSERTS(k+1 == lnns); ASSERTS(nptr[lnns] == nrecv) myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj"); idxset(mask+1, -1, htable); firstelm = elmdist[mype]; /* Two passes -- in first pass, simply find out the memory requirements */ for (pass=0; pass<2; pass++) { for (i=0; i<nelms; i++) { for (count=0, j=eptr[i]; j<eptr[i+1]; j++) { node = eind[j]; for (k=nptr[node]; k<nptr[node+1]; k++) { if ((kk=nind[k]) == firstelm+i) continue; m = htable[(kk&mask)]; if (m == -1) { ind[count] = kk; wgt[count] = 1; htable[(kk&mask)] = count++; } else { if (ind[m] == kk) { wgt[m]++; } else { for (jj=0; jj<count; jj++) { if (ind[jj] == kk) { wgt[jj]++; break; } } if (jj == count) { ind[count] = kk; wgt[count++] = 1; } } } } } for (j=0; j<count; j++) { htable[(ind[j]&mask)] = -1; if (wgt[j] >= *ncommonnodes) { if (pass == 0) myxadj[i]++; else myadjncy[myxadj[i]++] = ind[j]; } } } if (pass == 0) { MAKECSR(i, nelms, myxadj); myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy"); } else { SHIFTCSR(i, nelms, myxadj); } } /*****************************************/ /* correctly renumber the elements array */ /*****************************************/ for (i=0; i<eptr[nelms]; i++) eind[i] = nmap[eind[i]] + gminnode; if (*numflag == 1) ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0); /* do not free nodelist, recvbuffer, rbuffer */ GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, (void **)&gnind, (void **)&auxarray, LTERM); FreeCtrl(&ctrl); return; }
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void MCRandom_KWayEdgeRefineHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, float *orgubvec, int npasses) { int i, ii, iii, j, /*jj,*/ k, /*l,*/ pass, nvtxs, ncon, nmoves, nbnd, myndegrees, same; int from, me, to, oldcut, gain; idxtype *xadj, *adjncy, *adjwgt; idxtype *where, *perm, *bndptr, *bndind; EDegreeType *myedegrees; RInfoType *myrinfo; float *npwgts, *nvwgt, *minwgt, *maxwgt, maxlb, minlb, ubvec[MAXNCON], tvec[MAXNCON]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; adjwgt = graph->adjwgt; bndptr = graph->bndptr; bndind = graph->bndind; where = graph->where; npwgts = graph->npwgts; /* Setup the weight intervals of the various subdomains */ minwgt = fwspacemalloc(ctrl, nparts*ncon); maxwgt = fwspacemalloc(ctrl, nparts*ncon); /* See if the orgubvec consists of identical constraints */ maxlb = minlb = orgubvec[0]; for (i=1; i<ncon; i++) { minlb = (orgubvec[i] < minlb ? orgubvec[i] : minlb); maxlb = (orgubvec[i] > maxlb ? orgubvec[i] : maxlb); } same = (fabs(maxlb-minlb) < .01 ? 1 : 0); /* Let's not get very optimistic. Let Balancing do the work */ ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec); for (i=0; i<ncon; i++) ubvec[i] = amax(ubvec[i], orgubvec[i]); if (!same) { for (i=0; i<nparts; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = ubvec[j]/nparts; minwgt[i*ncon+j] = 1.0/(ubvec[j]*nparts); } } } else { maxlb = ubvec[0]; for (i=1; i<ncon; i++) maxlb = (ubvec[i] > maxlb ? ubvec[i] : maxlb); for (i=0; i<nparts; i++) { for (j=0; j<ncon; j++) { maxwgt[i*ncon+j] = maxlb/nparts; minwgt[i*ncon+j] = 1.0/(maxlb*nparts); } } } perm = idxwspacemalloc(ctrl, nvtxs); if (ctrl->dbglvl&DBG_REFINE) { printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ", npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], graph->nvtxs, graph->nbnd, graph->mincut); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); printf("\n"); } for (pass=0; pass<npasses; pass++) { ASSERT(ComputeCut(graph, where) == graph->mincut); oldcut = graph->mincut; nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (nmoves=iii=0; iii<graph->nbnd; iii++) { ii = perm[iii]; if (ii >= nbnd) continue; i = bndind[ii]; myrinfo = graph->rinfo+i; if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */ from = where[i]; nvwgt = graph->nvwgt+i*ncon; if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) continue; /* This cannot be moved! */ myedegrees = myrinfo->edegrees; myndegrees = myrinfo->ndegrees; for (k=0; k<myndegrees; k++) { to = myedegrees[k].pid; gain = myedegrees[k].ed - myrinfo->id; if (gain >= 0 && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) break; } if (k == myndegrees) continue; /* break out if you did not find a candidate */ for (j=k+1; j<myndegrees; j++) { to = myedegrees[j].pid; if ((myedegrees[j].ed > myedegrees[k].ed && (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) || (myedegrees[j].ed == myedegrees[k].ed && IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec))) k = j; } to = myedegrees[k].pid; if (myedegrees[k].ed-myrinfo->id == 0 && !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec) && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon)) continue; /*===================================================================== * If we got here, we can now move the vertex from 'from' to 'to' *======================================================================*/ graph->mincut -= myedegrees[k].ed-myrinfo->id; IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut)); /* Update where, weight, and ID/ED information of the vertex you moved */ saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1); where[i] = to; myrinfo->ed += myrinfo->id-myedegrees[k].ed; SWAP(myrinfo->id, myedegrees[k].ed, j); if (myedegrees[k].ed == 0) myedegrees[k] = myedegrees[--myrinfo->ndegrees]; else myedegrees[k].pid = from; if (myrinfo->ed-myrinfo->id < 0) BNDDelete(nbnd, bndind, bndptr, i); /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { ii = adjncy[j]; me = where[ii]; myrinfo = graph->rinfo+ii; if (myrinfo->edegrees == NULL) { myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree; ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii]; } myedegrees = myrinfo->edegrees; ASSERT(CheckRInfo(myrinfo)); if (me == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1) BNDInsert(nbnd, bndind, bndptr, ii); } else if (me == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1) BNDDelete(nbnd, bndind, bndptr, ii); } /* Remove contribution from the .ed of 'from' */ if (me != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (myedegrees[k].pid == from) { if (myedegrees[k].ed == adjwgt[j]) myedegrees[k] = myedegrees[--myrinfo->ndegrees]; else myedegrees[k].ed -= adjwgt[j]; break; } } } /* Add contribution to the .ed of 'to' */ if (me != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (myedegrees[k].pid == to) { myedegrees[k].ed += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { myedegrees[myrinfo->ndegrees].pid = to; myedegrees[myrinfo->ndegrees++].ed = adjwgt[j]; } } ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]); ASSERT(CheckRInfo(myrinfo)); } nmoves++; } } graph->nbnd = nbnd; if (ctrl->dbglvl&DBG_REFINE) { printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ", npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], nbnd, nmoves, graph->mincut); ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec); for (i=0; i<ncon; i++) printf("%.3f ", tvec[i]); printf("\n"); } if (graph->mincut == oldcut) break; } fwspacefree(ctrl, ncon*nparts); fwspacefree(ctrl, ncon*nparts); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, float *tpwgts, int npasses) { int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum; idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind; idxtype *moved, *swaps, *perm, *qnum; float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal; PQueueType parts[MAXNCON][2]; int higain, oldgain, mincut, initcut, newcut, mincutorder; float rtpwgts[2]; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; npwgts = graph->npwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); qnum = idxwspacemalloc(ctrl, nvtxs); limit = amin(amax(0.01*nvtxs, 25), 150); /* Initialize the queues */ for (i=0; i<ncon; i++) { PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1); PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1); } for (i=0; i<nvtxs; i++) qnum[i] = samax(ncon, nvwgt+i*ncon); origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); rtpwgts[0] = origbal*tpwgts[0]; rtpwgts[1] = origbal*tpwgts[1]; /* if (ctrl->dbglvl&DBG_REFINE) { printf("Parts: ["); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal); } */ idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ for (i=0; i<ncon; i++) { PQueueReset(&parts[i][0]); PQueueReset(&parts[i][1]); } mincutorder = -1; newcut = mincut = initcut = graph->mincut; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); minbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert boundary nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = bndind[perm[ii]]; ASSERT(ed[i] > 0 || id[i] == 0); ASSERT(bndptr[i] != -1); PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts); to = (from+1)%2; if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1) break; ASSERT(bndptr[higain] != -1); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); newcut -= (ed[higain]-id[higain]); newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts); if ((newcut < mincut && newbal-origbal <= .00001) || (newcut == mincut && (newbal < minbal || (newbal == minbal && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) { mincut = newcut; minbal = newbal; mincutorder = nswaps; for (i=0; i<ncon; i++) mindiff[i] = fabs(tpwgts[0]-npwgts[i]); } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; /* if (ctrl->dbglvl&DBG_MOVEINFO) { printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf(", %.3f LB: %.3f\n", minbal, newbal); } */ /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1); saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } /* if (ctrl->dbglvl&DBG_REFINE) { printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd); for (l=0; l<ncon; l++) printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]); printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts)); } */ graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut == initcut) break; } for (i=0; i<ncon; i++) { PQueueFree(ctrl, &parts[i][0]); PQueueFree(ctrl, &parts[i][1]); } idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }
/************************************************************************* * This function performs k-way refinement **************************************************************************/ void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses) { int h, i, ii, iii, j, k, c; int pass, nvtxs, nedges, ncon; int nmoves, nmoved, nswaps, nzgswaps; /* int gnswaps, gnzgswaps; */ int me, firstvtx, lastvtx, yourlastvtx; int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight; int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts; int nlupd, nsupd, nnbrs, nchanged; idxtype *xadj, *ladjncy, *adjwgt, *vtxdist; idxtype *where, *tmp_where, *moved; floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill; idxtype *update, *supdate, *rupdate, *pe_updates; idxtype *changed, *perm, *pperm, *htable; idxtype *peind, *recvptr, *sendptr; KeyValueType *swchanges, *rwchanges; RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo; EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees; floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg; int *nupds_pe; IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr)); /*************************/ /* set up common aliases */ /*************************/ nvtxs = graph->nvtxs; nedges = graph->nedges; ncon = graph->ncon; vtxdist = graph->vtxdist; xadj = graph->xadj; ladjncy = graph->adjncy; adjwgt = graph->adjwgt; firstvtx = vtxdist[mype]; lastvtx = vtxdist[mype+1]; where = graph->where; rinfo = graph->rinfo; lnpwgts = graph->lnpwgts; gnpwgts = graph->gnpwgts; ubvec = ctrl->ubvec; tpwgts = ctrl->tpwgts; nnbrs = graph->nnbrs; peind = graph->peind; recvptr = graph->recvptr; sendptr = graph->sendptr; changed = idxmalloc(nvtxs, "KWR: changed"); rwchanges = wspace->pairs; swchanges = rwchanges + recvptr[nnbrs]; /************************************/ /* set up important data structures */ /************************************/ perm = idxmalloc(nvtxs, "KWR: perm"); pperm = idxmalloc(nparts, "KWR: pperm"); update = idxmalloc(nvtxs, "KWR: update"); supdate = wspace->indices; rupdate = supdate + recvptr[nnbrs]; nupds_pe = imalloc(npes, "KWR: nupds_pe"); htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable"); badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt"); for (i=0; i<nparts; i++) { for (h=0; h<ncon; h++) { badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h]; } } movewgts = fmalloc(nparts*ncon, "KWR: movewgts"); ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts"); pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts"); overfill = fmalloc(nparts*ncon, "KWR: overfill"); moved = idxmalloc(nvtxs, "KWR: moved"); tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where"); tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo"); tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees"); idxcopy(nvtxs+graph->nrecv, where, tmp_where); for (i=0; i<nvtxs; i++) { tmp_rinfo[i].id = rinfo[i].id; tmp_rinfo[i].ed = rinfo[i].ed; tmp_rinfo[i].ndegrees = rinfo[i].ndegrees; tmp_rinfo[i].degrees = tmp_edegrees+xadj[i]; for (j=0; j<rinfo[i].ndegrees; j++) { tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge; tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt; } } nswaps = nzgswaps = 0; /*********************************************************/ /* perform a small number of passes through the vertices */ /*********************************************************/ for (pass=0; pass<npasses; pass++) { if (mype == 0) RandomPermute(nparts, pperm, 1); MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm); FastRandomPermute(nvtxs, perm, 1); oldcut = graph->mincut; /* check to see if the partitioning is imbalanced */ Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec); ubavg = savg(ncon, ubvec); lbavg = savg(ncon, lbvec); imbalanced = (lbavg > ubavg) ? 1 : 0; for (c=0; c<2; c++) { scopy(ncon*nparts, gnpwgts, ognpwgts); sset(ncon*nparts, 0.0, movewgts); nmoved = 0; /**********************************************/ /* PASS ONE -- record stats for desired moves */ /**********************************************/ for (iii=0; iii<nvtxs; iii++) { i = perm[iii]; from = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; for (h=0; h<ncon; h++) if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT) break; if (h < ncon) { continue; } /* check for a potential improvement */ if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) { my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) { to = my_edegrees[k].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) break; } } oldto = to; /* check if a subdomain was found that fits */ if (k < tmp_rinfo[i].ndegrees) { for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) { to = my_edegrees[j].edge; if (ProperSide(c, pperm[from], pperm[to])) { for (h=0; h<ncon; h++) if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0) break; if (h == ncon) { if (my_edegrees[j].ewgt > my_edegrees[k].ewgt || (my_edegrees[j].ewgt == my_edegrees[k].ewgt && IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ k = j; oldto = my_edegrees[k].edge; } } } } to = oldto; if (my_edegrees[k].ewgt > tmp_rinfo[i].id || (my_edegrees[k].ewgt == tmp_rinfo[i].id && (imbalanced || graph->level > 3 || iii % 8 == 0) && IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){ /****************************************/ /* Update tmp arrays of the moved vertex */ /****************************************/ tmp_where[i] = to; moved[nmoved++] = i; for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; gnpwgts[to*ncon+h] += nvwgt[h]; gnpwgts[from*ncon+h] -= nvwgt[h]; movewgts[to*ncon+h] += nvwgt[h]; movewgts[from*ncon+h] -= nvwgt[h]; } tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } } /******************************************/ /* Let processors know the subdomain wgts */ /* if all proposed moves commit. */ /******************************************/ MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); /**************************/ /* compute overfill array */ /**************************/ overweight = 0; for (j=0; j<nparts; j++) { for (h=0; h<ncon; h++) { if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) { overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]); } else { overfill[j*ncon+h] = 0.0; } overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0); overfill[j*ncon+h] *= movewgts[j*ncon+h]; if (overfill[j*ncon+h] > 0.0) overweight = 1; ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h])); } } /****************************************************/ /* select moves to undo according to overfill array */ /****************************************************/ if (overweight == 1) { for (iii=0; iii<nmoved; iii++) { i = moved[iii]; oldto = tmp_where[i]; nvwgt = graph->nvwgt+i*ncon; my_edegrees = tmp_rinfo[i].degrees; for (k=0; k<tmp_rinfo[i].ndegrees; k++) if (my_edegrees[k].edge == where[i]) break; for (h=0; h<ncon; h++) if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0) break; /**********************************/ /* nullify this move if necessary */ /**********************************/ if (k != tmp_rinfo[i].ndegrees && h != ncon) { moved[iii] = -1; from = oldto; to = where[i]; for (h=0; h<ncon; h++) { overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0); } tmp_where[i] = to; tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt; SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j); if (my_edegrees[k].ewgt == 0) { tmp_rinfo[i].ndegrees--; my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge; my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt; } else { my_edegrees[k].edge = from; } for (h=0; h<ncon; h++) { lnpwgts[to*ncon+h] += nvwgt[h]; lnpwgts[from*ncon+h] -= nvwgt[h]; } /* Update the degrees of adjacent vertices */ for (j=xadj[i]; j<xadj[i+1]; j++) { /* no need to bother about vertices on different pe's */ if (ladjncy[j] >= nvtxs) continue; me = ladjncy[j]; mydomain = tmp_where[me]; myrinfo = tmp_rinfo+me; your_edegrees = myrinfo->degrees; if (mydomain == from) { INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]); } else { if (mydomain == to) { INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]); } } /* Remove contribution from the .ed of 'from' */ if (mydomain != from) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == from) { if (your_edegrees[k].ewgt == adjwgt[j]) { myrinfo->ndegrees--; your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge; your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt; } else { your_edegrees[k].ewgt -= adjwgt[j]; } break; } } } /* Add contribution to the .ed of 'to' */ if (mydomain != to) { for (k=0; k<myrinfo->ndegrees; k++) { if (your_edegrees[k].edge == to) { your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { your_edegrees[myrinfo->ndegrees].edge = to; your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j]; } } } } } } /*************************************************/ /* PASS TWO -- commit the remainder of the moves */ /*************************************************/ nlupd = nsupd = nmoves = nchanged = 0; for (iii=0; iii<nmoved; iii++) { i = moved[iii]; if (i == -1) continue; where[i] = tmp_where[i]; /* Make sure to update the vertex information */ if (htable[i] == 0) { /* make sure you do the update */ htable[i] = 1; update[nlupd++] = i; } /* Put the vertices adjacent to i into the update array */ for (j=xadj[i]; j<xadj[i+1]; j++) { k = ladjncy[j]; if (htable[k] == 0) { htable[k] = 1; if (k<nvtxs) update[nlupd++] = k; else supdate[nsupd++] = k; } } nmoves++; nswaps++; /* check number of zero-gain moves */ for (k=0; k<rinfo[i].ndegrees; k++) if (rinfo[i].degrees[k].edge == to) break; if (rinfo[i].id == rinfo[i].degrees[k].ewgt) nzgswaps++; if (graph->pexadj[i+1]-graph->pexadj[i] > 0) changed[nchanged++] = i; } /* Tell interested pe's the new where[] info for the interface vertices */ CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4); IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd))); /*------------------------------------------------------------- / Time to communicate with processors to send the vertices / whose degrees need to be update. /-------------------------------------------------------------*/ /* Issue the receives first */ for (i=0; i<nnbrs; i++) { MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i); } /* Issue the sends next. This needs some preporcessing */ for (i=0; i<nsupd; i++) { htable[supdate[i]] = 0; supdate[i] = graph->imap[supdate[i]]; } iidxsort(nsupd, supdate); for (j=i=0; i<nnbrs; i++) { yourlastvtx = vtxdist[peind[i]+1]; for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i); j = k; } /* OK, now get into the loop waiting for the send/recv operations to finish */ MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses); for (i=0; i<nnbrs; i++) MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i); MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses); /*------------------------------------------------------------- / Place the recieved to-be updated vertices into update[] /-------------------------------------------------------------*/ for (i=0; i<nnbrs; i++) { pe_updates = rupdate+sendptr[i]; for (j=0; j<nupds_pe[i]; j++) { k = pe_updates[j]; if (htable[k-firstvtx] == 0) { htable[k-firstvtx] = 1; update[nlupd++] = k-firstvtx; } } } /*------------------------------------------------------------- / Update the rinfo of the vertices in the update[] array /-------------------------------------------------------------*/ for (ii=0; ii<nlupd; ii++) { i = update[ii]; ASSERT(ctrl, htable[i] == 1); htable[i] = 0; mydomain = where[i]; myrinfo = rinfo+i; tmp_myrinfo = tmp_rinfo+i; my_edegrees = myrinfo->degrees; your_edegrees = tmp_myrinfo->degrees; graph->lmincut -= myrinfo->ed; myrinfo->ndegrees = 0; myrinfo->id = 0; myrinfo->ed = 0; for (j=xadj[i]; j<xadj[i+1]; j++) { yourdomain = where[ladjncy[j]]; if (mydomain != yourdomain) { myrinfo->ed += adjwgt[j]; for (k=0; k<myrinfo->ndegrees; k++) { if (my_edegrees[k].edge == yourdomain) { my_edegrees[k].ewgt += adjwgt[j]; your_edegrees[k].ewgt += adjwgt[j]; break; } } if (k == myrinfo->ndegrees) { my_edegrees[k].edge = yourdomain; my_edegrees[k].ewgt = adjwgt[j]; your_edegrees[k].edge = yourdomain; your_edegrees[k].ewgt = adjwgt[j]; myrinfo->ndegrees++; } ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]); ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]); } else { myrinfo->id += adjwgt[j]; } } graph->lmincut += myrinfo->ed; tmp_myrinfo->id = myrinfo->id; tmp_myrinfo->ed = myrinfo->ed; tmp_myrinfo->ndegrees = myrinfo->ndegrees; } /* finally, sum-up the partition weights */ MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm); } graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2; if (graph->mincut == oldcut) break; } /* gnswaps = GlobalSESum(ctrl, nswaps); gnzgswaps = GlobalSESum(ctrl, nzgswaps); if (mype == 0) printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps); */ GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM); GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM); GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM); GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM); IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr)); }
/************************************************************************* * This function performs an edge-based FM refinement **************************************************************************/ void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasses) { int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp; idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts; idxtype *moved, *swaps, *perm; PQueueType parts[2]; int higain, oldgain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt; nvtxs = graph->nvtxs; xadj = graph->xadj; vwgt = graph->vwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; id = graph->id; ed = graph->ed; pwgts = graph->pwgts; bndptr = graph->bndptr; bndind = graph->bndind; moved = idxwspacemalloc(ctrl, nvtxs); swaps = idxwspacemalloc(ctrl, nvtxs); perm = idxwspacemalloc(ctrl, nvtxs); limit = (int) amin(amax(0.01*nvtxs, 15), 100); avgvwgt = amin((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs); tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)]; PQueueInit(ctrl, &parts[0], nvtxs, tmp); PQueueInit(ctrl, &parts[1], nvtxs, tmp); IFSET(ctrl->dbglvl, DBG_REFINE, printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d\n", pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut)); origdiff = abs(tpwgts[0]-pwgts[0]); idxset(nvtxs, -1, moved); for (pass=0; pass<npasses; pass++) { /* Do a number of passes */ PQueueReset(&parts[0]); PQueueReset(&parts[1]); mincutorder = -1; newcut = mincut = initcut = graph->mincut; mindiff = abs(tpwgts[0]-pwgts[0]); ASSERT(ComputeCut(graph, where) == graph->mincut); ASSERT(CheckBnd(graph)); /* Insert boundary nodes in the priority queues */ nbnd = graph->nbnd; RandomPermute(nbnd, perm, 1); for (ii=0; ii<nbnd; ii++) { i = perm[ii]; ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0); ASSERT(bndptr[bndind[i]] != -1); PQueueInsert(&parts[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]); } for (nswaps=0; nswaps<nvtxs; nswaps++) { from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1); to = (from+1)%2; if ((higain = PQueueGetMax(&parts[from])) == -1) break; ASSERT(bndptr[higain] != -1); newcut -= (ed[higain]-id[higain]); INC_DEC(pwgts[to], pwgts[from], vwgt[higain]); if ((newcut < mincut && abs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) || (newcut == mincut && abs(tpwgts[0]-pwgts[0]) < mindiff)) { mincut = newcut; mindiff = abs(tpwgts[0]-pwgts[0]); mincutorder = nswaps; } else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */ newcut += (ed[higain]-id[higain]); INC_DEC(pwgts[from], pwgts[to], vwgt[higain]); break; } where[higain] = to; moved[higain] = nswaps; swaps[nswaps] = higain; IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1])); /************************************************************** * Update the id[i]/ed[i] values of the affected nodes ***************************************************************/ SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; oldgain = ed[k]-id[k]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); /* Update its boundary information and queue position */ if (bndptr[k] != -1) { /* If k was a boundary vertex */ if (ed[k] == 0) { /* Not a boundary vertex any more */ BNDDelete(nbnd, bndind, bndptr, k); if (moved[k] == -1) /* Remove it if in the queues */ PQueueDelete(&parts[where[k]], k, oldgain); } else { /* If it has not been moved, update its position in the queue */ if (moved[k] == -1) PQueueUpdate(&parts[where[k]], k, oldgain, ed[k]-id[k]); } } else { if (ed[k] > 0) { /* It will now become a boundary vertex */ BNDInsert(nbnd, bndind, bndptr, k); if (moved[k] == -1) PQueueInsert(&parts[where[k]], k, ed[k]-id[k]); } } } } /**************************************************************** * Roll back computations *****************************************************************/ for (i=0; i<nswaps; i++) moved[swaps[i]] = -1; /* reset moved array */ for (nswaps--; nswaps>mincutorder; nswaps--) { higain = swaps[nswaps]; to = where[higain] = (where[higain]+1)%2; SWAP(id[higain], ed[higain], tmp); if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) BNDDelete(nbnd, bndind, bndptr, higain); else if (ed[higain] > 0 && bndptr[higain] == -1) BNDInsert(nbnd, bndind, bndptr, higain); INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]); for (j=xadj[higain]; j<xadj[higain+1]; j++) { k = adjncy[j]; kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]); INC_DEC(id[k], ed[k], kwgt); if (bndptr[k] != -1 && ed[k] == 0) BNDDelete(nbnd, bndind, bndptr, k); if (bndptr[k] == -1 && ed[k] > 0) BNDInsert(nbnd, bndind, bndptr, k); } } IFSET(ctrl->dbglvl, DBG_REFINE, printf("\tMinimum cut: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd)); graph->mincut = mincut; graph->nbnd = nbnd; if (mincutorder == -1 || mincut == initcut) break; } PQueueFree(ctrl, &parts[0]); PQueueFree(ctrl, &parts[1]); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); idxwspacefree(ctrl, nvtxs); }