/*************************************************************************
* Computes a heavy-edge style matching of the vertices of `graph` and
* builds the coarse graph from it.
*
* Vertices are visited in the order obtained by bucket-sorting a random
* permutation on vwgt, so lighter vertices pick their partner first.
* For an unmatched vertex, every neighbour is scored with
*     1.0 / ARATIO2(dim, combined surface term, combined volume)
* and the best-scoring neighbour that is still unmatched and keeps the
* combined vertex weight within ctrl->maxsize is selected.  A vertex
* without an admissible neighbour is matched with itself.
*
* Side effects: stores a freshly allocated map in graph->cmap and calls
* CreateCoarseGraph() with the resulting matching.
**************************************************************************/
void Match_HEM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, e, nbr, nvtxs, ncoarse, best, dim;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm, *tperm;
  realtype score, bestscore;
  realtype *vvol, *vsurf, *adjwgt, *adjwgtsum;

  dim       = ctrl->dim;
  nvtxs     = graph->nvtxs;
  xadj      = graph->xadj;
  vwgt      = graph->vwgt;
  vvol      = graph->vvol;
  vsurf     = graph->vsurf;
  adjncy    = graph->adjncy;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");
  tperm = idxmalloc(nvtxs, "tperm");

  /* Random order first, then a sort on vertex weight. */
  RandomPermute(nvtxs, tperm, 1);
  BucketSortKeysInc(nvtxs, vwgt[iamax(nvtxs, vwgt)], vwgt, tperm, perm);

  ncoarse = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (match[v] != UNMATCHED)
      continue;

    /* Default: the vertex is matched with itself. */
    best = v;
    bestscore = 0.0;

    for (e=xadj[v]; e<xadj[v+1]; e++) {
      nbr = adjncy[e];
      score = 1.0/ARATIO2(dim, vsurf[v]+vsurf[nbr]+adjwgtsum[v]+adjwgtsum[nbr]-
                  2.0*adjwgt[e], vvol[v]+vvol[nbr]);

      if (match[nbr] != UNMATCHED)
        continue;
      if (!(vwgt[v]+vwgt[nbr] <= ctrl->maxsize))
        continue;
      if (score > bestscore) {
        bestscore = score;
        best = nbr;
      }
    }

    /* Both endpoints collapse into the same coarse vertex. */
    cmap[best] = ncoarse;
    cmap[v]    = ncoarse;
    ncoarse++;

    match[v]    = best;
    match[best] = v;
  }

  CreateCoarseGraph(graph, ncoarse, match, perm);

  IMfree((void**)&tperm, &perm, &match, LTERM);
}
/*************************************************************************
* Computes movement statistics for adaptive refinement schemes.
*
* Locally, the routine counts how many vertices are assigned to each
* partition (graph->where) and how many are assigned to a partition other
* than ctrl->mype.  The per-partition counts are summed over ctrl->comm.
*
* Outputs:
*   *nmoved - GlobalSESum of the local "assigned elsewhere" counts
*   *maxout - GlobalSEMax of the local "assigned elsewhere" counts
*   *maxin  - GlobalSEMax of (global count for partition mype minus the
*             number of local vertices that stay on mype)
**************************************************************************/
void ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
  int v, nvtxs, nleaving, nstaying;
  idxtype *where;
  idxtype *lpvtxs, *gpvtxs;

  nvtxs = graph->nvtxs;
  where = graph->where;

  lpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: lpvtxs");
  gpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs");

  nleaving = 0;
  for (v=0; v<nvtxs; v++) {
    lpvtxs[where[v]]++;
    if (where[v] != ctrl->mype)
      nleaving++;
  }
  nstaying = nvtxs-nleaving;

  /* Turn the local per-partition counts into global ones. */
  MPI_Allreduce((void *)lpvtxs, (void *)gpvtxs, ctrl->nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

  *nmoved = GlobalSESum(ctrl, nleaving);
  *maxout = GlobalSEMax(ctrl, nleaving);
  *maxin  = GlobalSEMax(ctrl, gpvtxs[ctrl->mype]-nstaying);

  GKfree((void **)&lpvtxs, (void **)&gpvtxs, LTERM);
}
/*****************************************************************************
* Builds the nodal graph of a mesh whose elements have 4 nodes each
* (elmnts holds 4 consecutive node ids per element).
*
* For every node, and for every element containing it, the two nodes at
* positions table[k][0] and table[k][1] of that element (k being the node's
* position inside the element) are added as neighbours, each at most once.
*
* Outputs: dxadj (nvtxs+1 entries) and dadjncy receive the adjacency
* structure in CSR form; both must be supplied by the caller.
******************************************************************************/
void QUADNODALMETIS(int nelmnts, int nvtxs, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
  int i, e, base, slot, side, nbr, node, elem, nedges;
  idxtype *nptr, *nind;
  idxtype *mark;
  /* For position k inside an element: the two positions linked to it. */
  int table[4][2] = {{1, 3}, {0, 2}, {1, 3}, {0, 2}};

  /* Step 1: node -> element incidence list (nptr/nind in CSR form). */
  nptr = idxsmalloc(nvtxs+1, 0, "QUADNODALMETIS: nptr");
  for (i=0; i<4*nelmnts; i++)
    nptr[elmnts[i]]++;
  MAKECSR(i, nvtxs, nptr);

  nind = idxmalloc(nptr[nvtxs], "QUADNODALMETIS: nind");
  slot = 0;
  for (elem=0; elem<nelmnts; elem++) {
    for (side=0; side<4; side++) {
      nind[nptr[elmnts[slot]]++] = elem;
      slot++;
    }
  }
  /* The fill above advanced every start pointer; shift them back. */
  for (i=nvtxs; i>0; i--)
    nptr[i] = nptr[i-1];
  nptr[0] = 0;

  /* Step 2: emit the neighbours, using mark[] to suppress duplicates. */
  mark = idxsmalloc(nvtxs, -1, "QUADNODALMETIS: mark");

  nedges = dxadj[0] = 0;
  for (node=0; node<nvtxs; node++) {
    mark[node] = node;
    for (e=nptr[node]; e<nptr[node+1]; e++) {
      base = 4*nind[e];

      /* Locate this node inside the element. */
      slot = 0;
      while (slot < 4 && elmnts[base+slot] != node)
        slot++;
      ASSERT(slot != 4);

      for (side=0; side<2; side++) {
        nbr = elmnts[base+table[slot][side]];
        if (mark[nbr] != node) {
          mark[nbr] = node;
          dadjncy[nedges++] = nbr;
        }
      }
    }
    dxadj[node+1] = nedges;
  }

  free(mark);
  free(nptr);
  free(nind);
}
/*************************************************************************
* Creates and fills a GraphType for the local portion of a distributed
* graph with `ncon` weights per vertex.
*
* - The caller's xadj/adjncy/vtxdist arrays are referenced, not copied.
* - If bit 2 of *wgtflag is clear, unit vertex weights are allocated;
*   if bit 1 is clear, unit edge weights are allocated.
* - ctrl->tvwgts[j] receives the GlobalSESum of the local weight sums of
*   constraint j; if any of them is zero an error is printed and the
*   program terminates via MPI_Finalize()/exit(-1).
* - graph->nvwgt is allocated and set to vwgt / tvwgts per constraint.
* - srand() is seeded with ctrl->seed.
*
* Returns the newly created graph.
**************************************************************************/
GraphType *Moc_SetUpGraph(CtrlType *ctrl, int ncon, idxtype *vtxdist, idxtype *xadj, idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, int *wgtflag)
{
  int v, c;
  GraphType *graph;
  int ltvwgts[MAXNCON];
  idxtype *wrow;
  floattype *nrow;

  graph = CreateGraph();
  graph->level   = 0;
  graph->gnvtxs  = vtxdist[ctrl->npes];
  graph->nvtxs   = vtxdist[ctrl->mype+1]-vtxdist[ctrl->mype];
  graph->ncon    = ncon;
  graph->nedges  = xadj[graph->nvtxs];
  graph->xadj    = xadj;
  graph->vwgt    = vwgt;
  graph->adjncy  = adjncy;
  graph->adjwgt  = adjwgt;
  graph->vtxdist = vtxdist;

  /* Substitute unit weights for whatever the caller did not supply. */
  if (((*wgtflag)&2) == 0)
    graph->vwgt = idxsmalloc(graph->nvtxs*ncon, 1, "Par_KMetis: vwgt");

  if (((*wgtflag)&1) == 0)
    graph->adjwgt = idxsmalloc(graph->nedges, 1, "Par_KMetis: adjwgt");

  /* Local per-constraint weight totals ... */
  for (c=0; c<ncon; c++)
    ltvwgts[c] = 0;

  for (v=0; v<graph->nvtxs; v++) {
    wrow = graph->vwgt + v*ncon;
    for (c=0; c<ncon; c++)
      ltvwgts[c] += wrow[c];
  }

  /* ... combined across processors. */
  for (c=0; c<ncon; c++)
    ctrl->tvwgts[c] = GlobalSESum(ctrl, ltvwgts[c]);

  /* A constraint whose total weight is zero cannot be normalized. */
  for (c=0; c<ncon; c++) {
    if (ctrl->tvwgts[c] != 0)
      continue;

    rprintf(ctrl, "ERROR: sum weight for constraint %d is zero\n", c);
    MPI_Finalize();
    exit(-1);
  }

  /* Normalized vertex weights. */
  graph->nvwgt = fmalloc(graph->nvtxs*ncon, "graph->nvwgt");
  for (v=0; v<graph->nvtxs; v++) {
    wrow = graph->vwgt + v*ncon;
    nrow = graph->nvwgt + v*ncon;
    for (c=0; c<ncon; c++)
      nrow[c] = (floattype)(wrow[c]) / (floattype)(ctrl->tvwgts[c]);
  }

  srand(ctrl->seed);

  return graph;
}
/*************************************************************************
* Computes the normalized cut of a partitioning.
*
* For every part p the routine accumulates
*   degree[p] - weight of all non-self-loop edges incident to vertices of p
*   ncut[p]   - weight of the edges leaving p
* (every edge counts 1 when graph->adjwgt is NULL).
*
* Parameters:
*   graph      - graph in CSR form (nvtxs, xadj, adjncy, optional adjwgt)
*   where      - part id of every vertex
*   npart      - number of parts
*   ncutVector - optional output of npart floats; entry p is set to
*                ncut[p]/degree[p] for every part with degree[p] > 0 and
*                is left untouched otherwise.  May be NULL.
*
* Returns the sum of ncut[p]/degree[p] over parts with positive degree,
* plus the number of parts whose degree is exactly 0.
*
* Passing ncutVector == NULL used to allocate a scratch vector that was
* never released; the per-part values are now simply not stored.
**************************************************************************/
float ComputeNCutVector(GraphType *graph, idxtype *where, int npart,float* ncutVector)
{
  int i, j, cm, nvtxs, empty;
  idxtype *ncut, *degree, *xadj, *adjncy, *adjwgt;
  idxtype w;
  float result, ratio;

  ncut   = idxsmalloc(npart, 0, "ComputeNCut: ncut");
  degree = idxsmalloc(npart, 0, "ComputeNCut: degree");

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  for (i=0; i<nvtxs; i++) {
    cm = where[i];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      /* Unweighted graphs count every edge as 1. */
      w = (adjwgt == NULL ? 1 : adjwgt[j]);

      if (adjncy[j] != i)
        degree[cm] += w;
      if (cm != where[adjncy[j]])
        ncut[cm] += w;
    }
  }

  empty  = 0;
  result = 0;
  for (i=0; i<npart; i++) {
    if (degree[i] == 0)
      empty++;
    if (degree[i] > 0) {
      /* Rounded to float before summing, exactly as when it was read
         back from ncutVector. */
      ratio = ncut[i] *1.0/ degree[i];
      if (ncutVector != NULL)
        ncutVector[i] = ratio;
      result += ratio;
    }
  }

  free(ncut);
  free(degree);

  return result+empty;
}
/*************************************************************************
* Checks whether a graph is contiguous (connected).
*
* Runs a breadth-first search from vertex 0 over the CSR adjacency
* structure and compares the number of vertices reached with nvtxs.
*
* Parameters:
*   ctrl   - not used by this routine
*   graph  - graph to test; the search unconditionally starts at vertex 0,
*            so the graph is expected to have at least one vertex
*   report - when non-zero and the graph is disconnected, the number of
*            unreached vertices is printed
*
* Returns 1 if every vertex was reached, 0 otherwise.
*
* The two work arrays were previously leaked on every call; they are now
* released before returning.
**************************************************************************/
int IsConnected(CtrlType *ctrl, GraphType *graph, int report)
{
  int i, j, k, nvtxs, first, last, connected;
  idxtype *xadj, *adjncy, *touched, *queue;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: touched");
  queue   = idxmalloc(nvtxs, "IsConnected: queue");

  /* Seed the BFS with vertex 0. */
  touched[0] = 1;
  queue[0] = 0;
  first = 0;
  last = 1;

  while (first < last) {
    i = queue[first++];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (!touched[k]) {
        queue[last++] = k;
        touched[k] = 1;
      }
    }
  }

  /* `first` now equals the number of vertices that were dequeued. */
  connected = (first == nvtxs ? 1 : 0);

  if (first != nvtxs && report)
    printf("The graph is not connected. It has %d disconnected vertices!\n", nvtxs-first);

  free(touched);
  free(queue);

  return connected;
}
/*************************************************************************
* Computes the load imbalance of a partitioning.
*
* Without vertex weights (graph->vwgt == NULL) a single value is written:
*   ubvec[0] = nparts * (largest part size) / nvtxs
* With vertex weights, one value per constraint j is written:
*   ubvec[j] = nparts * (heaviest part weight) / (total weight)
*
* Parameters:
*   graph  - graph providing nvtxs, ncon and vwgt
*   nparts - number of parts
*   where  - part id of every vertex
*   ubvec  - output array (1 entry, or ncon entries when vwgt is present)
**************************************************************************/
void ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec)
{
  int v, c, nvtxs, ncon;
  idxtype *kpwgts, *vwgt;
  idxtype heaviest;

  nvtxs = graph->nvtxs;
  ncon  = graph->ncon;
  vwgt  = graph->vwgt;

  kpwgts = idxsmalloc(nparts, 0, "ComputePartitionInfo: kpwgts");

  if (vwgt == NULL) {
    /* Unweighted: every vertex contributes 1 to its part. */
    for (v=0; v<nvtxs; v++)
      kpwgts[where[v]]++;

    heaviest = kpwgts[idxamax(nparts, kpwgts)];
    ubvec[0] = 1.0*nparts*heaviest/(1.0*nvtxs);

    free(kpwgts);
    return;
  }

  for (c=0; c<ncon; c++) {
    idxset(nparts, 0, kpwgts);
    for (v=0; v<nvtxs; v++)
      kpwgts[where[v]] += vwgt[v*ncon+c];

    heaviest = kpwgts[idxamax(nparts, kpwgts)];
    ubvec[c] = 1.0*nparts*heaviest/(1.0*idxsum(nparts, kpwgts));
  }

  free(kpwgts);
}
/*************************************************************************
* This function returns the number of connected components in cptr,cind
* The separator of the graph is used to split it and then find its components.
*
* Parameters:
*   ctrl  - not used by this routine
*   graph - provides nvtxs, xadj, adjncy, where, nbnd and bndind
*   cptr  - output: cptr[c]..cptr[c+1] delimits component c inside cind
*   cind  - output: vertices in the order the search visited them; it is
*           used directly as the BFS queue
*
* Returns the number of components found (ncmps).
*
* NOTE(review): vertices with where[i] == 2 are skipped when counting and
* when picking the first seed; presumably these are the separator vertices
* and graph->bndind lists the same set - confirm against the caller.
* NOTE(review): if every vertex has where[i] == 2 the seed search ends with
* i == nvtxs and touched[nvtxs] is written - confirm callers exclude this.
**************************************************************************/
int FindComponents(CtrlType *ctrl, GraphType *graph, idxtype *cptr, idxtype *cind)
{
  int i, j, k, nvtxs, first, last, nleft, ncmps, wgt;
  idxtype *xadj, *adjncy, *where, *touched, *queue;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  where = graph->where;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: queue");

  /* Pre-mark the vertices listed in bndind so the search never enters them */
  for (i=0; i<graph->nbnd; i++)
    touched[graph->bndind[i]] = 1;

  /* The caller's cind array doubles as the BFS queue */
  queue = cind;

  /* Number of vertices the search has to visit */
  nleft = 0;
  for (i=0; i<nvtxs; i++) {
    if (where[i] != 2)
      nleft++;
  }

  /* First seed: the lowest-numbered vertex with where[i] != 2 */
  for (i=0; i<nvtxs; i++) {
    if (where[i] != 2)
      break;
  }

  touched[i] = 1;
  queue[0] = i;
  first = 0;
  last = 1;

  cptr[0] = 0; /* This actually points to queue */
  ncmps = 0;
  while (first != nleft) {
    if (first == last) { /* Find another starting vertex */
      /* The queue ran dry: close the current component and reseed */
      cptr[++ncmps] = first;
      for (i=0; i<nvtxs; i++) {
        if (!touched[i])
          break;
      }
      queue[last++] = i;
      touched[i] = 1;
    }

    /* Dequeue one vertex and enqueue its untouched neighbours */
    i = queue[first++];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (!touched[k]) {
        queue[last++] = k;
        touched[k] = 1;
      }
    }
  }
  /* Close the last component */
  cptr[++ncmps] = first;

  free(touched);

  return ncmps;
}
/*************************************************************************
* Computes, for every part, the weight of the edges that leave it and
* returns the largest of these per-part cuts.
*
* Every cut edge counts 1 when graph->adjwgt is NULL and adjwgt[j]
* otherwise.  The index of the winning part and its cut are also printed
* through mprintf().
*
* Parameters:
*   graph  - graph in CSR form
*   nparts - number of parts
*   where  - part id of every vertex
**************************************************************************/
idxtype ComputeMaxCut(GraphType *graph, idxtype nparts, idxtype *where)
{
  idxtype v, e, maxcut;
  idxtype *cuts;

  cuts = idxsmalloc(nparts, 0, "ComputeMaxCut: cuts");

  for (v=0; v<graph->nvtxs; v++) {
    for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
      /* Edges inside a part do not contribute. */
      if (where[v] == where[graph->adjncy[e]])
        continue;

      if (graph->adjwgt == NULL)
        cuts[where[v]]++;
      else
        cuts[where[v]] += graph->adjwgt[e];
    }
  }

  maxcut = cuts[idxargmax(nparts, cuts)];

  mprintf("%D => %D\n", idxargmax(nparts, cuts), maxcut);

  gk_free((void **)&cuts, LTERM);

  return maxcut;
}
/*************************************************************************
* Checks whether a graph is contiguous by enumerating its connected
* components with repeated breadth-first searches.
*
* Parameters:
*   graph  - graph in CSR form; the first search starts at vertex 0
*   report - when non-zero and more than one component exists, the number
*            of components is printed, followed by the size of every
*            component that has more than 200 vertices
*
* Returns 1 if there is exactly one component, 0 otherwise.
**************************************************************************/
int IsConnected2(GraphType *graph, int report)
{
  int v, e, nbr, c, nvtxs, head, tail, ncmps, size;
  idxtype *xadj, *adjncy, *touched, *queue;
  idxtype *cptr;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: touched");
  queue   = idxmalloc(nvtxs, "IsConnected: queue");
  cptr    = idxmalloc(nvtxs, "IsConnected: cptr");

  /* Seed the first search with vertex 0. */
  touched[0] = 1;
  queue[0] = 0;
  head = 0;
  tail = 1;

  cptr[0] = 0;  /* component boundaries are positions inside queue */
  ncmps = 0;
  while (head != nvtxs) {
    if (head == tail) {
      /* Current component is exhausted: record it and reseed with the
         lowest-numbered vertex that has not been reached yet. */
      cptr[++ncmps] = head;

      v = 0;
      while (v < nvtxs && touched[v])
        v++;

      queue[tail++] = v;
      touched[v] = 1;
    }

    v = queue[head++];
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      nbr = adjncy[e];
      if (touched[nbr])
        continue;
      queue[tail++] = nbr;
      touched[nbr] = 1;
    }
  }
  cptr[++ncmps] = head;

  if (ncmps > 1 && report) {
    printf("%d connected components:\t", ncmps);
    for (c=0; c<ncmps; c++) {
      size = cptr[c+1]-cptr[c];
      if (size > 200)
        printf("[%5d] ", size);
    }
    printf("\n");
  }

  GKfree(&touched, &queue, &cptr, LTERM);

  return (ncmps == 1 ? 1 : 0);
}
/*****************************************************************************
* Builds the nodal graph of a mesh whose elements have 4 nodes each
* (elmnts holds 4 consecutive node ids per element).
*
* Two nodes become neighbours when they appear in the same element; every
* neighbour is listed once and a node is never listed as its own neighbour.
*
* Outputs: dxadj (nvtxs+1 entries) and dadjncy receive the adjacency
* structure in CSR form; both must be supplied by the caller.
******************************************************************************/
void TETNODALMETIS(int nelmnts, int nvtxs, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
  int i, e, corner, slot, nbr, node, elem, nedges;
  idxtype *nptr, *nind;
  idxtype *mark;
  idxtype *enodes;

  /* Step 1: node -> element incidence list (nptr/nind in CSR form). */
  nptr = idxsmalloc(nvtxs+1, 0, "TETNODALMETIS: nptr");
  for (i=0; i<4*nelmnts; i++)
    nptr[elmnts[i]]++;
  MAKECSR(i, nvtxs, nptr);

  nind = idxmalloc(nptr[nvtxs], "TETNODALMETIS: nind");
  slot = 0;
  for (elem=0; elem<nelmnts; elem++) {
    for (corner=0; corner<4; corner++) {
      nind[nptr[elmnts[slot]]++] = elem;
      slot++;
    }
  }
  /* The fill above advanced every start pointer; shift them back. */
  for (i=nvtxs; i>0; i--)
    nptr[i] = nptr[i-1];
  nptr[0] = 0;

  /* Step 2: emit the neighbours, using mark[] to suppress duplicates. */
  mark = idxsmalloc(nvtxs, -1, "TETNODALMETIS: mark");

  nedges = dxadj[0] = 0;
  for (node=0; node<nvtxs; node++) {
    mark[node] = node;   /* keeps the node out of its own list */
    for (e=nptr[node]; e<nptr[node+1]; e++) {
      enodes = elmnts + 4*nind[e];
      for (corner=0; corner<4; corner++) {
        nbr = enodes[corner];
        if (mark[nbr] == node)
          continue;
        mark[nbr] = node;
        dadjncy[nedges++] = nbr;
      }
    }
    dxadj[node+1] = nedges;
  }

  free(mark);
  free(nptr);
  free(nind);
}
/*************************************************************************
* Computes a random matching of the vertices of `graph` and builds the
* coarse graph from it.
*
* Vertices are visited in random order.  An unmatched vertex is paired
* with the first neighbour (in adjacency-list order) that is still
* unmatched and keeps the combined vertex weight within ctrl->maxsize;
* if there is none, the vertex is matched with itself.
*
* Side effects: stores a freshly allocated map in graph->cmap and calls
* CreateCoarseGraph() with the resulting matching.
**************************************************************************/
void Match_RM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, e, nbr, nvtxs, ncoarse, partner;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  vwgt   = graph->vwgt;
  adjncy = graph->adjncy;

  cmap  = graph->cmap = idxsmalloc(nvtxs, -1, "graph->cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm  = idxmalloc(nvtxs, "perm");

  RandomPermute(nvtxs, perm, 1);

  ncoarse = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (match[v] != UNMATCHED)
      continue;

    /* Default: the vertex is matched with itself. */
    partner = v;

    e = xadj[v];
    while (e < xadj[v+1]) {
      nbr = adjncy[e];
      if (match[nbr] == UNMATCHED && vwgt[v]+vwgt[nbr] <= ctrl->maxsize) {
        partner = nbr;
        break;
      }
      e++;
    }

    /* Both endpoints collapse into the same coarse vertex. */
    cmap[partner] = ncoarse;
    cmap[v]       = ncoarse;
    ncoarse++;

    match[v]       = partner;
    match[partner] = v;
  }

  CreateCoarseGraph(graph, ncoarse, match, perm);

  IMfree((void**)&match, &perm, LTERM);
}
/*************************************************************************
* Computes the ratio association of a partitioning.
*
* For every part p:
*   rasso[p]       - weight of the adjacency entries whose two endpoints
*                    are both in p (1 per entry when adjwgt is NULL)
*   clusterSize[p] - number of vertices in p
*
* Returns the sum of rasso[p]/clusterSize[p] over all non-empty parts.
*
* Parameters:
*   graph - graph in CSR form
*   where - part id of every vertex
*   npart - number of parts
**************************************************************************/
float ComputeRAsso(GraphType *graph, idxtype *where, int npart)
{
  int v, e, p, mypart, nvtxs;
  idxtype *rasso, *clusterSize, *xadj, *adjncy;
  idxtype *adjwgt;
  float result;

  rasso       = idxsmalloc(npart, 0, "ComputeNCut: ncut");
  clusterSize = idxsmalloc(npart, 0, "ComputeNCut: degree");

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  for (v=0; v<nvtxs; v++)
    clusterSize[where[v]]++;

  for (v=0; v<nvtxs; v++) {
    mypart = where[v];
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      /* Only entries that stay inside the part are counted. */
      if (mypart != where[adjncy[e]])
        continue;

      if (adjwgt == NULL)
        rasso[mypart]++;
      else
        rasso[mypart] += adjwgt[e];
    }
  }

  result = 0;
  for (p=0; p<npart; p++) {
    if (clusterSize[p] > 0)
      result += rasso[p] *1.0/ clusterSize[p];
  }

  free(rasso);
  free(clusterSize);

  return result;
}
/****************************************************************************** * This function takes a partition vector that is distributed and reads in * the original graph and computes the edgecut *******************************************************************************/ int ComputeRealCut2(idxtype *vtxdist, idxtype *mvtxdist, idxtype *part, idxtype *mpart, char *filename, MPI_Comm comm) { int i, j, nvtxs, mype, npes, cut; idxtype *xadj, *adjncy, *gpart, *gmpart, *perm, *sizes; MPI_Status status; MPI_Comm_size(comm, &npes); MPI_Comm_rank(comm, &mype); if (mype != 0) { MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm); MPI_Send((void *)mpart, mvtxdist[mype+1]-mvtxdist[mype], IDX_DATATYPE, 0, 1, comm); } else { /* Processor 0 does all the rest */ gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart"); idxcopy(vtxdist[1], part, gpart); gmpart = idxmalloc(mvtxdist[npes], "ComputeRealCut: gmpart"); idxcopy(mvtxdist[1], mpart, gmpart); for (i=1; i<npes; i++) { MPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_DATATYPE, i, 1, comm, &status); MPI_Recv((void *)(gmpart+mvtxdist[i]), mvtxdist[i+1]-mvtxdist[i], IDX_DATATYPE, i, 1, comm, &status); } /* OK, now go and reconstruct the permutation to go from the graph to mgraph */ perm = idxmalloc(vtxdist[npes], "ComputeRealCut: perm"); sizes = idxsmalloc(npes+1, 0, "ComputeRealCut: sizes"); for (i=0; i<vtxdist[npes]; i++) sizes[gpart[i]]++; MAKECSR(i, npes, sizes); for (i=0; i<vtxdist[npes]; i++) perm[i] = sizes[gpart[i]]++; /* Ok, now read the graph from the file */ ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy); /* OK, now compute the cut */ for (cut=0, i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (gmpart[perm[i]] != gmpart[perm[adjncy[j]]]) cut++; } } cut = cut/2; GKfree(&gpart, &gmpart, &perm, &sizes, &xadj, &adjncy, LTERM); return cut; } return 0; }
/*************************************************************************
* Entry point for detecting contacts between bounding boxes and surface
* nodes.
*
* For every box (6 doubles each in boxcoords) FindBoxContacts() is asked
* for its contacts; the results are concatenated into a CSR structure.
*
* Parameters:
*   raw_cinfo - opaque handle, cast to ContactInfoType *
*   nboxes    - pointer to the number of boxes
*   boxcoords - 6*(*nboxes) coordinates
*   nparts    - pointer to the number of partitions (sizes the scratch
*               arrays handed to FindBoxContacts)
*   r_cntptr  - output: CSR pointer array with *nboxes+1 entries
*   r_cntind  - output: concatenated contact lists
* Ownership of *r_cntptr and *r_cntind passes to the caller.
*
* Fixes over the previous version:
*   - growing the buffer while processing box 0 divided by zero
*   - the extrapolated capacity could still be smaller than what the
*     current box needs, so idxcopy() could write past the buffer; the
*     capacity is now clamped to at least tncnts+ncnts
*   - realloc() goes through a temporary so the old pointer is not
*     overwritten before the result has been checked
**************************************************************************/
void METIS_FindContacts(void *raw_cinfo, idxtype *nboxes, double *boxcoords, idxtype *nparts, idxtype **r_cntptr, idxtype **r_cntind)
{
  idxtype i, ncnts, tncnts, maxtncnts, needed;
  idxtype *cntptr, *cntind, *auxcntind, *stack, *marker, *grown;
  ContactInfoType *cinfo;

  cinfo = (ContactInfoType *)raw_cinfo;

  /* Initial guess: 6 contacts per box. */
  maxtncnts = 6*(*nboxes);
  cntptr    = idxsmalloc(*nboxes+1, 0, "METIS_FindContacts: cntptr");
  cntind    = idxmalloc(maxtncnts, "METIS_FindContacts: cntind");
  auxcntind = idxmalloc(*nparts, "METIS_FindContacts: auxcntind");
  stack     = idxmalloc(cinfo->nnodes, "METIS_FindContacts: stack");
  marker    = idxsmalloc(*nparts, 0, "METIS_FindContacts: marker");

  /* Go through each box and determine its contacting partitions */
  for (tncnts=0, i=0; i<*nboxes; i++) {
    ncnts = FindBoxContacts(cinfo, boxcoords+i*6, stack, auxcntind, marker);

    if (ncnts == 0)
      mprintf("CSearchError: Box has no contacts!\n");

    needed = tncnts + ncnts;
    if (needed >= maxtncnts) {
      /* Extrapolate the contacts seen so far over the remaining boxes.
         Box 0 has no preceding boxes to average over, so it scales by
         the box count instead of dividing by i == 0. */
      if (i > 0)
        maxtncnts += needed*(*nboxes-i)/i;
      else
        maxtncnts += needed*(*nboxes);

      /* The estimate must at least hold the contacts of this box. */
      if (maxtncnts < needed)
        maxtncnts = needed;

      grown = (idxtype *)realloc(cntind, maxtncnts*sizeof(idxtype));
      if (grown == NULL)
        errexit("Realloc failed! of %d words!\n", maxtncnts);
      cntind = grown;
    }

    cntptr[i] = ncnts;
    idxcopy(ncnts, auxcntind, cntind+tncnts);
    tncnts += ncnts;
  }
  /* Convert the per-box counts into CSR offsets. */
  MAKECSR(i, *nboxes, cntptr);

  *r_cntptr = cntptr;
  *r_cntind = cntind;

  gk_free((void **)&auxcntind, &stack, &marker, LTERM);
}
/*************************************************************************
* Builds a histogram of the vertex degrees of `graph`.
*
* Parameters:
*   graph     - graph in CSR form (nvtxs, xadj)
*   maxDegree - output: the largest degree found (0 for a graph without
*               edges or without vertices)
*   logScale  - when > 0 the degrees are binned with getLogBin() first
*
* Returns a newly allocated array that the caller must release.  It has
* *maxDegree+1 entries, or getLogBin(*maxDegree)+1 entries in log-scale
* mode; entry b counts the vertices whose degree (or bin) equals b.
*
* The previous version only set the log-scale size when some vertex had
* a positive degree and otherwise read an uninitialized variable.  The
* size is now always derived from the final maximum, which is the same
* value as before whenever the maximum is positive.
* NOTE(review): as before, this relies on getLogBin() being
* non-decreasing - confirm.
*************************************************************************/
idxtype* getDegreeHistogram(GraphType* graph, int* maxDegree, int logScale)
{
  int i, degree, bin;
  int maxLogDegree = 0;
  idxtype *hist;

  *maxDegree = 0;
  for (i=0; i<graph->nvtxs; i++) {
    degree = graph->xadj[i+1] - graph->xadj[i];
    if (degree > *maxDegree)
      *maxDegree = degree;
  }

  if (logScale > 0) {
    maxLogDegree = getLogBin(*maxDegree);
    if (maxLogDegree < 0)   /* never request a negative-sized array */
      maxLogDegree = 0;
    hist = idxsmalloc(maxLogDegree+1, 0, "getDegreeHistogram:hist");
  }
  else {
    hist = idxsmalloc(*maxDegree+1, 0, "getDegreeHistogram:hist");
  }

  for (i=0; i<graph->nvtxs; i++) {
    bin = graph->xadj[i+1] - graph->xadj[i];
    if (logScale > 0)
      bin = getLogBin(bin);
    hist[bin]++;
  }

  return hist;
}
/*************************************************************************
* Builds a histogram of the edge weights of `graph`.
*
* Parameters:
*   graph     - graph in CSR form; graph->adjwgt is read for all
*               xadj[nvtxs] adjacency entries
*   maxWeight - output: the largest weight found (0 if no weight is
*               positive)
*   logScale  - when > 0 the weights are binned with getLogBin() first
*
* Returns a newly allocated array that the caller must release.  It has
* *maxWeight+1 entries, or getLogBin(*maxWeight)+1 entries in log-scale
* mode; entry b counts the adjacency entries whose weight (or bin) is b.
*
* The previous version only set the log-scale size when some weight was
* positive and otherwise read an uninitialized variable.  The size is now
* always derived from the final maximum, which is the same value as before
* whenever the maximum is positive.  The allocation label, copied from the
* degree histogram, now names this function.
* NOTE(review): as before, this relies on getLogBin() being
* non-decreasing - confirm.
*************************************************************************/
idxtype* getWeightsHistogram(GraphType* graph, int* maxWeight, int logScale)
{
  int i, bin, nentries;
  int maxLogWeight = 0;
  idxtype *hist;

  nentries = graph->xadj[graph->nvtxs];

  *maxWeight = 0;
  for (i=0; i<nentries; i++) {
    if (graph->adjwgt[i] > *maxWeight)
      *maxWeight = graph->adjwgt[i];
  }

  if (logScale > 0) {
    maxLogWeight = getLogBin(*maxWeight);
    if (maxLogWeight < 0)   /* never request a negative-sized array */
      maxLogWeight = 0;
    hist = idxsmalloc(maxLogWeight+1, 0, "getWeightsHistogram:hist");
  }
  else {
    hist = idxsmalloc(*maxWeight+1, 0, "getWeightsHistogram:hist");
  }

  for (i=0; i<nentries; i++) {
    bin = graph->adjwgt[i];
    if (logScale > 0)
      bin = getLogBin(bin);
    hist[bin]++;
  }

  return hist;
}
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way
* refinement. This function utilizes local coarsening.
*
* Parameters:
*   vtxdist, xadj, adjncy - distributed graph structure, handed to SetUpGraph()
*   vwgt, adjwgt          - vertex / edge weights, handed to SetUpGraph()
*   wgtflag               - weight flag, passed by value to SetUpGraph() and
*                           FreeInitialGraphAndRemap()
*   numflag               - when *numflag == 1 ChangeNumbering() is called on the
*                           arrays before and after the computation
*   options               - handed to SetUpCtrl()
*   edgecut               - output: graph->mincut (0 when run on one processor)
*   part                  - output: copy of graph->where (all zeros when run on
*                           one processor)
*   comm                  - communicator
************************************************************************************/
void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options, int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /* Single processor: every vertex goes to partition 0 and nothing is cut */
  if (npes == 1) { /* Take care the npes = 1 case */
    idxset(vtxdist[1], 0, part);
    *edgecut = 0;
    return;
  }

  /* Last argument 1 here and 0 at the end of the function; presumably this
     converts the numbering and then restores it - confirm in ChangeNumbering */
  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  SetUpCtrl(&ctrl, npes, options, *comm);
  /* Coarsening target: the smaller of (global vertex count + 1) and 70*npes */
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes);

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);
  /* vsize is allocated with initial value 1 and released again below */
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize");

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype));
  AdaptiveUndirected_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype));
  ReMapGraph(&ctrl, graph, 0, &wspace);

  /* Hand the result back to the caller */
  idxcopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  /* Release everything that was set up above */
  IMfree((void**)&graph->vsize, LTERM);
  FreeInitialGraphAndRemap(graph, *wgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}
/*************************************************************************
* Computes the number of adjacency entries of the coarse graph induced by
* a matching.
*
* Vertices are scanned in perm order.  The first vertex found whose cmap
* value equals the running coarse-vertex counter starts that coarse
* vertex; the adjacency lists of it and of its match (if different) are
* then scanned and every distinct coarse neighbour is counted once.  The
* coarse vertex itself is pre-marked, so it is never counted.
*
* Parameters:
*   nvtxs, xadj, adjncy - fine graph in CSR form
*   cnvtxs              - number of coarse vertices (sizes the marker table)
*   cmap                - fine-to-coarse vertex map
*   match               - matching (match[v] == v for unmatched vertices)
*   perm                - order in which the fine vertices are scanned
*
* Returns the number of counted coarse adjacency entries.
**************************************************************************/
int ComputeCoarseGraphSize(int nvtxs, idxtype *xadj, idxtype *adjncy, int cnvtxs, idxtype *cmap, idxtype *match, idxtype *perm)
{
  int pos, e, side, v, src, cnbr, coarse, cnedges;
  idxtype *htable;

  /* htable[c] == coarse means "c already counted for this coarse vertex". */
  htable = idxsmalloc(cnvtxs, -1, "htable");

  coarse  = 0;
  cnedges = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (cmap[v] != coarse)
      continue;

    htable[coarse] = coarse;

    /* side 0: the vertex itself; side 1: its partner, when distinct. */
    for (side=0; side<2; side++) {
      src = (side == 0 ? v : match[v]);
      if (side == 1 && src == v)
        break;

      for (e=xadj[src]; e<xadj[src+1]; e++) {
        cnbr = cmap[adjncy[e]];
        if (htable[cnbr] == coarse)
          continue;
        htable[cnbr] = coarse;
        cnedges++;
      }
    }

    coarse++;
  }

  GKfree(&htable, LTERM);

  return cnedges;
}
/*************************************************************************
* Computes the subdomain graph of a partitioning and prints a summary.
*
* pmat[a*nparts+b] accumulates the weight of the edges going from part a
* to a different part b.  For every part the number of parts it has a
* positive connection to is counted; the sum and the maximum of these
* counts are printed.
*
* Parameters:
*   graph  - graph in CSR form; graph->adjwgt is read for every cut edge
*   nparts - number of parts
*   where  - part id of every vertex
**************************************************************************/
void PrintSubDomainGraph(GraphType *graph, int nparts, idxtype *where)
{
  int v, e, p, q, mypart, nvtxs, nadjacent, total, max;
  idxtype *xadj, *adjncy, *adjwgt, *pmat, *row;

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  pmat = idxsmalloc(nparts*nparts, 0, "ComputeSubDomainGraph: pmat");

  for (v=0; v<nvtxs; v++) {
    mypart = where[v];
    row = pmat + mypart*nparts;
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      q = where[adjncy[e]];
      if (q != mypart)
        row[q] += adjwgt[e];
    }
  }

  total = 0;
  max = 0;
  for (p=0; p<nparts; p++) {
    row = pmat + p*nparts;

    nadjacent = 0;
    for (q=0; q<nparts; q++) {
      if (row[q] > 0)
        nadjacent++;
    }

    total += nadjacent;
    if (nadjacent > max)
      max = nadjacent;
  }
  printf("Total adjacent subdomains: %d, Max: %d\n", total, max);

  free(pmat);
}
/*************************************************************************
* Computes the balance of an element partitioning:
*   nparts * (size of the largest part) / (sum of all part sizes)
*
* Parameters:
*   ne     - number of elements
*   nparts - number of parts
*   where  - part id of every element
*
* Returns the balance as a float.
**************************************************************************/
float ComputeElementBalance(int ne, int nparts, idxtype *where)
{
  int e;
  idxtype *kpwgts;
  idxtype largest;
  float balance;

  kpwgts = idxsmalloc(nparts, 0, "ComputeElementBalance: kpwgts");

  /* Every element contributes 1 to its part. */
  for (e=0; e<ne; e++)
    kpwgts[where[e]] += 1;

  largest = kpwgts[idxamax(nparts, kpwgts)];
  balance = 1.0*nparts*largest/(1.0*idxsum(nparts, kpwgts));

  free(kpwgts);

  return balance;
}
/*************************************************************************
* Counting sort that produces a permutation ordered by increasing key.
*
* Parameters:
*   n     - number of items
*   max   - largest key; keys index a table with max+2 entries
*   keys  - key of every item
*   tperm - order in which the items are fed to the sort; items with
*           equal keys keep this relative order in the output
*   perm  - output: the items in sorted order
*
* This sort is used during matching.
**************************************************************************/
void BucketSortKeysInc(int n, int max, idxtype *keys, idxtype *tperm, idxtype *perm)
{
  int k, pos, item;
  idxtype slot;
  idxtype *counts;

  counts = idxsmalloc(max+2, 0, "BucketSortKeysInc: counts");

  /* Histogram of the keys, then converted to starting offsets. */
  for (k=0; k<n; k++)
    counts[keys[k]] += 1;
  MAKECSR(k, max+1, counts);

  /* Drop every item into the next free slot of its key's bucket. */
  for (pos=0; pos<n; pos++) {
    item = tperm[pos];
    slot = counts[keys[item]];
    counts[keys[item]] = slot+1;
    perm[slot] = item;
  }

  free(counts);
}
/*************************************************************************
* This function uses a simple counting sort to return a permutation array
* corresponding to the sorted (increasing) order of the keys.  The keys are
* assumed to start from 0 and to be positive; they index a table of max+2
* entries.  Items are inserted in tperm order, so items with equal keys
* keep their tperm order.  This sorting is used during matching.
*
* Parameters:
*   n     - number of items
*   max   - largest key
*   keys  - key of every item
*   tperm - insertion order of the items
*   perm  - output: the items in sorted order
**************************************************************************/
void BucketSortKeysInc(idxtype n, idxtype max, idxtype *keys, idxtype *tperm, idxtype *perm)
{
  idxtype idx, cursor, vtx, dest;
  idxtype *counts;

  counts = idxsmalloc(max+2, 0, "BucketSortKeysInc: counts");

  /* Count the occurrences of every key. */
  idx = 0;
  while (idx < n) {
    counts[keys[idx]]++;
    idx++;
  }

  /* Turn the counts into bucket start offsets. */
  MAKECSR(idx, max+1, counts);

  /* Scatter the items into their buckets. */
  cursor = 0;
  while (cursor < n) {
    vtx  = tperm[cursor];
    dest = counts[keys[vtx]]++;
    perm[dest] = vtx;
    cursor++;
  }

  gk_free((void **)&counts, LTERM);
}
/*************************************************************************
* Alternates weighted kernel k-means with batch local search.
*
* Each round runs Weighted_kernel_k_means() and then, when chain_length
* is positive, local_search() with that chain length. The rounds repeat
* while local search reports at least one move, and stop once the round
* counter exceeds MAXITERATIONS. With chain_length <= 0 the local search
* is skipped, so exactly one k-means round is performed.
*
* Afterwards, unless the global memory_saving flag is non-zero, empty
* clusters are removed with remove_empty_clusters_l1() and, when toplevel
* is positive, also with remove_empty_clusters_l2().
*
*   ctrl, graph  - partitioning context and the graph being refined
*   nparts       - number of clusters
*   chain_length - local-search chain length; <= 0 disables local search
*   tpwgts       - passed through unchanged to the helpers
*   ubfactor     - passed through unchanged to the helpers
*   toplevel     - > 0 enables the second empty-cluster pass
**************************************************************************/
void pingpong(CtrlType *ctrl, GraphType *graph, int nparts, int chain_length, float *tpwgts, float ubfactor, int toplevel)
{
  int nvtxs, moves, iter;
  idxtype *w;

  nvtxs = graph->nvtxs;

  /* Per-vertex weights, filled in by Compute_Weights() */
  w = idxsmalloc(nvtxs, 0, "pingpong: weight");
  Compute_Weights(ctrl, graph, w);

  moves = 0;
  iter = 0;

  do {
    Weighted_kernel_k_means(ctrl, graph, nparts, w, tpwgts, ubfactor);

    if (chain_length > 0)
      moves = local_search(ctrl, graph, nparts, chain_length, w, tpwgts, ubfactor);

    iter++;
    if (iter > MAXITERATIONS)
      break;
  } while (moves > 0);

  if (memory_saving == 0) {
    remove_empty_clusters_l1(ctrl, graph, nparts, w, tpwgts, ubfactor);
    if (toplevel > 0)
      remove_empty_clusters_l2(ctrl, graph, nparts, w, tpwgts, ubfactor);
  }

  free(w);
}
/*************************************************************************
* This function estimates how much one level of coarsening would shrink
* a graph.
*
* The vertices are visited in the order given by perm (filled in by
* RandomPermute) and every still-unmatched vertex is paired with the
* first unmatched vertex in its adjacency list, or with itself when it
* has none. No vertex or edge weights are consulted. The matching is
* then handed to ComputeCoarseGraphSize() to obtain the coarse edge count.
*
*   nvtxs     - number of vertices
*   xadj      - adjacency pointers (nvtxs+1 entries)
*   adjncy    - adjacency lists
*   vfraction - out: (coarse vertices) / nvtxs
*   efraction - out: (coarse edges) / xadj[nvtxs]
*
* NOTE(review): nvtxs == 0 or xadj[nvtxs] == 0 makes the corresponding
* ratio a division by zero; callers are assumed to pass non-empty graphs.
**************************************************************************/
void EstimateCFraction(int nvtxs, idxtype *xadj, idxtype *adjncy, floattype *vfraction, floattype *efraction)
{
  int i, ii, j, cnvtxs, cnedges, maxidx;
  idxtype *match, *cmap, *perm;

  cmap = idxmalloc(nvtxs, "cmap");
  match = idxsmalloc(nvtxs, UNMATCHED, "match");
  perm = idxmalloc(nvtxs, "perm");
  RandomPermute(nvtxs, perm, 1);

  cnvtxs = 0;
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];

    if (match[i] == UNMATCHED) {  /* Unmatched */
      maxidx = i;  /* stays i (matched with itself) if no neighbor is free */

      /* Take the first neighbor that has not been matched yet */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        if (match[adjncy[j]] == UNMATCHED) {
          maxidx = adjncy[j];
          break;
        }
      }

      /* Both endpoints collapse into the same coarse vertex */
      cmap[i] = cmap[maxidx] = cnvtxs++;
      match[i] = maxidx;
      match[maxidx] = i;
    }
  }

  cnedges = ComputeCoarseGraphSize(nvtxs, xadj, adjncy, cnvtxs, cmap, match, perm);

  *vfraction = (1.0*cnvtxs)/(1.0*nvtxs);
  *efraction = (1.0*cnedges)/(1.0*xadj[nvtxs]);

  /* Cast explicitly, as the other GKfree() call sites in this file do */
  GKfree((void **)&cmap, (void **)&match, (void **)&perm, LTERM);
}
/*************************************************************************
* This function computes movement statistics for adaptive refinement
* schemes.
*
* Every local vertex contributes its size (graph->vsize[i], or 1 when
* graph->vsize is NULL) to three per-partition tallies:
*   lstart[home]  - size that starts out in the vertex's home partition
*   lend[where]   - size that ends up in its assigned partition
*   lleft[home]   - size that leaves the home partition (where != home)
* The home partition is ctrl->mype when ctrl->ps_relation == COUPLED and
* graph->home[i] otherwise. The tallies are summed over ctrl->comm, so
* every process of that communicator must call this routine.
*
*   nmoved - out: total size that leaves its home partition
*   maxout - out: largest size leaving any single partition
*   maxin  - out: largest value of gend + gleft - gstart over partitions,
*            i.e. the largest size entering any single partition
**************************************************************************/
void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
  int i, nvtxs, nparts, myhome;
  idxtype size;
  idxtype *where;
  idxtype *lend, *gend, *lleft, *gleft, *lstart, *gstart;

  nvtxs = graph->nvtxs;
  where = graph->where;
  nparts = ctrl->nparts;

  lstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lstart");
  gstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gstart");
  lleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lleft");
  gleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gleft");
  lend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lend");
  gend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gend");

  for (i=0; i<nvtxs; i++) {
    myhome = (ctrl->ps_relation == COUPLED) ? ctrl->mype : graph->home[i];
    size = (graph->vsize == NULL) ? 1 : graph->vsize[i];

    lstart[myhome] += size;
    lend[where[i]] += size;
    if (where[i] != myhome)
      lleft[myhome] += size;
  }

  /* PrintVector(ctrl, ctrl->npes, 0, lend, "Lend: "); */

  MPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

  *nmoved = idxsum(nparts, gleft);
  *maxout = gleft[idxamax(nparts, gleft)];

  /* lstart is reused as scratch: incoming = final + departed - initial */
  for (i=0; i<nparts; i++)
    lstart[i] = gend[i]+gleft[i]-gstart[i];
  *maxin = lstart[idxamax(nparts, lstart)];

  GKfree((void **)&lstart, (void **)&gstart, (void **)&lleft, (void **)&gleft, (void **)&lend, (void **)&gend, LTERM);
}
/*************************************************************************
* This function performs k-way refinement.
*
* Up to npasses passes are made; the loop stops early as soon as a pass
* leaves graph->mincut unchanged. Every pass runs two sub-iterations
* (c = 0, 1); ProperSide(c, pperm[from], pperm[to]) decides which
* from -> to directions may be used in each of them, based on a random
* partition permutation that process 0 broadcasts.
*
* One sub-iteration consists of
*   PASS ONE - tentative moves are applied to the scratch copies
*              (tmp_where, tmp_rinfo) and to lnpwgts/gnpwgts;
*   overfill - the would-be partition weights are summed over all
*              processes and, for partitions that would exceed
*              badmaxpwgt (= ubvec * tpwgts), some local moves are undone;
*   PASS TWO - the surviving moves are committed to where[], neighboring
*              processes are told which of their vertices need updating,
*              and rinfo / graph->lmincut are recomputed for the affected
*              vertices.
*
* The routine calls MPI_Bcast/MPI_Allreduce on ctrl->comm, so every
* process of that communicator has to call it.
*
* NOTE(review): the weight reductions use MPI_DOUBLE on floattype
* buffers -- confirm that floattype is double in this build.
**************************************************************************/
void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
{
  int h, i, ii, iii, j, k, c;
  int pass, nvtxs, nedges, ncon;
  int nmoves, nmoved, nswaps, nzgswaps;
/*
  int gnswaps, gnzgswaps;
*/
  int me, firstvtx, lastvtx, yourlastvtx;
  int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  int nlupd, nsupd, nnbrs, nchanged;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  idxtype *where, *tmp_where, *moved;
  floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
  idxtype *update, *supdate, *rupdate, *pe_updates;
  idxtype *changed, *perm, *pperm, *htable;
  idxtype *peind, *recvptr, *sendptr;
  KeyValueType *swchanges, *rwchanges;
  RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
  EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
  floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
  int *nupds_pe;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  /*************************/
  /* set up common aliases */
  /*************************/
  nvtxs = graph->nvtxs;
  nedges = graph->nedges;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  where = graph->where;
  rinfo = graph->rinfo;
  lnpwgts = graph->lnpwgts;
  gnpwgts = graph->gnpwgts;
  ubvec = ctrl->ubvec;
  tpwgts = ctrl->tpwgts;

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  changed = idxmalloc(nvtxs, "KWR: changed");
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  /************************************/
  /* set up important data structures */
  /************************************/
  perm = idxmalloc(nvtxs, "KWR: perm");
  pperm = idxmalloc(nparts, "KWR: pperm");

  update = idxmalloc(nvtxs, "KWR: update");
  supdate = wspace->indices;
  rupdate = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWR: nupds_pe");

  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
  badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");

  /* Largest weight each partition may reach, per constraint */
  for (i=0; i<nparts; i++) {
    for (h=0; h<ncon; h++) {
      badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
    }
  }

  movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
  ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
  pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
  overfill = fmalloc(nparts*ncon, "KWR: overfill");
  moved = idxmalloc(nvtxs, "KWR: moved");
  tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
  tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
  tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");

  /* Scratch copies of where[] and rinfo[] that PASS ONE may modify freely */
  idxcopy(nvtxs+graph->nrecv, where, tmp_where);
  for (i=0; i<nvtxs; i++) {
    tmp_rinfo[i].id = rinfo[i].id;
    tmp_rinfo[i].ed = rinfo[i].ed;
    tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
    tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];

    for (j=0; j<rinfo[i].ndegrees; j++) {
      tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
      tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
    }
  }

  nswaps = nzgswaps = 0;
  /*********************************************************/
  /* perform a small number of passes through the vertices */
  /*********************************************************/
  for (pass=0; pass<npasses; pass++) {
    if (mype == 0)
      RandomPermute(nparts, pperm, 1);
    MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
    FastRandomPermute(nvtxs, perm, 1);
    oldcut = graph->mincut;

    /* check to see if the partitioning is imbalanced */
    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    ubavg = savg(ncon, ubvec);
    lbavg = savg(ncon, lbvec);
    imbalanced = (lbavg > ubavg) ? 1 : 0;

    for (c=0; c<2; c++) {
      scopy(ncon*nparts, gnpwgts, ognpwgts);
      sset(ncon*nparts, 0.0, movewgts);
      nmoved = 0;

      /**********************************************/
      /* PASS ONE -- record stats for desired moves */
      /**********************************************/
      for (iii=0; iii<nvtxs; iii++) {
        i = perm[iii];
        from = tmp_where[i];
        nvwgt = graph->nvwgt+i*ncon;

        /* Skip the vertex if, for some constraint, it carries (almost) the
           entire weight of its current subdomain */
        for (h=0; h<ncon; h++)
          if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
            break;

        if (h < ncon) {
          continue;
        }

        /* check for a potential improvement */
        if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
          my_edegrees = tmp_rinfo[i].degrees;

          /* Find the first admissible subdomain that can take the vertex
             without exceeding badmaxpwgt */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
            to = my_edegrees[k].edge;
            if (ProperSide(c, pperm[from], pperm[to])) {
              for (h=0; h<ncon; h++)
                if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                  break;

              if (h == ncon)
                break;
            }
          }
          oldto = to;

          /* check if a subdomain was found that fits */
          if (k < tmp_rinfo[i].ndegrees) {
            /* Look for a better one: heavier connection, or equally heavy
               with better balance */
            for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
              to = my_edegrees[j].edge;
              if (ProperSide(c, pperm[from], pperm[to])) {
                for (h=0; h<ncon; h++)
                  if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                    break;

                if (h == ncon) {
                  if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
                      (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
                       IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
                    k = j;
                    oldto = my_edegrees[k].edge;
                  }
                }
              }
            }
            to = oldto;

            /* Move on a positive gain, or on a zero gain that improves the
               balance (zero-gain moves are only considered selectively) */
            if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
                (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
                 (imbalanced || graph->level > 3 || iii % 8 == 0) &&
                 IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){

              /*****************************************/
              /* Update tmp arrays of the moved vertex */
              /*****************************************/
              tmp_where[i] = to;
              moved[nmoved++] = i;
              for (h=0; h<ncon; h++) {
                lnpwgts[to*ncon+h] += nvwgt[h];
                lnpwgts[from*ncon+h] -= nvwgt[h];
                gnpwgts[to*ncon+h] += nvwgt[h];
                gnpwgts[from*ncon+h] -= nvwgt[h];
                movewgts[to*ncon+h] += nvwgt[h];
                movewgts[from*ncon+h] -= nvwgt[h];
              }

              tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
              SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
              if (my_edegrees[k].ewgt == 0) {
                tmp_rinfo[i].ndegrees--;
                my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
                my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
              }
              else {
                my_edegrees[k].edge = from;
              }

              /* Update the degrees of adjacent vertices */
              for (j=xadj[i]; j<xadj[i+1]; j++) {
                /* no need to bother about vertices on different pe's */
                if (ladjncy[j] >= nvtxs)
                  continue;

                me = ladjncy[j];
                mydomain = tmp_where[me];

                myrinfo = tmp_rinfo+me;
                your_edegrees = myrinfo->degrees;

                if (mydomain == from) {
                  INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
                }
                else {
                  if (mydomain == to) {
                    INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                  }
                }

                /* Remove contribution from the .ed of 'from' */
                if (mydomain != from) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == from) {
                      if (your_edegrees[k].ewgt == adjwgt[j]) {
                        myrinfo->ndegrees--;
                        your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                        your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                      }
                      else {
                        your_edegrees[k].ewgt -= adjwgt[j];
                      }
                      break;
                    }
                  }
                }

                /* Add contribution to the .ed of 'to' */
                if (mydomain != to) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == to) {
                      your_edegrees[k].ewgt += adjwgt[j];
                      break;
                    }
                  }
                  if (k == myrinfo->ndegrees) {
                    your_edegrees[myrinfo->ndegrees].edge = to;
                    your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                  }
                }
              }
            }
          }
        }
      }

      /******************************************/
      /* Let processors know the subdomain wgts */
      /* if all proposed moves commit.          */
      /******************************************/
      MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm);

      /**************************/
      /* compute overfill array */
      /**************************/
      overweight = 0;
      for (j=0; j<nparts; j++) {
        for (h=0; h<ncon; h++) {
          /* Fraction of the globally added weight that exceeds the limit ... */
          if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
            overfill[j*ncon+h] = (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) / (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]);
          }
          else {
            overfill[j*ncon+h] = 0.0;
          }

          /* ... applied to the weight this process moved in */
          overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
          overfill[j*ncon+h] *= movewgts[j*ncon+h];

          if (overfill[j*ncon+h] > 0.0)
            overweight = 1;

          ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] || pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h], (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h], badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));
        }
      }

      /****************************************************/
      /* select moves to undo according to overfill array */
      /****************************************************/
      if (overweight == 1) {
        for (iii=0; iii<nmoved; iii++) {
          i = moved[iii];
          oldto = tmp_where[i];
          nvwgt = graph->nvwgt+i*ncon;
          my_edegrees = tmp_rinfo[i].degrees;

          /* Locate the entry for the subdomain the vertex came from */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++)
            if (my_edegrees[k].edge == where[i])
              break;

          for (h=0; h<ncon; h++)
            if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)
              break;

          /**********************************/
          /* nullify this move if necessary */
          /**********************************/
          if (k != tmp_rinfo[i].ndegrees && h != ncon) {
            moved[iii] = -1;
            from = oldto;
            to = where[i];

            for (h=0; h<ncon; h++) {
              overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);
            }

            tmp_where[i] = to;
            tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
            SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
            if (my_edegrees[k].ewgt == 0) {
              tmp_rinfo[i].ndegrees--;
              my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
              my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
            }
            else {
              my_edegrees[k].edge = from;
            }

            for (h=0; h<ncon; h++) {
              lnpwgts[to*ncon+h] += nvwgt[h];
              lnpwgts[from*ncon+h] -= nvwgt[h];
            }

            /* Update the degrees of adjacent vertices */
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              /* no need to bother about vertices on different pe's */
              if (ladjncy[j] >= nvtxs)
                continue;

              me = ladjncy[j];
              mydomain = tmp_where[me];

              myrinfo = tmp_rinfo+me;
              your_edegrees = myrinfo->degrees;

              if (mydomain == from) {
                INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
              }
              else {
                if (mydomain == to) {
                  INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                }
              }

              /* Remove contribution from the .ed of 'from' */
              if (mydomain != from) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == from) {
                    if (your_edegrees[k].ewgt == adjwgt[j]) {
                      myrinfo->ndegrees--;
                      your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                      your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                    }
                    else {
                      your_edegrees[k].ewgt -= adjwgt[j];
                    }
                    break;
                  }
                }
              }

              /* Add contribution to the .ed of 'to' */
              if (mydomain != to) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == to) {
                    your_edegrees[k].ewgt += adjwgt[j];
                    break;
                  }
                }
                if (k == myrinfo->ndegrees) {
                  your_edegrees[myrinfo->ndegrees].edge = to;
                  your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                }
              }
            }
          }
        }
      }

      /*************************************************/
      /* PASS TWO -- commit the remainder of the moves */
      /*************************************************/
      nlupd = nsupd = nmoves = nchanged = 0;
      for (iii=0; iii<nmoved; iii++) {
        i = moved[iii];
        if (i == -1)
          continue;

        where[i] = tmp_where[i];

        /* Make sure to update the vertex information */
        if (htable[i] == 0) {
          /* make sure you do the update */
          htable[i] = 1;
          update[nlupd++] = i;
        }

        /* Put the vertices adjacent to i into the update array */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (htable[k] == 0) {
            htable[k] = 1;
            if (k<nvtxs)
              update[nlupd++] = k;
            else
              supdate[nsupd++] = k;
          }
        }
        nmoves++;
        nswaps++;

        /* check number of zero-gain moves: rinfo[i] still describes the
           vertex before the move, so look up its connection to the
           subdomain it has just been committed to. The lookup must use
           where[i] (not whatever 'to' was left at by the earlier phases)
           and must not read degrees[k] when no entry was found. */
        for (k=0; k<rinfo[i].ndegrees; k++)
          if (rinfo[i].degrees[k].edge == where[i])
            break;
        if (k < rinfo[i].ndegrees && rinfo[i].id == rinfo[i].degrees[k].ewgt)
          nzgswaps++;

        if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
          changed[nchanged++] = i;
      }

      /* Tell interested pe's the new where[] info for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, rwchanges, wspace->pv4);

      IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%.4f], [%d %d %d]\n", pass, c, badmaxpwgt[0], GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), GlobalSESum(ctrl, nlupd)));

      /*-------------------------------------------------------------
      / Time to communicate with processors to send the vertices
      / whose degrees need to be updated.
      /-------------------------------------------------------------*/
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->rreq+i);
      }

      /* Issue the sends next. This needs some preprocessing: the remote
         vertices are translated through graph->imap and sorted so that
         the entries for each neighbor form one contiguous run */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      }
      iidxsort(nsupd, supdate);

      for (j=i=0; i<nnbrs; i++) {
        yourlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++);
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
        j = k;
      }

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++)
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

      /*-------------------------------------------------------------
      / Place the received to-be updated vertices into update[]
      /-------------------------------------------------------------*/
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;
          }
        }
      }

      /*-------------------------------------------------------------
      / Update the rinfo of the vertices in the update[] array
      /-------------------------------------------------------------*/
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        htable[i] = 0;

        mydomain = where[i];
        myrinfo = rinfo+i;
        tmp_myrinfo = tmp_rinfo+i;
        my_edegrees = myrinfo->degrees;
        your_edegrees = tmp_myrinfo->degrees;

        /* Recompute id/ed/degrees from scratch and mirror them into the
           scratch copy, keeping graph->lmincut in step */
        graph->lmincut -= myrinfo->ed;
        myrinfo->ndegrees = 0;
        myrinfo->id = 0;
        myrinfo->ed = 0;

        for (j=xadj[i]; j<xadj[i+1]; j++) {
          yourdomain = where[ladjncy[j]];
          if (mydomain != yourdomain) {
            myrinfo->ed += adjwgt[j];

            for (k=0; k<myrinfo->ndegrees; k++) {
              if (my_edegrees[k].edge == yourdomain) {
                my_edegrees[k].ewgt += adjwgt[j];
                your_edegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              my_edegrees[k].edge = yourdomain;
              my_edegrees[k].ewgt = adjwgt[j];
              your_edegrees[k].edge = yourdomain;
              your_edegrees[k].ewgt = adjwgt[j];
              myrinfo->ndegrees++;
            }
            ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
            ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
          }
          else {
            myrinfo->id += adjwgt[j];
          }
        }
        graph->lmincut += myrinfo->ed;

        tmp_myrinfo->id = myrinfo->id;
        tmp_myrinfo->ed = myrinfo->ed;
        tmp_myrinfo->ndegrees = myrinfo->ndegrees;
      }

      /* finally, sum-up the partition weights */
      MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm);
    }
    graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

    if (graph->mincut == oldcut)
      break;
  }

/*
  gnswaps = GlobalSESum(ctrl, nswaps);
  gnzgswaps = GlobalSESum(ctrl, nzgswaps);
  if (mype == 0)
    printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps);
*/

  GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
  GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
  GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
  GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
}
/*************************************************************************
* This function compresses a graph by merging vertices that have
* identical adjacency structure (each vertex counted as adjacent to
* itself).
*
* The compressed graph is only built when the number of distinct vertices
* is below COMPRESSION_FRACTION*nvtxs (the original header describes this
* as "at least 10% reduction"). Otherwise 'graph' is set up as an
* unweighted copy that points directly at the caller's xadj/adjncy arrays.
*
*   ctrl   - not used by this routine
*   graph  - out: initialized with InitGraph() and then filled in
*   nvtxs  - number of vertices of the input graph
*   xadj   - adjacency pointers of the input graph (nvtxs+1 entries)
*   adjncy - adjacency lists of the input graph
*   cptr   - out: cptr[c]..cptr[c+1] delimits in cind the original
*            vertices merged into compressed vertex c (written in both
*            cases)
*   cind   - out: original vertex ids grouped by compressed vertex
**************************************************************************/
void CompressGraph(CtrlType *ctrl, GraphType *graph, int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *cptr, idxtype *cind)
{
  int i, ii, iii, j, jj, k, l, cnvtxs, cnedges;
  idxtype *cxadj, *cadjncy, *cvwgt, *mark, *map;
  KeyValueType *keys;

  mark = idxsmalloc(nvtxs, -1, "CompressGraph: mark");
  map = idxsmalloc(nvtxs, -1, "CompressGraph: map");
  keys = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "CompressGraph: keys");

  /* Compute a key for each adjacency list: identical lists always get
     identical keys, so only equal-key vertices need a full comparison */
  for (i=0; i<nvtxs; i++) {
    k = 0;
    for (j=xadj[i]; j<xadj[i+1]; j++)
      k += adjncy[j];
    keys[i].key = k+i; /* Add the diagonal entry as well */
    keys[i].val = i;
  }

  ikeysort(nvtxs, keys);

  l = cptr[0] = 0;
  for (cnvtxs=i=0; i<nvtxs; i++) {
    ii = keys[i].val;
    if (map[ii] == -1) {
      /* ii starts a new compressed vertex; stamp its closed neighborhood */
      mark[ii] = i;  /* Add the diagonal entry */
      for (j=xadj[ii]; j<xadj[ii+1]; j++)
        mark[adjncy[j]] = i;

      cind[l++] = ii;
      map[ii] = cnvtxs;

      for (j=i+1; j<nvtxs; j++) {
        iii = keys[j].val;

        if (keys[i].key != keys[j].key || xadj[ii+1]-xadj[ii] != xadj[iii+1]-xadj[iii])
          break; /* Break if keys or degrees are different */

        if (map[iii] == -1) { /* Do a comparison if iii has not been mapped */
          for (jj=xadj[iii]; jj<xadj[iii+1]; jj++) {
            if (mark[adjncy[jj]] != i)
              break;
          }

          if (jj == xadj[iii+1]) { /* Identical adjacency structure */
            map[iii] = cnvtxs;
            cind[l++] = iii;
          }
        }
      }

      cptr[++cnvtxs] = l;
    }
  }

  /* printf("Original: %6d, Compressed: %6d\n", nvtxs, cnvtxs); */

  InitGraph(graph);

  if (cnvtxs >= COMPRESSION_FRACTION*nvtxs) {
    /* Not enough reduction: use the original graph with unit weights.
       xadj/adjncy are aliased, not copied. */
    graph->nvtxs = nvtxs;
    graph->nedges = xadj[nvtxs];
    graph->ncon = 1;
    graph->xadj = xadj;
    graph->adjncy = adjncy;

    graph->gdata = idxmalloc(3*nvtxs+graph->nedges, "CompressGraph: gdata");
    graph->vwgt = graph->gdata;
    graph->adjwgtsum = graph->gdata+nvtxs;
    graph->cmap = graph->gdata+2*nvtxs;
    graph->adjwgt = graph->gdata+3*nvtxs;

    idxset(nvtxs, 1, graph->vwgt);
    idxset(graph->nedges, 1, graph->adjwgt);
    for (i=0; i<nvtxs; i++)
      graph->adjwgtsum[i] = xadj[i+1]-xadj[i];

    graph->label = idxmalloc(nvtxs, "CompressGraph: label");
    for (i=0; i<nvtxs; i++)
      graph->label[i] = i;
  }
  else { /* Ok, form the compressed graph */
    /* Upper bound on the edge count: degree of one representative per group */
    cnedges = 0;
    for (i=0; i<cnvtxs; i++) {
      ii = cind[cptr[i]];
      cnedges += xadj[ii+1]-xadj[ii];
    }

    /* Allocate memory for the compressed graph */
    graph->gdata = idxmalloc(4*cnvtxs+1 + 2*cnedges, "CompressGraph: gdata");
    cxadj = graph->xadj = graph->gdata;
    cvwgt = graph->vwgt = graph->gdata + cnvtxs+1;
    graph->adjwgtsum = graph->gdata + 2*cnvtxs+1;
    graph->cmap = graph->gdata + 3*cnvtxs+1;
    cadjncy = graph->adjncy = graph->gdata + 4*cnvtxs+1;
    graph->adjwgt = graph->gdata + 4*cnvtxs+1 + cnedges;

    /* Now go and compress the graph */
    idxset(nvtxs, -1, mark);
    l = cxadj[0] = 0;
    for (i=0; i<cnvtxs; i++) {
      cvwgt[i] = cptr[i+1]-cptr[i];  /* weight = number of merged vertices */
      mark[i] = i;  /* Remove any diagonal entries in the compressed graph */
      for (j=cptr[i]; j<cptr[i+1]; j++) {
        ii = cind[j];
        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
          k = map[adjncy[jj]];
          if (mark[k] != i)  /* first time compressed vertex k is seen from i */
            cadjncy[l++] = k;
          mark[k] = i;
        }
      }
      cxadj[i+1] = l;
    }

    graph->nvtxs = cnvtxs;
    graph->nedges = l;
    graph->ncon = 1;

    idxset(graph->nedges, 1, graph->adjwgt);
    for (i=0; i<cnvtxs; i++)
      graph->adjwgtsum[i] = cxadj[i+1]-cxadj[i];

    graph->label = idxmalloc(cnvtxs, "CompressGraph: label");
    for (i=0; i<cnvtxs; i++)
      graph->label[i] = i;
  }

  /* Cast explicitly, as the other GKfree() call sites in this file do */
  GKfree((void **)&keys, (void **)&map, (void **)&mark, LTERM);
}
/*************************************************************************
* This function partitions a finite element mesh by partitioning its nodal
* graph using KMETIS and then assigning elements in a load balanced fashion.
*
*   ne, nn   - number of elements / nodes
*   elmnts   - element connectivity, esize consecutive node ids per element
*   etype    - element type 1..4, selecting 3, 4, 8 or 4 nodes per element
*              (not range-checked here)
*   numflag  - when 1, elmnts is renumbered with ChangeMesh2CNumbering()
*              before the work and everything is converted back with
*              ChangeMesh2FNumbering2() at the end
*   nparts   - number of parts
*   edgecut  - out: filled in by METIS_PartGraphKway()
*   epart    - out: part of every element
*   npart    - out: part of every node
*
* Elements whose nodes all lie in one part inherit that part. Each
* remaining element goes to the part that owns most of its nodes if that
* part holds fewer than 1.03*ne/nparts elements, otherwise to the first
* such lighter part among its nodes' parts, and failing that to the
* majority part anyway.
**************************************************************************/
void METIS_PartMeshNodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart)
{
  int i, j, k, me;
  idxtype *xadj, *adjncy, *pwgts, *shrunk;
  int options[10], pnumflag=0, wgtflag=0;
  int nnbrs, nbrind[200], nbrwgt[200], maxpwgt;
  int esize, esizes[] = {-1, 3, 4, 8, 4};

  esize = esizes[*etype];

  if (*numflag == 1)
    ChangeMesh2CNumbering((*ne)*esize, elmnts);

  xadj = idxmalloc(*nn+1, "METIS_MESHPARTNODAL: xadj");
  adjncy = idxmalloc(20*(*nn), "METIS_MESHPARTNODAL: adjncy");

  METIS_MeshToNodal(ne, nn, elmnts, etype, &pnumflag, xadj, adjncy);

  /* Give back the unused tail of adjncy. The result goes through a
     temporary so that a failed realloc keeps the (larger, still valid)
     buffer instead of leaking it, and a zero-size request is never made. */
  if (xadj[*nn] > 0) {
    shrunk = realloc(adjncy, xadj[*nn]*sizeof(idxtype));
    if (shrunk != NULL)
      adjncy = shrunk;
  }

  options[0] = 0;
  METIS_PartGraphKway(nn, xadj, adjncy, NULL, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, npart);

  /* OK, now compute an element partition based on the nodal partition npart */
  idxset(*ne, -1, epart);
  pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTNODAL: pwgts");

  /* First the interior elements: all nodes in the same part */
  for (i=0; i<*ne; i++) {
    me = npart[elmnts[i*esize]];
    for (j=1; j<esize; j++) {
      if (npart[elmnts[i*esize+j]] != me)
        break;
    }
    if (j == esize) {
      epart[i] = me;
      pwgts[me]++;
    }
  }

  maxpwgt = 1.03*(*ne)/(*nparts);
  for (i=0; i<*ne; i++) {
    if (epart[i] == -1) { /* Assign the boundary element */
      /* Tally how many of the element's nodes fall in each part; at most
         esize distinct parts, well within nbrind/nbrwgt */
      nnbrs = 0;
      for (j=0; j<esize; j++) {
        me = npart[elmnts[i*esize+j]];
        for (k=0; k<nnbrs; k++) {
          if (nbrind[k] == me) {
            nbrwgt[k]++;
            break;
          }
        }
        if (k == nnbrs) {
          nbrind[nnbrs] = me;
          nbrwgt[nnbrs++] = 1;
        }
      }

      /* Try to assign it first to the domain with most things in common */
      j = iamax(nnbrs, nbrwgt);
      if (pwgts[nbrind[j]] < maxpwgt) {
        epart[i] = nbrind[j];
      }
      else {
        /* If that fails, assign it to a light domain */
        for (j=0; j<nnbrs; j++) {
          if (pwgts[nbrind[j]] < maxpwgt) {
            epart[i] = nbrind[j];
            break;
          }
        }
        /* Every candidate is full: fall back to the majority domain */
        if (j == nnbrs)
          epart[i] = nbrind[iamax(nnbrs, nbrwgt)];
      }
      pwgts[epart[i]]++;
    }
  }

  if (*numflag == 1)
    ChangeMesh2FNumbering2((*ne)*esize, elmnts, *ne, *nn, epart, npart);

  /* Cast explicitly, as the other GKfree() call sites in this file do */
  GKfree((void **)&xadj, (void **)&adjncy, (void **)&pwgts, LTERM);
}
/*************************************************************************
* This function partitions a finite element mesh by partitioning its dual
* graph with METIS_PartGraphKway(), using the caller's element weights
* (vwgts, wgtflag = 2), and then derives a nodal partition from the
* element partition.
*
*   ne, nn   - number of elements / nodes
*   elmnts   - element connectivity, esize consecutive node ids per element
*   etype    - element type 1..4, selecting 3, 4, 8 or 4 nodes per element
*              (not range-checked here)
*   numflag  - when 1, elmnts is renumbered with ChangeMesh2CNumbering()
*              before the work and everything is converted back with
*              ChangeMesh2FNumbering2() at the end
*   nparts   - number of parts
*   edgecut  - out: filled in by METIS_PartGraphKway()
*   epart    - out: part of every element
*   npart    - out: part of every node; a node that no element references
*              is left at -1 (before any numbering conversion)
*   vwgts    - element weights handed to METIS_PartGraphKway()
*
* NOTE(review): nbrind/nbrwgt hold 200 entries, so a node whose elements
* are spread over more than 200 parts would overflow them.
**************************************************************************/
void METIS_PartMeshDual_WV(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, int *nparts, int *edgecut, idxtype *epart, idxtype *npart, idxtype *vwgts)
{
  int i, j, k, me;
  idxtype *xadj, *adjncy, *pwgts, *nptr, *nind;
  int options[10], pnumflag=0, wgtflag=2;
  int nnbrs, nbrind[200], nbrwgt[200], maxpwgt;
  int esize, esizes[] = {-1, 3, 4, 8, 4};

  esize = esizes[*etype];

  if (*numflag == 1)
    ChangeMesh2CNumbering((*ne)*esize, elmnts);

  xadj = idxmalloc(*ne+1, "METIS_MESHPARTNODAL: xadj");
  adjncy = idxmalloc(esize*(*ne), "METIS_MESHPARTNODAL: adjncy");

  METIS_MeshToDual(ne, nn, elmnts, etype, &pnumflag, xadj, adjncy);

  options[0] = 0;
  METIS_PartGraphKway(ne, xadj, adjncy, vwgts, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, epart);

  /* Construct the node-element list: the elements touching node i end up
     in nind[nptr[i]] .. nind[nptr[i+1]-1] */
  nptr = idxsmalloc(*nn+1, 0, "METIS_MESHPARTDUAL: nptr");
  for (j=esize*(*ne), i=0; i<j; i++)
    nptr[elmnts[i]]++;
  MAKECSR(i, *nn, nptr);

  nind = idxmalloc(nptr[*nn], "METIS_MESHPARTDUAL: nind");
  for (k=i=0; i<(*ne); i++) {
    for (j=0; j<esize; j++, k++)
      nind[nptr[elmnts[k]]++] = i;
  }
  /* The fill loop advanced every nptr[i] to the end of its range; shift
     the array back so that it holds the start offsets again */
  for (i=(*nn); i>0; i--)
    nptr[i] = nptr[i-1];
  nptr[0] = 0;

  /* OK, now compute a nodal partition based on the element partition epart */
  idxset(*nn, -1, npart);
  pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTDUAL: pwgts");

  /* First the interior nodes: all incident elements in the same part */
  for (i=0; i<*nn; i++) {
    if (nptr[i] == nptr[i+1])
      continue;  /* no incident element: nind[nptr[i]] is not this node's */

    me = epart[nind[nptr[i]]];
    for (j=nptr[i]+1; j<nptr[i+1]; j++) {
      if (epart[nind[j]] != me)
        break;
    }
    if (j == nptr[i+1]) {
      npart[i] = me;
      pwgts[me]++;
    }
  }

  maxpwgt = 1.03*(*nn)/(*nparts);
  for (i=0; i<*nn; i++) {
    if (npart[i] == -1) { /* Assign the boundary node */
      if (nptr[i] == nptr[i+1])
        continue;  /* unreferenced node: nothing to vote on, stays -1 */

      /* Tally how many incident elements fall in each part */
      nnbrs = 0;
      for (j=nptr[i]; j<nptr[i+1]; j++) {
        me = epart[nind[j]];
        for (k=0; k<nnbrs; k++) {
          if (nbrind[k] == me) {
            nbrwgt[k]++;
            break;
          }
        }
        if (k == nnbrs) {
          nbrind[nnbrs] = me;
          nbrwgt[nnbrs++] = 1;
        }
      }

      /* Try to assign it first to the domain with most things in common */
      j = iamax(nnbrs, nbrwgt);
      if (pwgts[nbrind[j]] < maxpwgt) {
        npart[i] = nbrind[j];
      }
      else {
        /* If that fails, assign it to a light domain; nbrind[0] is the
           fallback when every candidate is already full */
        npart[i] = nbrind[0];
        for (j=0; j<nnbrs; j++) {
          if (pwgts[nbrind[j]] < maxpwgt) {
            npart[i] = nbrind[j];
            break;
          }
        }
      }
      pwgts[npart[i]]++;
    }
  }

  if (*numflag == 1)
    ChangeMesh2FNumbering2((*ne)*esize, elmnts, *ne, *nn, epart, npart);

  /* Cast explicitly, as the other GKfree() call sites in this file do */
  GKfree((void **)&xadj, (void **)&adjncy, (void **)&pwgts, (void **)&nptr, (void **)&nind, LTERM);
}