/************************************************************************* * This function performs a k-way directed diffusion **************************************************************************/ real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home) { idx_t ii, i, j, k, l, nvtxs, nedges, nparts; idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize; idx_t npasses, first, second, third, mind, maxd; idx_t *xadj, *adjncy, *adjwgt, *where, *perm; idx_t *rowptr, *colind, *ed, *psize; real_t *transfer, *tmpvec; real_t balance = -1.0, *load, *solution, *workspace; real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor; matrix_t matrix; ikv_t *cand; idx_t ndirty, nclean, dptr, clean; nvtxs = graph->nvtxs; nedges = graph->nedges; xadj = graph->xadj; nvwgt = graph->nvwgt; adjncy = graph->adjncy; adjwgt = graph->adjwgt; where = graph->where; nparts = ctrl->nparts; ubfactor = ctrl->ubvec[0]; matrix.nrows = nparts; flowFactor = 0.35; flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor; flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor; flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor; /* allocate memory */ solution = rmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution"); tmpvec = solution + nparts; npwgts = solution + 2*nparts; load = solution + 3*nparts; matrix.values = solution + 4*nparts; transfer = matrix.transfer = solution + 4*nparts + nedges; perm = imalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm"); ed = perm + nvtxs; psize = perm + 2*nvtxs; rowptr = matrix.rowptr = perm + 2*nvtxs + nparts; colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1; /*GKTODO - Potential problem with this malloc */ wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1)); workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace"); cand = ikvmalloc(nvtxs, "WavefrontDiffusion: cand"); /*****************************/ /* Populate empty subdomains */ /*****************************/ iset(nparts, 0, psize); for (i=0; i<nvtxs; i++) psize[where[i]]++; mind = iargmin(nparts, psize); maxd = iargmax(nparts, psize); if (psize[mind] == 0) { for (i=0; i<nvtxs; i++) { k = (RandomInRange(nvtxs)+i)%nvtxs; if (where[k] == maxd) { where[k] = mind; psize[mind]++; psize[maxd]--; break; } } } iset(nvtxs, 0, ed); rset(nparts, 0.0, npwgts); for (i=0; i<nvtxs; i++) { npwgts[where[i]] += nvwgt[i]; for (j=xadj[i]; j<xadj[i+1]; j++) ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0); } ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0); done = 0; /* zero out the tmpvec array */ rset(nparts, 0.0, tmpvec); npasses = gk_min(nparts/2, NGD_PASSES); for (l=0; l<npasses; l++) { /* Set-up and solve the diffusion equation */ nswaps = 0; /************************/ /* Solve flow equations */ /************************/ SetUpConnectGraph(graph, &matrix, (idx_t *)workspace); /* check for disconnected subdomains */ for(i=0; i<matrix.nrows; i++) { if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) { cost = (real_t)(ctrl->mype); goto CleanUpAndExit; } } ConjGrad2(&matrix, load, solution, 0.001, workspace); ComputeTransferVector(1, &matrix, solution, transfer, 0); GetThreeMax(nparts, load, &first, &second, &third); if (l%3 == 0) { FastRandomPermute(nvtxs, perm, 1); } else { /*****************************/ /* move dirty vertices first */ /*****************************/ ndirty = 0; for (i=0; i<nvtxs; i++) { if (where[i] != home[i]) ndirty++; } dptr = 0; for (i=0; i<nvtxs; i++) { if (where[i] != home[i]) perm[dptr++] = i; else perm[ndirty++] = i; } PASSERT(ctrl, ndirty == nvtxs); ndirty = dptr; nclean = nvtxs-dptr; FastRandomPermute(ndirty, perm, 0); FastRandomPermute(nclean, perm+ndirty, 0); } if (ctrl->mype == 0) { for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) { i = perm[ii]; if (ed[i] != 0) { cand[k].key = -ed[i]; cand[k++].val = i; } else { cand[--j].key = 0; cand[j].val = i; } } ikvsorti(k, cand); } for (ii=0; ii<nvtxs/3; ii++) { i = (ctrl->mype == 0) ? cand[ii].val : perm[ii]; from = where[i]; /* don't move out the last vertex in a subdomain */ if (psize[from] == 1) continue; clean = (from == home[i]) ? 1 : 0; /* only move from top three or dirty vertices */ if (from != first && from != second && from != third && clean) continue; /* Scatter the sparse transfer row into the dense tmpvec row */ for (j=rowptr[from]+1; j<rowptr[from+1]; j++) tmpvec[colind[j]] = transfer[j]; for (j=xadj[i]; j<xadj[i+1]; j++) { to = where[adjncy[j]]; if (from != to) { if (tmpvec[to] > (flowFactor * nvwgt[i])) { tmpvec[to] -= nvwgt[i]; INC_DEC(psize[to], psize[from], 1); INC_DEC(npwgts[to], npwgts[from], nvwgt[i]); INC_DEC(load[to], load[from], nvwgt[i]); where[i] = to; nswaps++; /* Update external degrees */ ed[i] = 0; for (k=xadj[i]; k<xadj[i+1]; k++) { edge = adjncy[k]; ed[i] += (to != where[edge] ? adjwgt[k] : 0); if (where[edge] == from) ed[edge] += adjwgt[k]; if (where[edge] == to) ed[edge] -= adjwgt[k]; } break; } } } /* Gather the dense tmpvec row into the sparse transfer row */ for (j=rowptr[from]+1; j<rowptr[from+1]; j++) { transfer[j] = tmpvec[colind[j]]; tmpvec[colind[j]] = 0.0; } ASSERT(fabs(rsum(nparts, tmpvec, 1)) < .0001) } if (l % 2 == 1) { balance = rmax(nparts, npwgts)*nparts; if (balance < ubfactor + 0.035) done = 1; if (GlobalSESum(ctrl, done) > 0) break; noswaps = (nswaps > 0) ? 0 : 1; if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2) break; } } graph->mincut = ComputeSerialEdgeCut(graph); totalv = Mc_ComputeSerialTotalV(graph, home); cost = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv; CleanUpAndExit: gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM); return cost; }
/************************************************************************* * This function computes cuts and balance information **************************************************************************/ void ComputePartitionInfoBipartite(graph_t *graph, idx_t nparts, idx_t *where) { idx_t i, j, k, nvtxs, ncon, mustfree=0; idx_t *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts, *tmpptr; idx_t *padjncy, *padjwgt, *padjcut; nvtxs = graph->nvtxs; ncon = graph->ncon; xadj = graph->xadj; adjncy = graph->adjncy; vwgt = graph->vwgt; vsize = graph->vsize; adjwgt = graph->adjwgt; if (vwgt == NULL) { vwgt = graph->vwgt = ismalloc(nvtxs, 1, "vwgt"); mustfree = 1; } if (adjwgt == NULL) { adjwgt = graph->adjwgt = ismalloc(xadj[nvtxs], 1, "adjwgt"); mustfree += 2; } printf("%"PRIDX"-way Cut: %5"PRIDX", Vol: %5"PRIDX", ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where)); /* Compute balance information */ kpwgts = ismalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts"); for (i=0; i<nvtxs; i++) { for (j=0; j<ncon; j++) kpwgts[where[i]*ncon+j] += vwgt[i*ncon+j]; } if (ncon == 1) { printf("\tBalance: %5.3"PRREAL" out of %5.3"PRREAL"\n", 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)), 1.0*nparts*vwgt[iargmax(nvtxs, vwgt)]/(1.0*isum(nparts, kpwgts, 1))); } else { printf("\tBalance:"); for (j=0; j<ncon; j++) printf(" (%5.3"PRREAL" out of %5.3"PRREAL")", 1.0*nparts*kpwgts[ncon*iargmax_strd(nparts, kpwgts+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)), 1.0*nparts*vwgt[ncon*iargmax_strd(nvtxs, vwgt+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon))); printf("\n"); } /* Compute p-adjncy information */ padjncy = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjncy"); padjwgt = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt"); padjcut = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt"); iset(nparts, 0, kpwgts); for (i=0; i<nvtxs; i++) { for (j=xadj[i]; j<xadj[i+1]; j++) { if (where[i] != where[adjncy[j]]) { padjncy[where[i]*nparts+where[adjncy[j]]] = 1; padjcut[where[i]*nparts+where[adjncy[j]]] += adjwgt[j]; if (kpwgts[where[adjncy[j]]] == 0) { padjwgt[where[i]*nparts+where[adjncy[j]]] += vsize[i]; kpwgts[where[adjncy[j]]] = 1; } } } for (j=xadj[i]; j<xadj[i+1]; j++) kpwgts[where[adjncy[j]]] = 0; } for (i=0; i<nparts; i++) kpwgts[i] = isum(nparts, padjncy+i*nparts, 1); printf("Min/Max/Avg/Bal # of adjacent subdomains: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n", kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1))); for (i=0; i<nparts; i++) kpwgts[i] = isum(nparts, padjcut+i*nparts, 1); printf("Min/Max/Avg/Bal # of adjacent subdomain cuts: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n", kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1))); for (i=0; i<nparts; i++) kpwgts[i] = isum(nparts, padjwgt+i*nparts, 1); printf("Min/Max/Avg/Bal/Frac # of interface nodes: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL" %7.3"PRREAL"\n", kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)), 1.0*isum(nparts, kpwgts, 1)/(1.0*nvtxs)); if (mustfree == 1 || mustfree == 3) { gk_free((void **)&vwgt, LTERM); graph->vwgt = NULL; } if (mustfree == 2 || mustfree == 3) { gk_free((void **)&adjwgt, LTERM); graph->adjwgt = NULL; } gk_free((void **)&kpwgts, &padjncy, &padjwgt, &padjcut, LTERM); }