/************************************************************** * This subroutine remaps a partitioning on a single processor **************************************************************/ void SerialRemap(GraphType *graph, int nparts, idxtype *base, idxtype *scratch, idxtype *remap, float *tpwgts) { int i, ii, j, k; int nvtxs, nmapped, max_mult; int from, to, current_from, smallcount, bigcount; KeyValueType *flowto, *bestflow; KeyKeyValueType *sortvtx; idxtype *vsize, *htable, *map, *rowmap; nvtxs = graph->nvtxs; vsize = graph->vsize; max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts); sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx"); flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto"); bestflow = flowto+nparts; map = htable = idxsmalloc(nparts*2, -1, "htable"); rowmap = map+nparts; for (i=0; i<nvtxs; i++) { sortvtx[i].key1 = base[i]; sortvtx[i].key2 = vsize[i]; sortvtx[i].val = i; } qsort((void *)sortvtx, (size_t)nvtxs, (size_t)sizeof(KeyKeyValueType), SSMIncKeyCmp); for (j=0; j<nparts; j++) { flowto[j].key = 0; flowto[j].val = j; } /* this step has nparts*nparts*log(nparts) computational complexity */ bigcount = smallcount = current_from = 0; for (ii=0; ii<nvtxs; ii++) { i = sortvtx[ii].val; from = base[i]; to = scratch[i]; if (from > current_from) { /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERTS(idxsum(nparts, htable) == -nparts); ikeysort(smallcount, flowto); for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } smallcount = 0; current_from = from; } if (htable[to] == -1) { htable[to] = smallcount; flowto[smallcount].key = -vsize[i]; flowto[smallcount].val = to; smallcount++; } else { flowto[htable[to]].key += -vsize[i]; } } /* reset the hash table */ for (j=0; j<smallcount; j++) htable[flowto[j].val] = -1; ASSERTS(idxsum(nparts, htable) == 
-nparts); ikeysort(smallcount, flowto); for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) { bestflow[bigcount].key = flowto[j].key; bestflow[bigcount].val = current_from*nparts+flowto[j].val; } ikeysort(bigcount, bestflow); ASSERTS(idxsum(nparts, map) == -nparts); ASSERTS(idxsum(nparts, rowmap) == -nparts); nmapped = 0; /* now make as many assignments as possible */ for (ii=0; ii<bigcount; ii++) { i = bestflow[ii].val; j = i % nparts; /* to */ k = i / nparts; /* from */ if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) { map[j] = k; rowmap[k] = j; nmapped++; } if (nmapped == nparts) break; } /* remap the rest */ /* it may help try remapping to the same label first */ if (nmapped < nparts) { for (j=0; j<nparts && nmapped<nparts; j++) { if (map[j] == -1) { for (ii=0; ii<nparts; ii++) { i = (j+ii) % nparts; if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) { map[j] = i; rowmap[i] = j; nmapped++; break; } } } } } /* check to see if remapping fails (due to dis-similar tpwgts) */ /* if remapping fails, revert to original mapping */ if (nmapped < nparts) for (i=0; i<nparts; i++) map[i] = i; for (i=0; i<nvtxs; i++) remap[i] = map[remap[i]]; GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM); }
/*************************************************************************
* This function computes the assignment using as its objective the
* minimization of the total volume of data that needs to move.
*
*   ctrl    - supplies mype, nparts, tpwgts, comm and dbglvl
*   lpwgts  - nparts local weights, one per partition; read only
*   map     - output, nparts entries; map[j] = k records that column j was
*             paired with row k (rowmap[k] = j is the inverse)
*   wspace  - wspace->pv3 is used as scratch storage for rowmap
*   npasses - maximum number of selection rounds
*   ncon    - forwarded to SimilarTpwgts()
*
* In every round each process offers its heaviest remaining local weight
* (encoded as mype*nparts + partition), the offers are gathered on all
* processes, sorted, and accepted greedily. Whatever is still unassigned
* after the rounds is paired with free, tpwgts-compatible rows. If a full
* assignment cannot be built, or the global savings are not positive, the
* identity mapping is returned.
*
* NOTE(review): recv[] holds nparts entries but MPI_Allgather delivers one
* entry per process of ctrl->comm, so this presumably requires the number
* of processes to equal nparts -- confirm against the caller.
**************************************************************************/
void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map, WorkSpaceType *wspace, int npasses, int ncon)
{
  int i, ii, j, k, nparts, mype;
  int pass, maxipwgt, nmapped, oldwgt, newwgt, done;
  idxtype *rowmap, *mylpwgts;
  KeyValueType *recv, send;
  int nsaved, gnsaved;

  mype   = ctrl->mype;
  nparts = ctrl->nparts;

  recv     = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv");
  mylpwgts = idxmalloc(nparts, "mylpwgts");

  done = nmapped = 0;
  idxset(nparts, -1, map);
  rowmap = idxset(nparts, -1, wspace->pv3);
  idxcopy(nparts, lpwgts, mylpwgts);

  for (pass=0; pass<npasses; pass++) {
    maxipwgt = idxamax(nparts, mylpwgts);

    /* offer the heaviest remaining partition, unless this process already
       received one (done) or has nothing positive left to offer */
    if (mylpwgts[maxipwgt] > 0 && !done) {
      send.key = -mylpwgts[maxipwgt];
      send.val = mype*nparts+maxipwgt;
    }
    else {
      send.key = 0;
      send.val = -1;
    }

    /* each processor sends its selection */
    MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm);

    /* keys are negated weights; a zero in front after sorting means that
       no process made a real offer in this round */
    ikeysort(nparts, recv);
    if (recv[0].key == 0)
      break;

    /* now make as many assignments as possible */
    for (ii=0; ii<nparts; ii++) {
      i = recv[ii].val;

      if (i == -1)
        continue;

      j = i % nparts;
      k = i / nparts;
      if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) {
        map[j] = k;
        rowmap[k] = j;
        nmapped++;
        mylpwgts[j] = 0;   /* column j is taken: never offer it again */
        if (mype == k)
          done = 1;
      }

      if (nmapped == nparts)
        break;
    }

    if (nmapped == nparts)
      break;
  }

  /* Map unmapped partitions */
  /* NOTE(review): the row cursor i is shared by all columns j and is never
     rewound, so a free row skipped for one column because of dis-similar
     tpwgts is not reconsidered for later columns; in that situation the
     code falls through to the identity mapping below. */
  if (nmapped < nparts) {
    for (i=j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (; i<nparts; i++) {
          if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;
            nmapped++;
            break;
          }
        }
      }
    }
  }

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts) {
    for (i=0; i<nparts; i++)
      map[i] = i;
    /* the message takes no argument: print a literal 0 (the former "%0"
       was an invalid conversion specification) */
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: 0\n"));
  }
  else {
    /* check for a savings */
    oldwgt  = lpwgts[mype];
    newwgt  = lpwgts[rowmap[mype]];
    nsaved  = newwgt - oldwgt;
    gnsaved = GlobalSESum(ctrl, nsaved);

    /* undo everything if we don't see a savings */
    if (gnsaved <= 0) {
      for (i=0; i<nparts; i++)
        map[i] = i;
    }
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved)));
  }

  GKfree((void **)&recv, (void **)&mylpwgts, LTERM);
}