/***********************************************************************************
* This function determines the partitions of the vertices assigned to each leaf
************************************************************************************/
void BuildDTLeafContents(ContactInfoType *cinfo, idxtype *sflag)
{
  idxtype i, j, k, nvtxs, ncontacts, nleafs, nind, tnind, tcomm;
  idxtype *part, *leafptr, *leafind, *leafwgt, *leafpart;
  KeyValueType *cand;

  nvtxs    = cinfo->nvtxs;
  nleafs   = cinfo->nleafs;
  part     = cinfo->part;
  leafpart = cinfo->leafpart;
  leafptr  = cinfo->leafptr;
  leafind  = cinfo->leafind;
  leafwgt  = cinfo->leafwgt;

  cand = (KeyValueType *)gk_malloc(sizeof(KeyValueType)*nvtxs, "BuildDTLeafContents: cand");

  /* Gather the (leaf, partition) pairs of the flagged contact vertices */
  for (ncontacts=0, i=0; i<nvtxs; i++) {
    if (sflag[i]) {
      cand[ncontacts].key   = leafpart[i];
      cand[ncontacts++].val = part[i];
    }
  }

  /* Sort primarily by leaf and secondarily by partition */
  ikeyvalsort(ncontacts, cand);
  /*
  for (i=0; i<ncontacts; i++)
    mprintf("%4D %5D %5D\n", i, cand[i].key, cand[i].val);
  */

  idxset(nleafs, 0, leafptr);

  /* Compress runs of identical (leaf, partition) pairs into a CSR-like structure:
     leafptr[l] first holds the number of distinct partitions of leaf l, while
     leafind/leafwgt store the partition numbers and their multiplicities. */
  leafind[0] = cand[0].val;
  leafwgt[0] = 1;
  nind = tnind = 1;
  for (i=1; i<ncontacts; i++) {
    if (cand[i].key != cand[i-1].key) {        /* new leaf */
      leafptr[cand[i-1].key] = nind;
      leafind[tnind]   = cand[i].val;
      leafwgt[tnind++] = 1;
      nind = 1;
    }
    else if (cand[i].val != cand[i-1].val) {   /* same leaf, new partition */
      leafind[tnind]   = cand[i].val;
      leafwgt[tnind++] = 1;
      nind++;
    }
    else {                                     /* repeated (leaf, partition) pair */
      leafwgt[tnind-1]++;
    }
  }
  leafptr[cand[i-1].key] = nind;
  MAKECSR(i, nleafs, leafptr);

  /* Estimate the communication volume; i is re-initialized explicitly here
     (MAKECSR happens to leave it at 0, but we do not rely on that). */
  for (tcomm=0, i=0; i<nleafs; i++) {
    tcomm += (leafptr[i+1]-leafptr[i]-1)*idxsum(leafptr[i+1]-leafptr[i], leafwgt+leafptr[i], 1);
    /*
    if (leafptr[i+1]-leafptr[i] > 1) {
      mprintf("%4D, ", i);
      for (j=leafptr[i]; j<leafptr[i+1]; j++)
        mprintf("[%3D %4D] ", leafind[j], leafwgt[j]);
      mprintf("\n");
    }
    */
  }

  mprintf("NLeafs: %D, NLeafIndices: %D, EstimComm: %D\n", nleafs, leafptr[nleafs], tcomm);

  /*
  for (i=0; i<nleafs; i++) {
    mprintf("Leaf: %D => ", i);
    for (j=leafptr[i]; j<leafptr[i+1]; j++)
      mprintf("[%D %D] ", leafind[j], leafwgt[j]);
    mprintf("\n");
  }
  */

  gk_free((void **)&cand, LTERM);
}
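/*
 * Illustrative sketch of the structure built above, on hypothetical data (the
 * numbers below are not taken from the library, only the layout is).  Suppose
 * nleafs=3 and six flagged vertices with
 *
 *     leafpart = {0, 0, 1, 1, 1, 2}   and   part = {3, 5, 3, 3, 5, 5}.
 *
 * After sorting the (leafpart, part) pairs and running the compression loop,
 *
 *     leafind = {3, 5, 3, 5, 5},  leafwgt = {1, 1, 2, 1, 1},
 *     leafptr = {0, 2, 4, 5},
 *
 * i.e., leaf 0 touches partitions {3, 5}, leaf 1 touches {3 (twice), 5}, and
 * leaf 2 touches only {5}.  The estimated communication volume sums, for each
 * leaf, (#distinct partitions - 1) * (#contacts in the leaf):
 *     (2-1)*2 + (2-1)*3 + (1-1)*1 = 5.
 */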
/*************************************************************************
* This function sorts a distributed list of KeyValueType in increasing
* order, and uses it to compute a partition. It uses samplesort.
**************************************************************************/
void PartSort(CtrlType *ctrl, GraphType *graph, KeyValueType *elmnts, WorkSpaceType *wspace)
{
  int i, j, k, nvtxs, nrecv, npes=ctrl->npes, mype=ctrl->mype, firstvtx, lastvtx;
  idxtype *scounts, *rcounts, *vtxdist, *perm;
  KeyValueType *relmnts, *mypicks, *allpicks;

  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;

  scounts = wspace->pv1;
  rcounts = wspace->pv2;

  /* Allocate memory for the splitters */
  mypicks  = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(npes+1), "PartSort: mypicks");
  allpicks = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*npes*npes, "PartSort: allpicks");

  /* Sort the local elements */
  ikeysort(nvtxs, elmnts);

  /* Select the local npes-1 equally spaced elements */
  for (i=1; i<npes; i++) {
    mypicks[i-1].key = elmnts[i*(nvtxs/npes)].key;
    mypicks[i-1].val = elmnts[i*(nvtxs/npes)].val;
  }

  /* PrintPairs(ctrl, npes-1, mypicks, "Mypicks"); */

  /* Gather the picks to all the processors */
  MPI_Allgather((void *)mypicks, 2*(npes-1), IDX_DATATYPE, (void *)allpicks, 2*(npes-1),
                IDX_DATATYPE, ctrl->comm);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Sort all the picks */
  ikeyvalsort(npes*(npes-1), allpicks);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Select the final splitters. Set the boundaries to simplify coding */
  for (i=1; i<npes; i++)
    mypicks[i] = allpicks[i*(npes-1)];
  mypicks[0].key    = MIN_INT;
  mypicks[npes].key = MAX_INT;

  /* PrintPairs(ctrl, npes+1, mypicks, "Mypicks"); */

  /* Compute the number of elements that belong to each bucket */
  idxset(npes, 0, scounts);
  for (j=i=0; i<nvtxs; i++) {
    if (elmnts[i].key < mypicks[j+1].key ||
        (elmnts[i].key == mypicks[j+1].key && elmnts[i].val < mypicks[j+1].val))
      scounts[j]++;
    else
      scounts[++j]++;
  }
  MPI_Alltoall(scounts, 1, IDX_DATATYPE, rcounts, 1, IDX_DATATYPE, ctrl->comm);

  /*
  PrintVector(ctrl, npes, 0, scounts, "Scounts");
  PrintVector(ctrl, npes, 0, rcounts, "Rcounts");
  */

  /* Allocate memory for sorted elements and receive them */
  MAKECSR(i, npes, scounts);
  MAKECSR(i, npes, rcounts);
  nrecv = rcounts[npes];

  if (wspace->nlarge >= nrecv)
    relmnts = (KeyValueType *)wspace->pairs;
  else
    relmnts = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nrecv, "PartSort: relmnts");

  /* Issue the receives first */
  for (i=0; i<npes; i++)
    MPI_Irecv((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE,
              i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++)
    MPI_Isend((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE,
              i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);

  /* OK, now do the local sort of the relmnts. Use perm to keep track of the original order */
  perm = idxmalloc(nrecv, "PartSort: perm");
  for (i=0; i<nrecv; i++) {
    perm[i]        = relmnts[i].val;
    relmnts[i].val = i;
  }
  ikeysort(nrecv, relmnts);

  /* Compute what needs to be shifted */
  MPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, MPI_INT, MPI_SUM, ctrl->comm);
  firstvtx = lastvtx-nrecv;

  /* myprintf(ctrl, "first, last: %d %d\n", firstvtx, lastvtx); */

  /* Assign to each received element (in global sorted order) the processor whose
     vtxdist range contains its final rank; the destination is stored in .key */
  for (j=0, i=0; i<npes; i++) {
    if (vtxdist[i+1] > firstvtx) {  /* Found the first PE that is past me */
      if (vtxdist[i+1] >= lastvtx) {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", lastvtx-firstvtx, i); */
        for (k=0; k<lastvtx-firstvtx; k++, j++)
          relmnts[relmnts[j].val].key = i;
      }
      else {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", vtxdist[i+1]-firstvtx, i); */
        for (k=0; k<vtxdist[i+1]-firstvtx; k++, j++)
          relmnts[relmnts[j].val].key = i;

        firstvtx = vtxdist[i+1];
      }
    }
    if (vtxdist[i+1] >= lastvtx)
      break;
  }

  /* Restore the original ordering information in relmnts[].val */
  for (i=0; i<nrecv; i++) {
    ASSERTP(ctrl, relmnts[i].key>=0 && relmnts[i].key<npes, (ctrl, "%d %d\n", i, relmnts[i].key));
    relmnts[i].val = perm[i];
  }

  /* OK, now send it back */
  /* Issue the receives first */
  for (i=0; i<npes; i++)
    MPI_Irecv((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE,
              i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++)
    MPI_Isend((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE,
              i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);

  /* Construct a partition for the graph */
  graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where");
  firstvtx = vtxdist[mype];
  for (i=0; i<nvtxs; i++) {
    ASSERTP(ctrl, elmnts[i].key>=0 && elmnts[i].key<npes,
            (ctrl, "%d %d\n", i, elmnts[i].key));
    ASSERTP(ctrl, elmnts[i].val>=vtxdist[mype] && elmnts[i].val<vtxdist[mype+1],
            (ctrl, "%d %d %d %d\n", i, vtxdist[mype], vtxdist[mype+1], elmnts[i].val));

    graph->where[elmnts[i].val-firstvtx] = elmnts[i].key;
  }

  GKfree((void **)&mypicks, (void **)&allpicks, (void **)&perm, LTERM);

  if (wspace->nlarge < nrecv)
    free(relmnts);
}
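/*
 * Minimal serial sketch of the samplesort bucketing used above (assumed and
 * self-contained; none of the names or data below come from this library).
 * It shows the idea behind the scounts loop: sort the local keys, bound the
 * splitters with sentinels (as PartSort does with MIN_INT/MAX_INT so the first
 * and last buckets need no special cases), and walk the sorted elements,
 * advancing to the next bucket when an element is no longer below the next
 * splitter.  For brevity it uses plain ints and ignores the .val tie-breaking
 * of the real code.  The #if 0 guard keeps this illustration out of the build.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

static int cmp_int(const void *a, const void *b)
{
  int x = *(const int *)a, y = *(const int *)b;
  return (x > y) - (x < y);
}

int main(void)
{
  int elmnts[]  = {42, 7, 19, 88, 3, 64, 25, 51};   /* local keys on one "process" */
  int nvtxs     = 8, npes = 4, i, j;
  int splitters[5], scounts[4] = {0, 0, 0, 0};

  /* Local sort (PartSort uses ikeysort on KeyValueType elements) */
  qsort(elmnts, nvtxs, sizeof(int), cmp_int);

  /* Pretend the gathered-and-sorted global samples reduced to these three
     splitters; add sentinel boundaries like mypicks[0]/mypicks[npes]. */
  splitters[0] = INT_MIN;
  splitters[1] = 20;
  splitters[2] = 45;
  splitters[3] = 70;
  splitters[4] = INT_MAX;

  /* Count how many local elements fall into each of the npes buckets. */
  for (j = 0, i = 0; i < nvtxs; i++) {
    if (elmnts[i] < splitters[j+1])
      scounts[j]++;
    else
      scounts[++j]++;
  }

  for (i = 0; i < npes; i++)
    printf("bucket %d: %d elements\n", i, scounts[i]);   /* 3, 2, 2, 1 */

  return 0;
}
#endif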