예제 #1
0
/***********************************************************************************
* This function determines the partitions of the vertices assigned to each leaf
************************************************************************************/
void BuildDTLeafContents(ContactInfoType *cinfo, idxtype *sflag)
{
  idxtype i, j, k, nvtxs, ncontacts, nleafs, nind, tnind, tcomm;
  idxtype *part, *leafptr, *leafind, *leafwgt, *leafpart;
  KeyValueType *cand;

  nvtxs    = cinfo->nvtxs;
  nleafs   = cinfo->nleafs;
  part     = cinfo->part;
  leafpart = cinfo->leafpart;
  leafptr  = cinfo->leafptr;
  leafind  = cinfo->leafind;
  leafwgt  = cinfo->leafwgt;

  cand = (KeyValueType *)gk_malloc(sizeof(KeyValueType)*nvtxs, "BuildDTLeafContents: cand");

  for (ncontacts=0, i=0; i<nvtxs; i++) {
    if (sflag[i]) {
      cand[ncontacts].key   = leafpart[i];
      cand[ncontacts++].val = part[i];
    }
  }
  ikeyvalsort(ncontacts, cand);

/*
  for (i=0; i<ncontacts; i++)
    mprintf("%4D %5D %5D\n", i, cand[i].key, cand[i].val);
*/

  idxset(nleafs, 0, leafptr);

  leafind[0] = cand[0].val;
  leafwgt[0] = 1;
  nind = tnind = 1;
  for (i=1; i<ncontacts; i++) {
    if (cand[i].key != cand[i-1].key) {
      leafptr[cand[i-1].key] = nind;
      leafind[tnind]         = cand[i].val;
      leafwgt[tnind++]       = 1;
      nind = 1;
    }
    else if (cand[i].val != cand[i-1].val) {
      leafind[tnind]   = cand[i].val;
      leafwgt[tnind++] = 1;
      nind++;
    }
    else {
      leafwgt[tnind-1]++;
    }
  }
  leafptr[cand[i-1].key] = nind;

  MAKECSR(i, nleafs, leafptr);

  for (tcomm=0; i<nleafs; i++) {
    tcomm += (leafptr[i+1]-leafptr[i]-1)*idxsum(leafptr[i+1]-leafptr[i], leafwgt+leafptr[i], 1);

/*
    if (leafptr[i+1]-leafptr[i] > 1) {
      mprintf("%4D, ", i);
      for (j=leafptr[i]; j<leafptr[i+1]; j++)
        mprintf("[%3D %4D] ", leafind[j], leafwgt[j]);
      mprintf("\n");
    }
*/

  }
      

  mprintf("NLeafs: %D, NLeafIndices: %D, EstimComm: %D\n", nleafs, leafptr[nleafs], tcomm);

/*
for (i=0; i<nleafs; i++) {
  mprintf("Leaf: %D => ", i);
  for (j=leafptr[i]; j<leafptr[i+1]; j++)
    mprintf("[%D %D] ", leafind[j], leafwgt[j]);
  mprintf("\n");
}
*/

  gk_free((void **)&cand, LTERM);
}
예제 #2
0
파일: xyzpart.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function sorts a distributed list of KeyValueType in increasing 
* order, and uses it to compute a partition. It uses samplesort. 
**************************************************************************/
void PartSort(CtrlType *ctrl, GraphType *graph, KeyValueType *elmnts, WorkSpaceType *wspace)
{
  int i, j, k, nvtxs, nrecv, npes=ctrl->npes, mype=ctrl->mype, firstvtx, lastvtx;
  idxtype *scounts, *rcounts, *vtxdist, *perm;
  KeyValueType *relmnts, *mypicks, *allpicks;

  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;

  scounts = wspace->pv1;
  rcounts = wspace->pv2;

  /* Allocate memory for the splitters */
  mypicks  = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(npes+1), "ParSort: mypicks");
  allpicks = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*npes*npes, "ParSort: allpicks");

  /* Sort the local elements */
  ikeysort(nvtxs, elmnts);

  /* Select the local npes-1 equally spaced elements */
  for (i=1; i<npes; i++) { 
    mypicks[i-1].key = elmnts[i*(nvtxs/npes)].key;
    mypicks[i-1].val = elmnts[i*(nvtxs/npes)].val;
  }

  /* PrintPairs(ctrl, npes-1, mypicks, "Mypicks"); */

  /* Gather the picks to all the processors */
  MPI_Allgather((void *)mypicks, 2*(npes-1), IDX_DATATYPE, (void *)allpicks, 2*(npes-1), IDX_DATATYPE, ctrl->comm);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Sort all the picks */
  ikeyvalsort(npes*(npes-1), allpicks);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Select the final splitters. Set the boundaries to simplify coding */
  for (i=1; i<npes; i++)
    mypicks[i] = allpicks[i*(npes-1)];
  mypicks[0].key    = MIN_INT;
  mypicks[npes].key = MAX_INT;

  /* PrintPairs(ctrl, npes+1, mypicks, "Mypicks"); */

  /* Compute the number of elements that belong to each bucket */
  idxset(npes, 0, scounts);
  for (j=i=0; i<nvtxs; i++) {
    if (elmnts[i].key < mypicks[j+1].key || (elmnts[i].key == mypicks[j+1].key && elmnts[i].val < mypicks[j+1].val))
      scounts[j]++;
    else
      scounts[++j]++;
  }
  MPI_Alltoall(scounts, 1, IDX_DATATYPE, rcounts, 1, IDX_DATATYPE, ctrl->comm);

/*
  PrintVector(ctrl, npes, 0, scounts, "Scounts");
  PrintVector(ctrl, npes, 0, rcounts, "Rcounts");
*/

  /* Allocate memory for sorted elements and receive them */
  MAKECSR(i, npes, scounts);
  MAKECSR(i, npes, rcounts);
  nrecv = rcounts[npes];
  if (wspace->nlarge >= nrecv)
    relmnts = (KeyValueType *)wspace->pairs;
  else
    relmnts = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nrecv, "ParSort: relmnts");

  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    MPI_Irecv((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    MPI_Isend((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* OK, now do the local sort of the relmnts. Use perm to keep track original order */
  perm = idxmalloc(nrecv, "ParSort: perm");
  for (i=0; i<nrecv; i++) {
    perm[i] = relmnts[i].val;
    relmnts[i].val = i;
  }
  ikeysort(nrecv, relmnts);


  /* Compute what needs to be shifted */
  MPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, MPI_INT, MPI_SUM, ctrl->comm);
  firstvtx = lastvtx-nrecv;  

  /*myprintf(ctrl, "first, last: %d %d\n", firstvtx, lastvtx); */

  for (j=0, i=0; i<npes; i++) {
    if (vtxdist[i+1] > firstvtx) {  /* Found the first PE that is passed me */
      if (vtxdist[i+1] >= lastvtx) {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", lastvtx-firstvtx, i); */
        for (k=0; k<lastvtx-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;
      }
      else {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", vtxdist[i+1]-firstvtx, i); */
        for (k=0; k<vtxdist[i+1]-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;

        firstvtx = vtxdist[i+1];
      }
    }
    if (vtxdist[i+1] >= lastvtx)
      break;
  }

  /* Reverse the ordering on the relmnts[].val */
  for (i=0; i<nrecv; i++) {
    ASSERTP(ctrl, relmnts[i].key>=0 && relmnts[i].key<npes, (ctrl, "%d %d\n", i, relmnts[i].key));
    relmnts[i].val = perm[i];
  }

  /* OK, now sent it back */
  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    MPI_Irecv((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    MPI_Isend((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* Construct a partition for the graph */
  graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where");
  firstvtx = vtxdist[mype];
  for (i=0; i<nvtxs; i++) {
    ASSERTP(ctrl, elmnts[i].key>=0 && elmnts[i].key<npes, (ctrl, "%d %d\n", i, elmnts[i].key));
    ASSERTP(ctrl, elmnts[i].val>=vtxdist[mype] && elmnts[i].val<vtxdist[mype+1], (ctrl, "%d %d %d %d\n", i, vtxdist[mype], vtxdist[mype+1], elmnts[i].val));
    graph->where[elmnts[i].val-firstvtx] = elmnts[i].key;
  }


  GKfree((void **)&mypicks, (void **)&allpicks, (void **)&perm, LTERM);
  if (wspace->nlarge < nrecv)
    free(relmnts);

}