Пример #1
0
/*
 * Zoltan_Input_HG_Init
 *
 * Reset every member of the ZHG structure touched below: counters and
 * dimensions are set to 0, array pointers are set to NULL, and the embedded
 * hypergraph zhg->HG is handed to Zoltan_HG_HGraph_Init().
 *
 * zhg  -- structure to initialize; must point to valid storage.
 */
void Zoltan_Input_HG_Init(ZHG *zhg)
{
  /* --- Hyperedge (edge/pin) data --- */
  zhg->globalPins = 0;
  zhg->nPins = 0;
  zhg->Pin_Procs = NULL;
  zhg->pinGNO = NULL;
  zhg->Ewgt = NULL;
  zhg->edgeWeightDim = 0;
  zhg->Esize = NULL;
  zhg->edgeGNO = NULL;
  zhg->globalHedges = 0;
  zhg->nHedges = 0;

  /* --- Object (vertex) data --- */
  zhg->numHEdges= NULL;
  zhg->objLID = NULL;
  zhg->objGID = NULL;
  zhg->objGNO = NULL;
  zhg->objWeight = NULL;
  zhg->objWeightDim = 0;
  zhg->globalObj = 0;
  zhg->nObj = 0;

  /* --- Coordinates and fixed-vertex information --- */
  zhg->fixed = NULL;
  zhg->coor = NULL;

  /* --- Repartitioning counters --- */
  zhg->GnRepartEdge = 0;
  zhg->GnRepartVtx = 0;

  /* --- Part assignments --- */
  zhg->Output_Parts = NULL;
  zhg->Input_Parts = NULL;

  /* --- Migration-volume bookkeeping --- */
  zhg->showMoveVol = 0;
  zhg->AppObjSizes = NULL;

  /* --- Received GNOs and the vertex communication plan --- */
  zhg->VtxPlan = NULL;
  zhg->Recv_GNOs = NULL;
  zhg->nRecv_GNOs = 0;

  /* The embedded hypergraph has its own initializer. */
  Zoltan_HG_HGraph_Init(&zhg->HG);
}
Пример #2
0
/*
 * Zoltan_PHG_Redistribute_Hypergraph
 *
 * Move the hypergraph ohg, distributed over ohg->comm (ocomm), onto the
 * processor grid described by ncomm and store the result in nhg.
 *
 * Steps performed:
 *   1. Count, per destination column/row, how many local vertices/edges go
 *      there (v2Col / n2Row) and use MPI_Scan + MPI_Allreduce to derive the
 *      new local numbering (vno / nno) and nhg->dist_x / nhg->dist_y.
 *   2. Ship every pin as a (new vertex no, new edge no) pair to processor
 *      firstproc + n2Row[edge] * ncomm->nProc_x + v2Col[vertex].
 *   3. From the first row of ocomm, ship ohg->vmap, the sender's rank,
 *      vertex weights, fixed parts and preferred parts to the first row of
 *      ncomm; those are then broadcast down ncomm's columns.
 *   4. From the first column of ocomm, ship edge weights to the first
 *      column of ncomm; those are then broadcast along ncomm's rows.
 *   5. On processors inside ncomm, unpack the received pins into
 *      nhg->vindex / nhg->vedge and call Zoltan_HG_Create_Mirror().
 *
 * Outputs *vmap and *vdest are set to NULL unless this processor is in the
 * first row of ncomm and receives vertices; in that case they are allocated
 * with one int per received vertex.  *vmap receives the senders' ohg->vmap
 * values and *vdest receives the senders' ocomm->myProc.
 *
 * Returns ierr.  Allocation failures go through MEMORY_ERROR (macro defined
 * elsewhere; presumably it records the error in ierr and jumps to End).
 * All temporary arrays are released at End.
 */
static int Zoltan_PHG_Redistribute_Hypergraph(
    ZZ *zz, 
    PHGPartParams *hgp,     /* Input:  parameters; used only for UseFixedVtx */
    HGraph  *ohg,           /* Input:  Local part of distributed hypergraph */
    int     firstproc,      /* Input:  rank (in ocomm) of the first proc of 
                                       the ncomm*/
    int     *v2Col,         /* Input:  Vertex to processor Column Mapping */
    int     *n2Row,         /* Input:  Net to processor Row Mapping */
    PHGComm *ncomm,         /* Input:  communicators of new distribution */
    HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
    int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                               vertex map from nhg to ohg's local vertex number*/
    int     **vdest         /* Output: allocated with the size nhg->nVtx and
                               stores dest proc in ocomm */
    )
{
    char * yo = "Zoltan_PHG_Redistribute_Hypergraph";
    PHGComm *ocomm = ohg->comm;
    int ierr=ZOLTAN_OK;
    int i, v, n, nPins, nsend, elemsz, nVtx, nEdge;
    int msg_tag = 9999;
    int *proclist=NULL, *sendbuf=NULL;
    int *vno=NULL, *nno=NULL, *dist_x=NULL, *dist_y=NULL,
        *vsn=NULL, *nsn=NULL, *pins=NULL, *cnt=NULL;
    ZOLTAN_COMM_OBJ *plan;    
    
    Zoltan_HG_HGraph_Init (nhg);
    nhg->comm = ncomm;
    
    nhg->dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nhg->dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vno = (int *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(int));
    nno = (int *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(int));

    if (!nhg->dist_x || !nhg->dist_y || !dist_x || !dist_y ||
        !vsn || !nsn || (ohg->nVtx && !vno) || (ohg->nEdge && !nno) ) {
        uprintf(ocomm, " new comm nProcx=%d nProcy=%d nvtx=%d nedge=%d", ncomm->nProc_x, ncomm->nProc_y, ohg->nVtx, ohg->nEdge);
        MEMORY_ERROR;
    }
      
    /* Local counts of vertices/edges headed to each new column/row. */
    for (v = 0; v < ohg->nVtx; ++v)
        ++dist_x[v2Col[v]];
    for (n = 0; n < ohg->nEdge; ++n)
        ++dist_y[n2Row[n]];

    /* UVCUVC: CHECK ASSUMPTION
       This code assumes that the objects in the receive buffer of
       Zoltan_Comm_Do function are
         1- in the increasing processor order,
         2- order of the items send by a processor is preserved.
     */
    

    /* compute prefix sum to find new vertex start numbers; for each processor */
    MPI_Scan(dist_x, vsn, ncomm->nProc_x, MPI_INT, MPI_SUM, ocomm->row_comm);
    /* All reduce to compute how many each processor will have */ 
    MPI_Allreduce(dist_x, &(nhg->dist_x[1]), ncomm->nProc_x, MPI_INT, MPI_SUM, 
                  ocomm->row_comm);
    nhg->dist_x[0] = 0;    
    for (i=1; i<=ncomm->nProc_x; ++i) 
        nhg->dist_x[i] += nhg->dist_x[i-1];
    
    /* Same computation for edges, along the processor column. */
    MPI_Scan(dist_y, nsn, ncomm->nProc_y, MPI_INT, MPI_SUM, ocomm->col_comm);

    MPI_Allreduce(dist_y, &(nhg->dist_y[1]), ncomm->nProc_y, MPI_INT, MPI_SUM, ocomm->col_comm);
    nhg->dist_y[0] = 0;
    for (i=1; i<=ncomm->nProc_y; ++i)
        nhg->dist_y[i] += nhg->dist_y[i-1];

#ifdef _DEBUG1
    PrintArr(ocomm, "vsn", vsn, ncomm->nProc_x);
    PrintArr(ocomm, "nsn", nsn, ncomm->nProc_y);
#endif
    
    /* find mapping of current LOCAL vertex no (in my node)
       to "new" vertex no LOCAL to dest node.
       Walking backwards and pre-decrementing the inclusive scan value
       hands out the numbers so that local order is preserved. */
    for (v = ohg->nVtx-1; v>=0; --v)
        vno[v] = --vsn[v2Col[v]];
    for (n = ohg->nEdge-1; n>=0; --n)
        nno[n] = --nsn[n2Row[n]];

    /* One proclist/sendbuf pair is reused for pins, vertex data and edge
       data, so size it for the largest of the three; elemsz is the largest
       per-item payload in bytes. */
    nsend = MAX(MAX(ohg->nPins, ohg->nVtx), ohg->nEdge);
    elemsz = MAX(MAX(2, ohg->VtxWeightDim), ohg->EdgeWeightDim);
    elemsz = (sizeof(float)>sizeof(int)) ? sizeof(float)*elemsz : sizeof(int)*elemsz;

    proclist = (int *) ZOLTAN_MALLOC(nsend * sizeof(int));
    sendbuf = (int *) ZOLTAN_MALLOC(nsend * elemsz);

    /* Both buffers are written immediately below; do not proceed if either
       allocation failed. */
    if (nsend && (!proclist || !sendbuf))
        MEMORY_ERROR;

    /* first communicate pins */
    nPins = 0;
    for (v = 0; v < ohg->nVtx; ++v) { 
        for (i = ohg->vindex[v]; i < ohg->vindex[v+1]; ++i) {
#ifdef _DEBUG1
            if ((n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])<0 ||
                (n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])>=ocomm->nProc)
                errexit("vertex %d vedge[%d]=%d n2Row=%d #Proc_x=%d v2Col=%d", v, i, ohg->vedge[i], n2Row[ohg->vedge[i]], ncomm->nProc_x , v2Col[v]);
#endif
            proclist[nPins]   = firstproc + n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v];
            sendbuf[2*nPins]  = vno[v];
            sendbuf[2*nPins+1]= nno[ohg->vedge[i]];
            ++nPins; 
        }
    }
#ifdef _DEBUG1
    if (nPins!=ohg->nPins) {
        uprintf(ocomm, "sanity check failed nPins(%d)!=hg->nPins(%d)\n", nPins, ohg->nPins);
        errexit("terminating");
    }
#endif

    /* From here on nPins is the number of pins this processor RECEIVES. */
    --msg_tag;
    ierr |= Zoltan_Comm_Create(&plan, ohg->nPins, proclist, ocomm->Communicator,
                               msg_tag, &nPins);

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nPins>1) { /* this processor is not in new comm but receiving data?*/
        uprintf(ocomm, "Something wrong; why I'm receiving data nPins=%d\n", nPins);
        errexit("terminating");
    }
#endif
    
    if (nPins && (pins = (int *) ZOLTAN_MALLOC(nPins * 2 * sizeof(int)))==NULL) 
        MEMORY_ERROR;

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, 2*sizeof(int),
                   (char *) pins);
    Zoltan_Comm_Destroy(&plan);

    /* now communicate vertex map */
    nsend = 0;
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) { 
            proclist[nsend] = firstproc+v2Col[v];
            sendbuf[nsend++] = ohg->vmap[v];
        }
    }
        
    --msg_tag; 
    ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                               msg_tag, &nVtx); 

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nVtx>1) { /* this processor is not in new comm but receiving data?*/ 
        uprintf(ocomm, "Something wrong; why I'm receiving data nVtx=%d\n", nVtx);
        errexit("terminating");
    }
#endif

    /* those are only needed in the first row of ncomm */
    *vmap = *vdest = NULL;  
    if (!ncomm->myProc_y && nVtx &&
        (!(*vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int))) ||
         !(*vdest = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))))
        MEMORY_ERROR;
    
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vmap);

    /* Second pass over the same plan: every vertex carries its sender's
       rank in ocomm. */
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) 
            sendbuf[v] = ocomm->myProc;
    }
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vdest);
        
    if (ncomm->myProc!=-1) { /* I'm in the new comm */
        /* ncomm's first row now bcast to other rows */
        MPI_Bcast(&nVtx, 1, MPI_INT, 0, ncomm->col_comm);
#ifdef _DEBUG1
        if (nVtx!=(nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]))
            errexit("nVtx(%d)!= nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x](%d)", nVtx, nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]);
#endif
        if (nVtx && (nhg->vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))==NULL) 
            MEMORY_ERROR;
        for (i=0; i<nVtx; ++i)
            nhg->vmap[i] = i;
    }


    /* now communicate vertex weights */
    if (ohg->VtxWeightDim) {
        if (nVtx) {
            nhg->vwgt = (float*) ZOLTAN_MALLOC(nVtx*ohg->VtxWeightDim*sizeof(float));
            /* vwgt is used as a receive/broadcast buffer right below. */
            if (!nhg->vwgt)
                MEMORY_ERROR;
        }
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->vwgt,
                       ohg->VtxWeightDim*sizeof(float), (char *) nhg->vwgt);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->vwgt, nVtx*ohg->VtxWeightDim, MPI_FLOAT, 0, ncomm->col_comm);
    }    

    /* communicate fixed vertices, if any */
    if (hgp->UseFixedVtx) {
        if (nVtx) {
            nhg->fixed_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
            if (!nhg->fixed_part)
                MEMORY_ERROR;
        }
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->fixed_part,
                       sizeof(int), (char *) nhg->fixed_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->fixed_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }    
    /* communicate pref parts, if any */
    if (hgp->UsePrefPart) {
        if (nVtx) {
            nhg->pref_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
            if (!nhg->pref_part)
                MEMORY_ERROR;
        }
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->pref_part,
                       sizeof(int), (char *) nhg->pref_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->pref_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }    

    /* this comm plan is no longer needed. */
    Zoltan_Comm_Destroy(&plan);

    
    if (ohg->EdgeWeightDim) { /* now communicate edge weights */
        nsend = 0;
        if (!ocomm->myProc_x)  /* only first column sends to first column of ncomm */
            for (n = 0; n < ohg->nEdge; ++n) 
                proclist[nsend++] = firstproc + n2Row[n]*ncomm->nProc_x;
    
        --msg_tag;
        ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                                   msg_tag, &nEdge);

#ifdef _DEBUG1
        if (ncomm->myProc==-1 && nEdge>1) { /* this processor is not in new comm but receiving data?*/
            uprintf(ocomm, "Something wrong; why I'm receiving data nEdge=%d\n", nEdge);
            errexit("terminating");
        }
#endif
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            MPI_Bcast(&nEdge, 1, MPI_INT, 0, ncomm->row_comm);
#ifdef _DEBUG1
            if (nEdge != (nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]))
            errexit("nEdge(%d)!=nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y](%d)", nEdge, nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
#endif
        }
        
        if (nEdge) {
            nhg->ewgt = (float*) ZOLTAN_MALLOC(nEdge*ohg->EdgeWeightDim*sizeof(float));
            /* ewgt is used as a receive/broadcast buffer right below. */
            if (!nhg->ewgt)
                MEMORY_ERROR;
        }
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->ewgt,
                       ohg->EdgeWeightDim*sizeof(float), (char *) nhg->ewgt);
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            if (nEdge) 
                MPI_Bcast(nhg->ewgt, nEdge*ohg->EdgeWeightDim, MPI_FLOAT, 0, 
                          ncomm->row_comm);
        }

        Zoltan_Comm_Destroy(&plan);
    } else 
        /* No edge weights were exchanged, so derive nEdge from dist_y. */
        nEdge = (ncomm->myProc==-1) 
                ? 0 
                : nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
    

    if (ncomm->myProc==-1) {
#ifdef _DEBUG1
        if (nPins || nVtx || nEdge)
            errexit("I should not have any data: hey nPins=%d  nVtx=%d  nEdge=%d\n", nPins, nVtx, nEdge);
#endif
        nhg->nEdge = nhg->nVtx = nhg->nPins = 0;
    } else {
        nhg->nEdge = nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
        nhg->nVtx = nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x];
        nhg->nPins = nPins;
    
        /* Unpack the pins received. */
        cnt = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vindex = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vedge = (int *) ZOLTAN_MALLOC(nhg->nPins * sizeof(int));
        
        if (!cnt || !nhg->vindex || (nPins && !nhg->vedge))
            MEMORY_ERROR;

        /* Count the number of pins per vertex */
        for (i = 0; i < nPins; ++i)
            ++cnt[pins[2*i]];
        
        /* Compute prefix sum to represent hindex correctly.
           cnt[] is then reused as the next free slot of each vertex. */
        for (i = 0; i < nhg->nVtx; ++i)  {
            nhg->vindex[i+1] = nhg->vindex[i] + cnt[i];
            cnt[i] = nhg->vindex[i];
        }

        /* Scatter each pin's edge number into its vertex's slot range. */
        for (i = 0; i < nPins; ++i) 
            nhg->vedge[cnt[pins[2*i]]++] = pins[2*i+1];
        
        nhg->info               = ohg->info;
        nhg->VtxWeightDim       = ohg->VtxWeightDim;
        nhg->EdgeWeightDim      = ohg->EdgeWeightDim;

        ierr = Zoltan_HG_Create_Mirror(zz, nhg);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
            MEMORY_ERROR;
    }

 End:
    Zoltan_Multifree(__FILE__, __LINE__, 10,
                     &proclist, &sendbuf, &pins, &cnt,
                     &vno, &nno, &dist_x, &dist_y, &vsn, &nsn
        );
    
    return ierr;
}
Пример #3
0
/*
 * gather_and_build_remap
 *
 * Gather the hyperedge triples (pin0, pin1, weight) of every processor with
 * MPI_Allgather/MPI_Allgatherv, build a local hypergraph from them, run
 * do_match() on it and, if the matching is non-trivial, build the
 * part-remapping vector zz->LB.Remap.
 *
 * The overlap reported by measure_stays() is computed without remapping,
 * with zz->LB.OldRemap and with the new vector; whichever is best is kept:
 *   - no remap is best   -> Remap and OldRemap are freed, *new_map = 0;
 *   - OldRemap is best   -> Remap is replaced by OldRemap, *new_map = 1;
 *   - new Remap is best  -> OldRemap is freed, *new_map stays 1.
 *
 * Returns ZOLTAN_OK, ZOLTAN_MEMERR on allocation failure, or the negative
 * code returned by Zoltan_HG_Create_Mirror().  *new_map is not written on
 * the early error paths.
 */
static int gather_and_build_remap(
  ZZ *zz, 
  int *new_map,               /* Upon return, flag indicating whether parts
                                 assignments were changed due to remap. */
  int HEcnt,                  /* # of HEs allocated. */
  int *HEinfo                 /* Array of HE info; for each HE, two pins and 
                                 one edge weight. Stored as a single vector
                                 to minimize communication calls.  */
)
{
char *yo = "gather_and_remap";
int ierr = ZOLTAN_OK;
int i, uidx, tmp;
int *each_size = NULL;        /* sizes (# HEs * HEINFO_ENTRIES) for each proc */
int *recvbuf = NULL;          /* Receive buffer for gatherv */
int *displs = NULL;           /* Displacement buffer for gatherv */
int send_size;                /* Local # HEs * HEINFO_ENTRIES */
int total_size;               /* Total # ints in gatherv */
int total_HEcnt;              /* Total (across all procs) number of HEs. */
int max0, max1;               /* Max values of pin 0 and pin 1 for each HE. */
int *match = NULL;            /* Vector describing the matching. 
                                 match[i] = j ==> match[j] = i ==> 
                                 vertices i and j are matched. */
int *used = NULL;             /* Vector indicating which partitions are used
                                 in the matching. */
int limit;                    /* Maximum number of matches that are allowed */
HGraph hg;                    /* Hypergraph for matching */
float before_remap = 0,       /* Amount of data that overlaps between old and */
      after_remap = 0;        /* new decomposition before and after remapping, 
                                 respectively. */
float with_oldremap = 0;      /* Amount of data that overlaps between old and
                                 new decomposition using the OldRemap vector
                                 (remapping from the previous decomposition). */


  /* Initialize hg before anything can jump to End: the cleanup code there
     calls Zoltan_HG_HGraph_Free(&hg) unconditionally. */
  Zoltan_HG_HGraph_Init(&hg);

  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  /* One allocation holds the gathered data followed by the displacements. */
  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  displs = recvbuf + total_size;

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT, 
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;
  
  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty partitions, allowing
   * max1 to be less than LB.Num_Global_Parts. 
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  /* NOTE(review): this only reports the condition and continues; with
     max1 > Num_Global_Parts the vertex numbers stored in hg.hvertex below
     can reach or exceed hg.nVtx -- confirm whether this should be fatal. */
  if (max1 > zz->LB.Num_Global_Parts) 
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG */

  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;  
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* Edge i connects vertex pin0 with vertex (pin1 + max0); the pin1
       values are shifted by max0 so the two pin ranges do not overlap. */
    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i; 
      hg.hvertex[i+i] = recvbuf[tmp];
      hg.hvertex[i+i+1] = recvbuf[tmp+1]+max0;
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  /* match[] and used[] share one zero-filled allocation. */
  match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts, sizeof(int));
  /* match[] is indexed below whenever Num_Global_Parts > 0, even if hg has
     no vertices, so the check must cover the whole requested size. */
  if ((hg.nVtx + zz->LB.Num_Global_Parts) && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  used = match ? match + hg.nVtx : NULL;

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts); 
  do_match(zz, &hg, match, limit);

      
  /* Build remapping vector, if non-trivial matching was returned. */

  *new_map = 0;
  for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
    if (match[i+max0] != i+max0) {
      *new_map = 1;
      break;
    }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }


    /* First, process all parts that were matched. Mark matched parts as used.*/

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1; 
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }
  
    /* Third, process remaining unmatched parts; assign them to 
       unused partitions.*/
  
    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused partition */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map) 
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap) 
    for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  /* used and displs point into match and recvbuf; only the bases are freed. */
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
Пример #4
0
/*
 * Zoltan_PHG_Gather_To_All_Procs
 *
 * Build, on every processor, a complete copy (*gathered_hg) of the
 * distributed hypergraph phg.
 *
 * On success *gathered_hg points to a newly allocated HGraph whose comm is
 * scomm.  On failure (ierr < 0) the partially built graph is released and
 * *gathered_hg is freed through ZOLTAN_FREE.
 *
 * Returns ZOLTAN_OK, ZOLTAN_FATAL when called with phg->comm->nProc == 1,
 * or whatever MEMORY_ERROR stores in ierr (macro defined elsewhere;
 * presumably a memory-error code followed by a jump to End).
 *
 * The two sanity checks on the gathered pin counts call exit(-1) when they
 * fail.
 */
int Zoltan_PHG_Gather_To_All_Procs(
  ZZ *zz, 
  HGraph *phg,           /* Input:   Local part of distributed hypergraph */
  PHGPartParams *hgp,        /* Input:   Hypergraph parameters */
  PHGComm *scomm,        /* Input:   Serial PHGComm for use by shg. */
  HGraph **gathered_hg   /* Output:  combined hypergraph combined to proc */
)
{
/* 
 * Function to gather distributed hypergraph onto each processor for
 * coarsest partitioning.
 * First hypergraph arrays for the hypergraph on a column of processors
 * are built using MPI_Allgathers down the processor columns.
 * These hypergraph arrays contain complete info about a subset of vertices.
 * Second the column hypergraphs are gathered along processor rows.
 * Each processor then has a complete description of the hypergraph.
 */
char *yo = "Zoltan_PHG_Gather_To_All_Procs";
int ierr = ZOLTAN_OK;
int i, tmp, sum;
int *each = NULL,
    *disp = NULL;      /* Size and displacement arrays for MPI_Allgatherv */
int *send_buf = NULL;    /* Buffer of values to be sent */
int send_size;           /* Size of buffer send_buf */
int *col_vedge = NULL;   /* vedge array for the proc-column hypergraph */
int *col_vindex = NULL;  /* vindex array for the proc-column hypergraph */
int *col_hvertex = NULL; /* hvertex array for the proc-column hypergraph */
int *col_hindex = NULL;  /* hindex array for the proc-column hypergraph */
int col_nVtx;            /* Number of vertices in processor column */
int col_nEdge;           /* Number of edges in processor column */
int col_nPin;            /* Number of pins in processor column */

int *recv_size = NULL;   /* nPins for each proc in col or row */

HGraph *shg;             /* Pointer to the serial hypergraph to be
                            returned by this function. */

int myProc_x = phg->comm->myProc_x;
int nProc_x = phg->comm->nProc_x;
int nProc_y = phg->comm->nProc_y;
int max_nProc_xy = MAX(nProc_x, nProc_y);

  if (phg->comm->nProc == 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Do not call this routine on one proc.");
    return ZOLTAN_FATAL;
  }

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, phg, NULL, stdout, "GatherBefore");/* NULL parts for now;
                                                           add non-NULL later */
#endif

  /******************************************************************
   *  0. Allocate the hypergraph to be returned. 
   *  Set values that we already know. 
   ******************************************************************/

  shg = *gathered_hg = (HGraph *) ZOLTAN_MALLOC(sizeof(HGraph));
  if (!shg) MEMORY_ERROR;

  Zoltan_HG_HGraph_Init(shg);
  shg->nVtx = phg->dist_x[nProc_x];    /* TODO64 - can this exceed 2B? */
  shg->nEdge = phg->dist_y[nProc_y];

  shg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  shg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  if (!shg->dist_x || !shg->dist_y) MEMORY_ERROR;

  /* The gathered graph lives on a 1x1 grid: a single range [0, n). */
  shg->dist_x[0] = shg->dist_y[0] = 0;
  shg->dist_x[1] = shg->nVtx;
  shg->dist_y[1] = shg->nEdge;

  shg->comm = scomm;

  shg->EdgeWeightDim = phg->EdgeWeightDim;
  shg->VtxWeightDim = phg->VtxWeightDim;
  if (shg->VtxWeightDim && shg->nVtx)
    shg->vwgt = (float *) ZOLTAN_MALLOC(shg->nVtx * shg->VtxWeightDim 
                                                  * sizeof(float));
  if (shg->EdgeWeightDim && shg->nEdge)
    shg->ewgt = (float *) ZOLTAN_MALLOC(shg->nEdge * shg->EdgeWeightDim 
                                                  * sizeof(float));
  /* Fixed vertices */
  shg->bisec_split = phg->bisec_split;
  if (hgp->UseFixedVtx)
    shg->fixed_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  if (hgp->UsePrefPart)
    shg->pref_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  
  /* Allocate arrays for use in gather operations.
     recv_size, each and disp share one allocation of 3*max_nProc_xy ints;
     each/disp are only dereferenced after the NULL check below. */
  recv_size = (int *) ZOLTAN_MALLOC(3 * max_nProc_xy * sizeof(int));
  each = recv_size + max_nProc_xy;
  disp = each + max_nProc_xy;
 
  /* TODO64 - phg->dist_y[nProc_y] could exceed 2 Billion, NO? */
  send_size = MAX(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x], 
                  phg->dist_y[nProc_y]);
  send_buf = (int *) ZOLTAN_MALLOC(send_size * sizeof(int));
  

  /* fixed_part and pref_part are written later, so their allocations are
     verified here together with the other buffers. */
  if ((shg->VtxWeightDim && shg->nVtx && !shg->vwgt) ||
      (shg->EdgeWeightDim && shg->nEdge && !shg->ewgt) ||
      (hgp->UseFixedVtx && shg->nVtx && !shg->fixed_part) ||
      (hgp->UsePrefPart && shg->nVtx && !shg->pref_part) || !recv_size ||
      (send_size && !send_buf)) 
    MEMORY_ERROR;
  

  /*************************************************************
   *  1. Gather all non-zeros for vertices in processor column *
   *************************************************************/
  
  if (nProc_y == 1) {
    /* 
     * Don't need a gather; just set pointers appropriately for row-gather
     * in Step 2 below.
     */

    col_nVtx = phg->nVtx;
    col_nEdge = phg->nEdge;
    col_nPin = phg->nPins;
    col_vindex = phg->vindex;
    col_vedge = phg->vedge;
    col_hindex = phg->hindex;
    col_hvertex = phg->hvertex;

    for (i = 0; i < shg->EdgeWeightDim * shg->nEdge; i++)
      shg->ewgt[i] = phg->ewgt[i];
  }

  else {

    /* Gather local size info for each proc in column */

    MPI_Allgather(&(phg->nPins), 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->col_comm);
  
    /* Compute number of vtx, edge, and nnz in column */
    col_nVtx = (int)(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x]);
    col_nEdge = phg->dist_y[nProc_y];   /* SCHEMEA */
    col_nPin = 0;
    for (i = 0; i < nProc_y; i++) {
      col_nPin += recv_size[i];
    }
    
    /* Allocate arrays for column hypergraph */
    col_hindex = (int *) ZOLTAN_CALLOC((col_nEdge+1), sizeof(int));
    col_hvertex = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    col_vindex = (int *) ZOLTAN_CALLOC((col_nVtx+1), sizeof(int));
    col_vedge = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    if (!col_vindex || !col_hindex || 
        (col_nPin && (!col_vedge || !col_hvertex)))
      MEMORY_ERROR;
    
    /* Gather hvertex data for all procs in column */
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex LNO to GNO here. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_y; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(phg->hvertex, phg->nPins, MPI_INT,
                   col_hvertex, recv_size, disp, MPI_INT, phg->comm->col_comm);
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex GNO to LNO here */
  
    /* Gather hindex data for all procs in column */
  
    /* Edge sizes are sent; the index array is rebuilt by a prefix sum. */
    for (i = 0; i < phg->nEdge; i++)
      send_buf[i] = phg->hindex[i+1] - phg->hindex[i];
  
    /* SCHEMEA can assume a recv for each edge;
     * SCHEMEB needs to gather the number of edges recv'd from each proc. */
  
    for (i = 0; i < nProc_y; i++) 
      each[i] = phg->dist_y[i+1] - phg->dist_y[i];

    disp[0] = 0;  /* Can't use dist_y because it may not be sizeof(int) */
    for (i=1; i < nProc_y; i++){
      disp[i] = disp[i-1] + each[i-1];
    }
  
    /* SCHEMEA can use phg->dist_y for displacement array.
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, phg->nEdge, MPI_INT, 
                   col_hindex, each, disp, MPI_INT, phg->comm->col_comm);
  
    /* Perform prefix sum on col_hindex */
    sum = 0;
    for (i = 0; i < col_nEdge; i++) {
      tmp = col_hindex[i];
      col_hindex[i] = sum;
      sum += tmp;
    }
    col_hindex[col_nEdge] = sum;

    /* Sanity check */
    if (col_hindex[col_nEdge] != col_nPin) {
      printf("%d Sanity check failed:  "
             "col_hindex[col_nEdge] %d != col_nPin %d\n", 
              zz->Proc, col_hindex[col_nEdge], col_nPin);
      exit(-1);
    }
  
    /* Gather edge weights, if any. */
    if (shg->EdgeWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->EdgeWeightDim;
      for (i = 1; i < nProc_y; i++) {
        each[i] *= phg->EdgeWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->ewgt, phg->nEdge*phg->EdgeWeightDim, MPI_FLOAT, 
                     shg->ewgt, each, disp, MPI_FLOAT, phg->comm->col_comm);
    }
   
  
    /* Derive the vertex-based arrays from the gathered edge-based ones. */
    Zoltan_HG_Mirror(col_nEdge, col_hindex, col_hvertex, 
                     col_nVtx, col_vindex, col_vedge);
  
  }  /* End column-gather */
  
  /*************************************************************
   *  2. Gather all non-zeros for edges in processor rows      *
   *  All processors in a processor column now have the same   *
   *  hypergraph; we now gather it across rows.                *
   *************************************************************/

  if (nProc_x == 1) {
    /* 
     * Don't need a gather across the row; just set pointers appropriately
     * in shg.
     */
    shg->vindex = col_vindex;
    shg->vedge = col_vedge;
    shg->hindex = col_hindex;
    shg->hvertex = col_hvertex;
    /* The column hypergraph is the whole hypergraph here, so its pin count
       is the gathered pin count (the row-gather branch sets nPins too). */
    shg->nPins = col_nPin;

    /* Copy vwgt and fixed arrays so shg owns this memory */
    for (i = 0; i < shg->VtxWeightDim*shg->nVtx; i++)
      shg->vwgt[i] = phg->vwgt[i];
    if (hgp->UseFixedVtx)
      for (i = 0; i < shg->nVtx; i++)
        shg->fixed_part[i] = phg->fixed_part[i];
    if (hgp->UsePrefPart)
      for (i = 0; i < shg->nVtx; i++)
        shg->pref_part[i] = phg->pref_part[i];
  }

  else {

    /* Gather info about size within the row */

    MPI_Allgather(&col_nPin, 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->row_comm);
  
    tmp = 0;
    for (i = 0; i < nProc_x; i++) 
      tmp += recv_size[i];

    shg->nPins = tmp;
  
    shg->vindex = (int *) ZOLTAN_CALLOC((shg->nVtx+1), sizeof(int));
    shg->vedge = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
    shg->hindex = (int *) ZOLTAN_CALLOC((shg->nEdge+1), sizeof(int));
    shg->hvertex = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
   
    if (!shg->vindex || !shg->hindex ||
        (shg->nPins && (!shg->vedge || !shg->hvertex)))
      MEMORY_ERROR;
    
    /* Gather vedge data for all procs in row */
  
    /* SCHEMEA can send local edge numbers; 
       SCHEMEB requires edge LNO to GNO conversion. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(col_vedge, col_nPin, MPI_INT,
                   shg->vedge, recv_size, disp, MPI_INT, phg->comm->row_comm);
  
    /* Gather vindex data for all procs in row */
  
    /* Vertex degrees are sent; the index array is rebuilt by a prefix sum. */
    for (i = 0; i < col_nVtx; i++)
      send_buf[i] = col_vindex[i+1] - col_vindex[i];
  
    /* SCHEMEA can assume a recv for each vertex;
     * SCHEMEB would need to gather the number of vtxs recv'd from each proc. */
  
    for (i = 0; i < nProc_x; i++) 
      each[i] = (int)(phg->dist_x[i+1] - phg->dist_x[i]);

    disp[0] = 0;  /* Can't use dist_x, may not be sizeof(int) */
    for (i = 1; i < nProc_x; i++) 
      disp[i] = disp[i-1] + each[i-1];

    /* SCHEMEA can use phg->dist_x as displacement array;
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, col_nVtx, MPI_INT, 
                   shg->vindex, each, disp,
                   MPI_INT, phg->comm->row_comm);

    /* Perform prefix sum on shg->vindex */
    sum = 0;
    for (i = 0; i < shg->nVtx; i++) {
      tmp = shg->vindex[i];
      shg->vindex[i] = sum;
      sum += tmp;
    }
    shg->vindex[shg->nVtx] = sum;
  
    /* Sanity check */
    if (shg->vindex[shg->nVtx] != shg->nPins) {
      printf("%d Sanity check failed:  "
             "shg->vindex %d != nPins %d\n", 
              zz->Proc, shg->vindex[shg->nVtx], shg->nPins);
      exit(-1);
    }
  
    /* Gather fixed array, if any  */
    if (hgp->UseFixedVtx){
  
#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather before gather. phg->fixed =");
      for (i=0; i<phg->nVtx; i++){
        printf(" %d ", phg->fixed_part[i]);
      }
      printf("\n");
#endif

      /* Can use the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      /* fixed_part holds ints, so the MPI datatype must be MPI_INT. */
      MPI_Allgatherv(phg->fixed_part, phg->nVtx, MPI_INT, 
                     shg->fixed_part, each, disp, MPI_INT, phg->comm->row_comm);

#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather after gather. shg->fixed =");
      for (i=0; i<shg->nVtx; i++){
        printf(" %d ", shg->fixed_part[i]);
      }
      printf("\n");
#endif
    }
    /* Gather pref part array, if any  */
    if (hgp->UsePrefPart){
      /* Can use the same each array. */
      /* Need to compute new disp array. */
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      /* pref_part holds ints, so the MPI datatype must be MPI_INT. */
      MPI_Allgatherv(phg->pref_part, phg->nVtx, MPI_INT, 
                     shg->pref_part, each, disp, MPI_INT, phg->comm->row_comm);
    }
    
    /* Gather vertex weights, if any. */
    if (shg->VtxWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->VtxWeightDim;
      for (i = 1; i < nProc_x; i++) {
        each[i] *= phg->VtxWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->vwgt, phg->nVtx*phg->VtxWeightDim, MPI_FLOAT, 
                     shg->vwgt, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
  
    /* Derive the edge-based arrays from the gathered vertex-based ones. */
    Zoltan_HG_Mirror(shg->nVtx, shg->vindex, shg->vedge, 
                     shg->nEdge, shg->hindex, shg->hvertex);

  }  /* End row gather */
  
#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, shg, NULL, stdout, "GatherAfter");/* NULL parts for now;
                                                           add non-NULL later */
  Zoltan_PHG_Plot_2D_Distrib(zz, phg);
  Zoltan_PHG_Plot_2D_Distrib(zz, shg);
#endif

End:

  if (ierr < 0) {
    Zoltan_HG_HGraph_Free(*gathered_hg);
    ZOLTAN_FREE(gathered_hg);
  }

  /* each and disp point into recv_size; freeing recv_size releases all. */
  Zoltan_Multifree(__FILE__, __LINE__, 2, &send_buf, 
                                          &recv_size);

  /* The column arrays are temporaries only when both gathers ran: with
     nProc_y == 1 they alias phg's arrays, and with nProc_x == 1 they were
     handed over to shg. */
  if (nProc_x > 1 && nProc_y > 1) 
    Zoltan_Multifree(__FILE__, __LINE__, 4, &col_vedge,
                                            &col_vindex,
                                            &col_hvertex,
                                            &col_hindex);
  return ierr;
}