示例#1
0
/*
 * Zoltan_Heap_Free: release the storage owned by a heap.
 *
 * When the heap has capacity (h->space is nonzero) its three arrays
 * (ele, pos, value) are handed to Zoltan_Multifree and the capacity is
 * reset to zero.  The element count h->n is reset to zero in every case.
 */
void Zoltan_Heap_Free (HEAP *h)
{
  h->n = 0;                 /* heap is empty from here on, allocated or not */

  if (h->space == 0)
    return;                 /* no capacity recorded, so nothing to release */

  Zoltan_Multifree(__FILE__, __LINE__, 3, &h->ele, &h->pos, &h->value);
  h->space = 0;
}
示例#2
0
/*
 * Zoltan_Comm_Sort_Ints: stable counting sort of non-negative integers.
 *
 * Sorts vals_sort into ascending order and applies the same permutation to
 * vals_other.  Entries with equal keys keep their original relative order.
 * The workspace is proportional to (largest key + 2) + 2*nvals ints.
 *
 * Returns:
 *   ZOLTAN_OK      on success (including the trivial nvals == 1 case);
 *   ZOLTAN_FATAL   if nvals < 1, either array is NULL, or vals_sort holds a
 *                  negative value (keys are used directly as array indices);
 *   ZOLTAN_MEMERR  if the workspace could not be allocated.
 * On any non-OK return the input arrays are left unmodified.
 */
int  Zoltan_Comm_Sort_Ints(
int *vals_sort,     /* values to be sorted */
int *vals_other,    /* other array to be reordered w/ sort */
int  nvals)         /* length of these two arrays */
{
    int *store=NULL, *copy_sort=NULL, *copy_other=NULL, *p;
    int i;
    int low;         /* smallest integer to sort, must not be negative */
    int top;         /* largest integer to sort */
    int err = ZOLTAN_OK;

    if (nvals < 1 || vals_sort == NULL  || vals_other == NULL)
       return ZOLTAN_FATAL;
    if (nvals == 1)
       return ZOLTAN_OK;           /* fastest way to sort 1 item is to return */

    /* find the key range (sort sometimes used for non processor lists) */
    low = top = vals_sort[0];
    for (i = 1; i < nvals; i++) {
       if (top < vals_sort[i])
           top = vals_sort[i];
       if (low > vals_sort[i])
           low = vals_sort[i];
       }

    /* A negative key would index in front of the counting array below. */
    if (low < 0)
       return ZOLTAN_FATAL;

    /* size computed in size_t so that top+2 cannot overflow an int */
    store      = (int*) ZOLTAN_CALLOC ((size_t) top + 2,  sizeof(int));
    copy_sort  = (int*) ZOLTAN_MALLOC (nvals * sizeof(int));
    copy_other = (int*) ZOLTAN_MALLOC (nvals * sizeof(int));

    if (store  &&  copy_sort  &&  copy_other)  {
       memcpy (copy_sort,  vals_sort,  nvals * sizeof(int));
       memcpy (copy_other, vals_other, nvals * sizeof(int));

       p = store+1;
       for (i = 0; i < nvals; i++)
          p[copy_sort[i]]++;                /* count number of occurrences */

       for (i = 0; i < top; i++)
          p[i+1] += p[i];                   /* compute partial sums */
                                            /* assert: p[top] = nvals */

       /* Viewed through store, entry v now holds the number of keys that
          are smaller than v, i.e. the first output slot for key v. */
       p = store;                           /* effectively shifts down by one */
       for (i = 0; i < nvals; i++)  {
          vals_sort  [p[copy_sort [i]]] = copy_sort [i];
          vals_other [p[copy_sort [i]]] = copy_other[i];
          ++p[copy_sort [i]];
          }
       }
    else
       err =  ZOLTAN_MEMERR;

    Zoltan_Multifree (__FILE__, __LINE__, 3, &copy_sort, &copy_other, &store);
    return err;
    }
示例#3
0
文件: phg.c 项目: askhl/octopus-dfrt2
/*
 * Zoltan_PHG_Free_Hypergraph_Data: release the arrays attached to a ZHG.
 *
 * Passes the twelve array members listed below to Zoltan_Multifree and then
 * releases the embedded hypergraph with Zoltan_HG_HGraph_Free.  The ZHG
 * structure itself is not freed here.  A NULL argument is a no-op.
 */
void Zoltan_PHG_Free_Hypergraph_Data(ZHG *zoltan_hg)
{
  if (zoltan_hg == NULL)
    return;

  Zoltan_Multifree(__FILE__, __LINE__, 12,
                   &zoltan_hg->GIDs,
                   &zoltan_hg->LIDs,
                   &zoltan_hg->Input_Parts,
                   &zoltan_hg->Output_Parts,
                   &zoltan_hg->AppObjSizes,
                   &zoltan_hg->Remove_EGIDs,
                   &zoltan_hg->Remove_ELIDs,
                   &zoltan_hg->Remove_Esize,
                   &zoltan_hg->Remove_GEsize,
                   &zoltan_hg->Remove_Ewgt,
                   &zoltan_hg->Remove_Pin_GIDs,
                   &zoltan_hg->Remove_Pin_Procs);

  Zoltan_HG_HGraph_Free (&zoltan_hg->HG);
}
示例#4
0
/*
 * Path growing matching (PGM), hypergraph version.
 *
 * Grows vertex paths greedily: from the current vertex the heaviest
 * still-unmatched neighbor is chosen, and the pair is recorded alternately
 * in one of two candidate matchings, Match[0] (the caller's match array)
 * and Match[1] (scratch).  The similarity of two vertices is the sum, over
 * the hyperedges they share, of 2/(pins*(pins-1)), scaled by the edge
 * weight when hg->ewgt is present.  At the end the candidate with the
 * larger accumulated weight is left in match[].
 *
 *   zz    - Zoltan structure; only zz->Proc is read, for error reporting
 *   hg    - hypergraph (nVtx, vindex/vedge, hindex/hvertex, optional ewgt)
 *   match - in/out; a vertex v is treated as unmatched while match[v] == v
 *   limit - in/out; number of further matches allowed; on return it holds
 *           the remaining budget of the matching that was kept
 *
 * Returns ZOLTAN_OK, or ZOLTAN_MEMERR if the scratch arrays cannot be
 * allocated.
 */
static int matching_pgm (ZZ *zz, HGraph *hg, int *match, int *limit)
{
int i, j, k, side = 0, edge, vertex, *Match[2] = {NULL, NULL};
int limits[2], neighbor, next_vertex, pins;
double w[2]={0.0,0.0}, weight, max_weight, *sims = NULL;
char  *yo = "matching_pgm";

  limits[0] = limits[1] = *limit;
  Match[0] = match;
  if (hg->nVtx) {
    if (!(Match[1] = (int*)   ZOLTAN_MALLOC (hg->nVtx * sizeof(int)))
     || !(sims     = (double*) ZOLTAN_CALLOC (hg->nVtx,  sizeof(double))) ) {
      Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      return ZOLTAN_MEMERR;
    }
  }

  /* the second candidate matching starts with every vertex unmatched */
  for (i = 0; i < hg->nVtx; i++)
    Match[1][i] = i;

  for (i = 0; i < hg->nVtx  &&  limits[side] > 0; i++) {
    if (Match[0][i] == i && Match[1][i] == i) {
      /* i is free in both candidates: start a new path here.  The end of a
         path is signalled by next_vertex == -1, so vertex 0 is a valid
         path vertex and the test must be >= 0. */
      vertex = i;
      while (vertex >= 0 && limits[side] > 0) {
        max_weight = 0.0;
        next_vertex = -1;

        /* accumulate the similarity of every free neighbor of vertex */
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          pins = hg->hindex[edge+1] - hg->hindex[edge];
          if (pins < 2)
            continue;     /* no neighbor in this edge; avoids dividing by 0 */
          /* product formed in double so that large edges cannot overflow */
          weight = 2.0 / ((double)(pins-1) * (double)pins);
          if (hg->ewgt)
            weight *= hg->ewgt[edge];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (neighbor != vertex && Match[0][neighbor] == neighbor && 
                Match[1][neighbor]==neighbor)
               sims[neighbor] += weight;
          }
        }
        /* pick the heaviest neighbor and reset sims[] to zero for reuse */
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (sims[neighbor] > 0.0) {
              if (sims[neighbor] > max_weight) {
                max_weight = sims[neighbor];
                next_vertex = neighbor;
              }
              sims[neighbor] = 0.0;
            }
          }
        }

        if (next_vertex >= 0) {
          /* record the pair in the current candidate, then alternate */
          Match[side][vertex] = next_vertex;
          Match[side][next_vertex] = vertex;
          limits[side]--;
          w[side] += max_weight;
          side = 1-side;
        }
        vertex = next_vertex;   /* -1 ends the path */
      }
    }
  }

  /* keep the heavier candidate; Match[0] already is the caller's array */
  if (w[0] < w[1]) {
    for (i = 0; i < hg->nVtx; i++)
      match[i] = Match[1][i];
    *limit = limits[1];
  }
  else
    *limit = limits[0];

  Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
  return ZOLTAN_OK;
}
示例#5
0
/*
 * refine_fm2: parallel refinement of a two-way partition.
 *
 * Runs a sequence of passes.  In each pass vertices are moved in one
 * direction only (from part 'from' to part 'to').  Within a processor
 * column the processor with rank rootRank (chosen by Zoltan_PHG_Find_Root)
 * selects the moves using a max-heap keyed on gain and broadcasts the
 * accepted prefix of moves to the other processors of the column, which
 * replay them locally.  The loop stops after two consecutive passes in
 * which no processor row accepted a move, or after hgp->fm_loop_limit
 * passes.
 *
 *   zz         - Zoltan structure (tracing, timers, error reporting)
 *   hg         - local part of the hypergraph; hg->comm is the processor grid
 *   p          - number of parts; anything other than 2 is rejected
 *   part_sizes - target part fractions; entries [0] and [part_dim] are read
 *   part       - in/out part number (0 or 1) of each local vertex
 *   hgp        - parameters (use_timers, fm_max_neg_move, fm_loop_limit,
 *                UseFixedVtx, output_level)
 *   bal_tol    - multiplier applied to the target weights to obtain the
 *                maximum allowed part weights
 *
 * Returns ZOLTAN_OK, ZOLTAN_FATAL when p != 2, or the value left in ierr
 * by MEMORY_ERROR (macro defined elsewhere; presumably it sets ierr and
 * jumps to End -- confirm).
 */
static int refine_fm2 (ZZ *zz,
                       HGraph *hg,
                       int p,
                       float *part_sizes,
                       Partition part,
                       PHGPartParams *hgp,
                       float bal_tol
    )
{
    int    i, j, ierr=ZOLTAN_OK, *pins[2]={NULL,NULL}, *lpins[2]={NULL,NULL};
    int    *moves=NULL, *mark=NULL, *adj=NULL, passcnt=0;
    float  *gain=NULL, *lgain=NULL;
    int    best_cutsizeat, cont, successivefails=0;
    double total_weight, weights[2], total_lweight, lweights[2], lwadjust[2],
        max_weight[2], lmax_weight[2], avail[2], gavail[2];
    int availcnt[2], gavailcnt[2];
    double targetw0, ltargetw0, minvw=DBL_MAX;
    double cutsize, best_cutsize, 
        best_limbal, imbal, limbal;
    /* Zero-initialized: the cleanup at End calls Zoltan_Heap_Free on both
       heaps on the root rank, and the first MEMORY_ERROR below can be
       reached before Zoltan_Heap_Init has run. */
    HEAP   heap[2] = {{0}};
    char   *yo="refine_fm2";
    int    part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
#ifdef HANDLE_ISOLATED_VERTICES    
    int    isocnt=hg->nVtx; /* only root uses isocnt, isolated vertices
                               are kept at the end of moves array */
    int    *deg=NULL, *ldeg=NULL;
#if 0
    double best_imbal;
#endif
#endif
    PHGComm *hgc=hg->comm;
    int rootRank;
    
    struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
    int do_timing = (hgp->use_timers > 2);
    int detail_timing = (hgp->use_timers > 3);

    ZOLTAN_TRACE_ENTER(zz, yo);

    if (p != 2) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "p!=2 not allowed for refine_fm2.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_FATAL;
    }

    /* return only if globally there is no edge or vertex */
    if (!hg->dist_y[hgc->nProc_y] || hg->dist_x[hgc->nProc_x] == 0) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_OK;
    }


#ifdef USE_SERIAL_REFINEMENT_ON_ONE_PROC
    if (hgc->nProc==1){ /* only one proc? use serial code */
        ZOLTAN_TRACE_EXIT(zz, yo);
        return serial_fm2 (zz, hg, p, part_sizes, part, hgp, bal_tol);
    }
#endif

    /* timers are created lazily: a negative index means "not created yet" */
    if (do_timing) { 
        if (timer->rfrefine < 0) 
            timer->rfrefine = Zoltan_Timer_Init(zz->ZTime, 1, "Ref_P_Total");
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfrefine, hgc->Communicator);
    }
    if (detail_timing) {
        if (timer->rfpins < 0) 
            timer->rfpins = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Pins");
        if (timer->rfiso < 0) 
            timer->rfiso = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_IsolatedVert");
        if (timer->rfgain < 0) 
            timer->rfgain = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Gain");
        if (timer->rfheap < 0) 
            timer->rfheap = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Heap");
        if (timer->rfpass < 0) 
            timer->rfpass = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Pass");
        if (timer->rfroll < 0) 
            timer->rfroll = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Roll");
        if (timer->rfnonroot < 0) 
            timer->rfnonroot = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_NonRoot");
    }
    
    
    /* find the index of the proc in column group with 
       the most #nonzeros; it will be our root
       proc for computing moves since it has better 
       knowledge about global hypergraph.
       We ignore returned #pins (i) in root */
    Zoltan_PHG_Find_Root(hg->nPins, hgc->myProc_y, hgc->col_comm, 
                         &i, &rootRank);
    
    /* Calculate the weights in each partition and total, then maxima.
       Only the first weight component of each vertex is used; minvw ends
       up as the smallest local vertex weight (1.0 when unweighted). */
    weights[0] = weights[1] = 0.0;
    lweights[0] = lweights[1] = 0.0;
    if (hg->vwgt) 
        for (i = 0; i < hg->nVtx; i++) {
            lweights[part[i]] += hg->vwgt[i*hg->VtxWeightDim];
            minvw = (minvw > hg->vwgt[i*hg->VtxWeightDim]) 
                  ? hg->vwgt[i*hg->VtxWeightDim] 
                  : minvw;
        }
    else {
        minvw = 1.0;
        for (i = 0; i < hg->nVtx; i++)
            lweights[part[i]] += 1.0;
    }

    MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
    total_weight = weights[0] + weights[1];
    targetw0 = total_weight * part_sizes[0]; /* global target weight for part 0 */

    max_weight[0] = total_weight * bal_tol * part_sizes[0];
    max_weight[1] = total_weight * bal_tol * part_sizes[part_dim]; /* should be (1 - part_sizes[0]) */


    /* Local limits: an empty part gets an equal share per processor column;
       otherwise the global slack is distributed in proportion to the local
       share of the part's weight. */
    if (weights[0]==0.0) {
        ltargetw0 = targetw0 / hgc->nProc_x;
        lmax_weight[0] = max_weight[0] / hgc->nProc_x;
    } else {
        lmax_weight[0] = lweights[0] +
            (max_weight[0] - weights[0]) * ( lweights[0] / weights[0] );
        ltargetw0 = targetw0 * ( lweights[0] / weights[0] ); /* local target weight */
    }
    if (weights[1]==0.0)
        lmax_weight[1] = max_weight[1] / hgc->nProc_x;
    else
        lmax_weight[1] = lweights[1] +
            (max_weight[1] - weights[1]) * ( lweights[1] / weights[1] );

    total_lweight = lweights[0]+lweights[1];
    
    /* avail[s] is the amount by which the local limit of part s exceeds the
       total local weight; availcnt[s] flags processors with no such excess.
       The summed excess is then shared among the flagged processors. */
    avail[0] = MAX(0.0, lmax_weight[0]-total_lweight);
    avail[1] = MAX(0.0, lmax_weight[1]-total_lweight);
    availcnt[0] = (avail[0] == 0) ? 1 : 0;
    availcnt[1] = (avail[1] == 0) ? 1 : 0; 
    MPI_Allreduce(avail, gavail, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
    MPI_Allreduce(availcnt, gavailcnt, 2, MPI_INT, MPI_SUM, hgc->row_comm);

#ifdef _DEBUG
    if (gavailcnt[0] || gavailcnt[1])
        uprintf(hgc, "before adjustment, LMW[%.1lf, %.1lf]\n", lmax_weight[0], lmax_weight[1]);
#endif

    if (gavailcnt[0]) 
        lmax_weight[0] += gavail[0] / (double) gavailcnt[0];
    
    if (gavailcnt[1]) 
        lmax_weight[1] += gavail[1] / (double) gavailcnt[1];
    
    /* Our strategy is to stay close to the current local weight balance.
       We do not need the same local balance on each proc, as long as
       we achieve approximate global balance.                            */

#ifdef _DEBUG
    imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
    limbal = (ltargetw0==0.0) ? 0.0 : fabs(lweights[0]-ltargetw0)/ltargetw0;
    uprintf(hgc, "H(%d, %d, %d), FM2: W[%.1lf, %.1lf] MW:[%.1lf, %.1lf] I=%.3lf  LW[%.1lf, %.1lf] LMW[%.1lf, %.1lf] LI=%.3lf\n", hg->nVtx, hg->nEdge, hg->nPins, weights[0], weights[1], max_weight[0], max_weight[1], imbal, lweights[0], lweights[1], lmax_weight[0], lmax_weight[1], limbal);
#endif

    
    /* pins[] and lpins[] are each one allocation of 2*nEdge ints; the second
       half is addressed through pins[1] / lpins[1] below. */
    if ((hg->nEdge && (!(pins[0]    = (int*) ZOLTAN_MALLOC(2 * hg->nEdge * sizeof(int)))
                      || !(lpins[0] = (int*) ZOLTAN_CALLOC(2 * hg->nEdge, sizeof(int))))) ||
        (hg->nVtx && (!(moves   = (int*)   ZOLTAN_MALLOC(hg->nVtx * sizeof(int)))
                     || !(lgain = (float*) ZOLTAN_MALLOC(hg->nVtx * sizeof(float))))))
        MEMORY_ERROR;

    if (hg->nEdge) {
        pins[1] = &(pins[0][hg->nEdge]);
        lpins[1] = &(lpins[0][hg->nEdge]);
    }

    if (hgc->myProc_y==rootRank) { /* only root needs mark, adj, gain and heaps*/
        Zoltan_Heap_Init(zz, &heap[0], hg->nVtx);
        Zoltan_Heap_Init(zz, &heap[1], hg->nVtx);  
        if (hg->nVtx &&
            (!(mark     = (int*)   ZOLTAN_CALLOC(hg->nVtx, sizeof(int)))
             || !(adj   = (int*)   ZOLTAN_MALLOC(hg->nVtx * sizeof(int)))   
             || !(gain  = (float*) ZOLTAN_MALLOC(hg->nVtx * sizeof(float)))))
            MEMORY_ERROR;
    }

    /* Initial calculation of the local pin distribution (sigma in UVC's papers):
       lpins[s][e] = number of local pins of edge e whose vertex is in part s */
    if (detail_timing)         
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfpins, hgc->Communicator);                        
    for (i = 0; i < hg->nEdge; ++i)
        for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j){
            ++(lpins[part[hg->hvertex[j]]][i]);
        }
    if (detail_timing)         
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpins, hgc->Communicator);                    
    

#ifdef HANDLE_ISOLATED_VERTICES        
    /* first compute vertex degree to find any isolated vertices
       we use lgain and gain, as ldeg, deg.*/
    if (hg->nVtx) {
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);        
        ldeg = (int *) lgain;
        deg = (int *) gain; /* null for non-root but that is fine */
        for (i = 0; i < hg->nVtx; ++i)
            ldeg[i] = hg->vindex[i+1] - hg->vindex[i];
        MPI_Reduce(ldeg, deg, hg->nVtx, MPI_INT, MPI_SUM, rootRank,
                   hg->comm->col_comm);

        if (hgc->myProc_y==rootRank) { /* root marks isolated vertices */
            for (i=0; i<hg->nVtx; ++i)
                if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
                    if (!deg[i]) {
                        /* stored from the end of moves[]; part p is encoded
                           as -(p+1) while the vertex is set aside */
                        moves[--isocnt] = i;
                        part[i] = -(part[i]+1); /* remove those vertices from that part*/
                    }
                }
        }   
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);        

    }
#endif
    
    do {
        int v=1, movecnt=0, neggaincnt=0, from, to;
        int maxneggain = (hgp->fm_max_neg_move < 0) ? hg->nVtx : hgp->fm_max_neg_move;
        int notfeasible=(weights[0]>max_weight[0]) || (weights[1]>max_weight[1]);
    
        /* now compute global pin distribution */
        if (hg->nEdge) {
            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfpins, hgc->Communicator);                    
            MPI_Allreduce(lpins[0], pins[0], 2*hg->nEdge, MPI_INT, MPI_SUM, 
                          hgc->row_comm);
            if (detail_timing)         
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpins, hgc->Communicator);                    
        }

        /* now we can compute actual cut: an edge is cut when it has pins
           in both parts; local contributions are summed over col_comm */
        best_cutsizeat=0;
        cutsize = 0.0;
        for (i=0; i < hg->nEdge; ++i) {
            if (pins[0][i] && pins[1][i])
                cutsize += (hg->ewgt ? hg->ewgt[i] : 1.0);
        }
        MPI_Allreduce(&cutsize, &best_cutsize, 1, MPI_DOUBLE, MPI_SUM, hgc->col_comm);
        cutsize = best_cutsize;

        imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;        
        best_limbal = limbal = (ltargetw0==0.0) ? 0.0
            : fabs(lweights[0]-ltargetw0)/ltargetw0;

        /* UVCUVC: it looks like instead of moving always from overloaded
           part, alternating the 'from' part gives better results.
           Hence if the imbal is not really bad (2x worse) we use that approach  */
        if (imbal > BADBALANCE*(bal_tol-1.0) ) /* decide which way the moves will be in this pass */
            from = (weights[0] < targetw0) ? 1 : 0;
        else 
            from = passcnt % 2; 
        /* we want to be sure that everybody!!! picks the same source */
        MPI_Bcast(&from, 1, MPI_INT, 0, hgc->Communicator); 

        to = 1-from;
        
#ifdef _DEBUG
        /* Just for debugging */
        best_cutsize = Zoltan_PHG_Compute_NetCut(hgc, hg, part);
        if (best_cutsize!=cutsize) {
            errexit("%s: Initial cutsize=%.2lf Verify: total=%.2lf\n", uMe(hgc), cutsize,
                    best_cutsize);
        }
        if (hgc->myProc_y==rootRank)
            for (i = 0; i< hg->nVtx; ++i)
                if (mark[i])
                    errexit("mark[%d]=%d", i, mark[i]);
        /* debugging code ends here */
#endif

        /* compute only the gains of the vertices from 'from' part */
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfgain, hgc->Communicator);                    
        
        for (i = 0; i < hg->nVtx; ++i) {
            lgain[i] = 0.0;
            if ((part[i]==from) && (!hgp->UseFixedVtx || hg->fixed_part[i]<0))
                for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
                    int edge = hg->vedge[j];
                    if ((pins[0][edge]+pins[1][edge])>1) { /* if they have at least 2 pins :) */
                        /* last pin on its side: moving it uncuts the edge;
                           no pin on the other side: moving it cuts the edge */
                        if (pins[part[i]][edge] == 1)
                            lgain[i] += (hg->ewgt ? hg->ewgt[edge] : 1.0);
                        else if (pins[1-part[i]][edge] == 0)
                            lgain[i] -= (hg->ewgt ? hg->ewgt[edge] : 1.0);
                    }
                }
        }
        /* now sum up all gains on only root proc */
        if (hg->nVtx)
            MPI_Reduce(lgain, gain, hg->nVtx, MPI_FLOAT, MPI_SUM, rootRank, 
                       hgc->col_comm);
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfgain, hgc->Communicator);                    
        

        if (hgp->output_level >= PHG_DEBUG_ALL) {
            imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
            printf("%s FM Pass %d (%d->%d) Cut=%.2f W[%5.0f, %5.0f] I= %.2f LW[%5.0f, %5.0f] LI= %.2f\n", uMe(hgc), passcnt, from, to, cutsize, weights[0], weights[1], imbal, lweights[0], lweights[1], limbal);
        }

        if (hgc->myProc_y==rootRank) {
            /* those are the lucky ones; each proc in column-group
               could have compute the same moves concurrently; but for this
               version we'll do it in the root procs and broadcast */

#ifdef HANDLE_ISOLATED_VERTICES
            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);                    
            lwadjust[0] = lwadjust[1] = 0.0;
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int   u=moves[i], pno=-part[u]-1;
                float w=(hg->vwgt ? hg->vwgt[u*hg->VtxWeightDim] : 1.0);

                if (pno<0 || pno>1)
                    errexit("heeeey pno=%d", pno);
                /* let's remove it from its part */
                lwadjust[pno] -= w;                
            }
            lweights[0] += lwadjust[0];
            lweights[1] += lwadjust[1];
            if (detail_timing)         
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);                    
#endif

            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfheap, hgc->Communicator);                    
            
            /* Initialize the heaps and fill them with the gain values */
            Zoltan_Heap_Clear(&heap[from]);  
            for (i = 0; i < hg->nVtx; ++i)
                if ((part[i]==from) && (!hgp->UseFixedVtx || hg->fixed_part[i]<0))
                    Zoltan_Heap_Input(&heap[from], i, gain[i]);
            Zoltan_Heap_Make(&heap[from]);
            if (detail_timing) {
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfheap, hgc->Communicator);
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfpass, hgc->Communicator);
            }

            /* keep moving while the non-improving streak is short enough
               and the destination can still take the lightest vertex */
            while ((neggaincnt < maxneggain) && ((lweights[to]+minvw) <= lmax_weight[to]) ) {
                if (Zoltan_Heap_Empty(&heap[from])) { /* too bad it is empty */
                    v = -1;
                    break;
                }
                
                v = Zoltan_Heap_Extract_Max(&heap[from]);    
                
#ifdef _DEBUG
                if (from != part[v])
                    errexit("hooop from=%d part[%d]=%d", from, v, part[v]);
#endif

                /* Mark vertex we picked from the heap so it is "locked". 
                   For the current strategy, moving only one direction 
                   at a time, the mark information is not critical.
                   Note that the mark array is also used in the move/update 
                   routine so don't remove it! */
                ++mark[v];
                if (lweights[to]+((hg->vwgt)?hg->vwgt[v*hg->VtxWeightDim]:1.0) > lmax_weight[to]) {
#ifdef _DEBUG2                    
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d [%4.0lf, %4.0lf] NF\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1]);
#endif
                    /* Negative value in moves array means we have examined 
                       the vertex but couldn't move it. Note offset by one,
                       otherwise zero would be ambiguous. */
                    moves[movecnt++] = -(v+1);
                    continue;
                } 

                    
                moves[movecnt] = v;
                ++neggaincnt;
                cutsize -= gain[v];

                fm2_move_vertex_oneway(v, hg, part, gain, heap, pins, lpins, weights, lweights, mark, adj);
                imbal = (targetw0==0.0) ? 0.0
                    : fabs(weights[0]-targetw0)/targetw0;
                limbal = (ltargetw0==0.0) ? 0.0
                    : fabs(lweights[0]-ltargetw0)/ltargetw0;

                /* a new best prefix: record it and restart the streak */
                if (notfeasible || (cutsize<best_cutsize) ||
                                   (cutsize==best_cutsize && limbal < best_limbal)) {
#ifdef _DEBUG2                    
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d W[%4.0lf, %4.0lf] I:%.2lf LW[%4.0lf, %4.0lf] LI:%.2lf C:%.1lf<-- Best\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1], imbal, lweights[0], lweights[1], limbal, cutsize); /* after move gain is -oldgain */
#endif
                    notfeasible = weights[from]>max_weight[from];
                    best_cutsize = cutsize;
                    best_cutsizeat = movecnt+1;
                    best_limbal = limbal;
                    neggaincnt = 0;
                }
#ifdef _DEBUG2                
                else
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d [%4.0lf, %4.0lf] %.1lf\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1], cutsize);
#endif
                ++movecnt;
            }

            
            if (detail_timing) {
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpass, hgc->Communicator);
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfroll, hgc->Communicator);
            }

#ifdef _DEBUG
            if (v<0)
                uprintf(hgc, "EOLB @ %d there was no vertex to select: v=%d\n", movecnt, v);
            else if (neggaincnt >= maxneggain) 
                uprintf(hgc, "EOLB @ %d max neg move reached neggaincnt(%d) >= maxneggain(%d)\n", movecnt, neggaincnt, maxneggain);
            else 
                uprintf(hgc, "EOLB @ %d balance constraint LW[%.1lf, %.1lf] and MAXW[%.1lf, %.1lf]\n", movecnt, lweights[0], lweights[1], lmax_weight[0], lmax_weight[1]);
#endif
            
            /* roll back the moves without any improvement */
            for (i=movecnt-1; i>=best_cutsizeat; --i) {
                int vv = moves[i];
                if (vv<0)
                    vv = -vv-1;
                else /* we don't need to roll pins, or weights etc; rolling local ones suffices */
                    fm2_move_vertex_oneway_nonroot(vv, hg, part, lpins, lweights);
                mark[vv] = 0;
            }
            /* unlock the vertices of the accepted prefix as well */
            for (i=0; i<best_cutsizeat; ++i){
                int vv = (moves[i] < 0 ) ? -moves[i] - 1 : moves[i];
                mark[vv] = 0;
            }
            if (detail_timing) 
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfroll, hgc->Communicator);            
        }

        if (detail_timing) 
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfnonroot, hgc->Communicator);            
        
        /* now root bcast moves to column procs */
        MPI_Bcast(&best_cutsizeat, 1, MPI_INT, rootRank, hgc->col_comm);
        MPI_Bcast(moves, best_cutsizeat, MPI_INT, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) { /* now non-root does move simulation */
            for (i=0; i<best_cutsizeat; ++i) {
                int vv = moves[i];
                if (vv>=0)
                    fm2_move_vertex_oneway_nonroot(vv, hg, part, lpins, lweights);
            }
        }
        if (detail_timing) 
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfnonroot, hgc->Communicator);            

        
#ifdef _DEBUG
        for (i = 0; i < hg->nEdge; ++i) {
            int lp[2];

            lp[0] = lp[1] = 0;
            for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j)
                ++(lp[part[hg->hvertex[j]]]);
            if ((lp[0] != lpins[0][i]) || (lp[1] != lpins[1][i]))
                errexit("for net %d -- lp=[%d, %d] lpins[%d, %d]", i, lp[0], lp[1], lpins[0][i], lpins[1][i]);
        }
#endif


#ifdef HANDLE_ISOLATED_VERTICES
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);        
        
#if 0
        MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);        
        best_imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        if (hgc->myProc_y==rootRank)             
            uprintf(hgc, "BEFORE ISOLATED VERTEX HANDLING WE *THINK* GLOBAL IMBALANCE is %.3lf\n", best_imbal);
#endif
        
        if (hgc->myProc_y==rootRank) {
            best_limbal = (ltargetw0==0.0) ? 0.0
                : fabs(lweights[0]-ltargetw0)/ltargetw0;
            
            /* re-assign every isolated vertex to whichever side is
               currently below the local target for part 0 */
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int u = moves[i], npno;
                float w=(hg->vwgt ? hg->vwgt[u*hg->VtxWeightDim] : 1.0);

                npno = (lweights[0] < ltargetw0) ? 0 : 1;
                lweights[npno] += w;
                lwadjust[npno] += w;
                part[u] = -(npno+1); /* move to npno (might be same as pno;
                                        so it may not be a real move */
            }
            limbal = (ltargetw0==0.0) ? 0.0
                : fabs(lweights[0]-ltargetw0)/ltargetw0;
#if 0           
            uprintf(hgc, "before binpacking of %d isolated vertices balance was: %.3lf now: %.3lf\n", hg->nVtx-isocnt, best_limbal, limbal);
#endif
        }

        /* non-root procs apply the net weight change computed by root */
        MPI_Bcast(lwadjust, 2, MPI_DOUBLE, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) {
            lweights[0] += lwadjust[0];
            lweights[1] += lwadjust[1];
        }
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);                
#endif        
        
        MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
#if 0       
        best_imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        if (hgc->myProc_y==rootRank)             
            uprintf(hgc, "NEW GLOBAL IMBALANCE is %.3lf\n", best_imbal);
#endif
        
        /* refresh the local limits for the next pass from the new weights */
        if (weights[0]==0.0) 
            ltargetw0 = lmax_weight[0] = 0.0;
        else {
            lmax_weight[0] = lweights[0] +
                (max_weight[0] - weights[0]) * ( lweights[0] / weights[0] );
            ltargetw0 = targetw0 * ( lweights[0] / weights[0] ); /* local target weight */
        }
        lmax_weight[1] = (weights[1]==0.0) ? 0.0 : lweights[1] +
            (max_weight[1] - weights[1]) * ( lweights[1] / weights[1] );
        
        /* cont is nonzero if any proc in the row accepted at least one move */
        cont = 0;
        MPI_Allreduce(&best_cutsizeat, &cont, 1, MPI_INT, MPI_LOR, hgc->row_comm);

        /* since we're only moving in one direction; make sure two successive
           pass didn't produce any improvement before terminating */
        if (!cont)
            ++successivefails; 
        else
            successivefails = 0; 
#ifdef _DEBUG
        /* Just for debugging */
        best_cutsize = Zoltan_PHG_Compute_NetCut(hgc, hg, part);
        imbal = (targetw0 == 0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        printf("%s End of Pass %d Comp.Cut=%.2lf RealCut=%.2lf W[%5.0lf, %5.0lf] Imbal=%.2lf\n", uMe(hgc), passcnt, cutsize, best_cutsize, weights[0], weights[1], imbal);
        /* debugging code ends here */
#endif
    } while (successivefails<2 &&  (++passcnt < hgp->fm_loop_limit));


#ifdef HANDLE_ISOLATED_VERTICES
    if (detail_timing)         
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);            
    /* now root sends the final part no's of isolated vertices; if any */
    MPI_Bcast(&isocnt, 1, MPI_INT, rootRank, hgc->col_comm);
    if (isocnt<hg->nVtx) {
        deg = (int *) lgain; /* we'll use for part no's of isolated vertices */
        if (hgc->myProc_y==rootRank) 
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int u = moves[i];
                deg[i] = part[u] = -part[u]-1; /* decode -(p+1) back to p */
            }
            
        MPI_Bcast(&moves[isocnt], hg->nVtx-isocnt, MPI_INT, rootRank, hgc->col_comm);
        MPI_Bcast(&deg[isocnt], hg->nVtx-isocnt, MPI_INT, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) 
            for (i=isocnt; i < hg->nVtx; ++i)  /* go over isolated vertices */
                part[moves[i]] = deg[i];
    }
    if (detail_timing)         
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);            
#endif
 End:    

    if (hgc->myProc_y==rootRank) { /* only root needs mark, adj, gain and heaps*/        
        Zoltan_Multifree(__FILE__,__LINE__, 3, &mark, &adj, &gain);
        Zoltan_Heap_Free(&heap[0]);
        Zoltan_Heap_Free(&heap[1]);        
    }
    
    /* pins[1] and lpins[1] point into these same allocations */
    Zoltan_Multifree(__FILE__, __LINE__, 4, &pins[0], &lpins[0], &moves, &lgain);

    if (do_timing) 
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfrefine, hgc->Communicator);
    
    
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ierr;
}
示例#6
0
/*
 * Zoltan_PHG_Gather_To_All_Procs
 *
 * Builds, on every process, a serial copy (*gathered_hg) of the distributed
 * hypergraph phg.  Step 1 gathers pins, edge sizes and edge weights down
 * each processor column (col_comm); step 2 gathers the resulting column
 * hypergraphs, fixed/preferred parts and vertex weights across each
 * processor row (row_comm).
 *
 * Returns ZOLTAN_OK on success and ZOLTAN_FATAL when phg lives on a single
 * process (nothing is allocated in that case).  Allocation failures go
 * through MEMORY_ERROR to the End label; when ierr is negative there, the
 * gathered hypergraph is released with Zoltan_HG_HGraph_Free/ZOLTAN_FREE.
 * A failed internal sanity check prints a message and calls exit(-1).
 */
int Zoltan_PHG_Gather_To_All_Procs(
  ZZ *zz, 
  HGraph *phg,           /* Input:   Local part of distributed hypergraph */
  PHGPartParams *hgp,        /* Input:   Hypergraph parameters */
  PHGComm *scomm,        /* Input:   Serial PHGComm for use by shg. */
  HGraph **gathered_hg   /* Output:  combined hypergraph combined to proc */
)
{
/* 
 * Function to gather distributed hypergraph onto each processor for
 * coarsest partitioning.
 * First hypergraph arrays for the hypergraph on a column of processors
 * are built using MPI_Allgathers down the processor columns.
 * These hypergraph arrays contain complete info about a subset of vertices.
 * Second the column hypergraphs are gathered along processor rows.
 * Each processor then has a complete description of the hypergraph.
 */
char *yo = "Zoltan_PHG_Gather_To_All_Procs";
int ierr = ZOLTAN_OK;
int i, tmp, sum;
int *each = NULL,
    *disp = NULL;      /* Size and displacement arrays for MPI_Allgatherv */
int *send_buf = NULL;    /* Buffer of values to be sent */
int send_size;           /* Size of buffer send_buf */
int *col_vedge = NULL;   /* vedge array for the proc-column hypergraph */
int *col_vindex = NULL;  /* vindex array for the proc-column hypergraph */
int *col_hvertex = NULL; /* hvertex array for the proc-column hypergraph */
int *col_hindex = NULL;  /* hindex array for the proc-column hypergraph */
int col_nVtx;            /* Number of vertices in processor column */
int col_nEdge;           /* Number of edges in processor column */
int col_nPin;            /* Number of pins in processor column */

int *recv_size = NULL;   /* nPins for each proc in col or row */

HGraph *shg;             /* Pointer to the serial hypergraph to be
                            returned by this function. */

int myProc_x = phg->comm->myProc_x;
int nProc_x = phg->comm->nProc_x;
int nProc_y = phg->comm->nProc_y;
int max_nProc_xy = MAX(nProc_x, nProc_y);

  if (phg->comm->nProc == 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Do not call this routine on one proc.");
    return ZOLTAN_FATAL;
  }

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, phg, NULL, stdout, "GatherBefore");/* NULL parts for now;
                                                           add non-NULL later */
#endif

  /******************************************************************
   *  0. Allocate the hypergraph to be returned. 
   *  Set values that we already know. 
   ******************************************************************/

  shg = *gathered_hg = (HGraph *) ZOLTAN_MALLOC(sizeof(HGraph));
  if (!shg) MEMORY_ERROR;

  Zoltan_HG_HGraph_Init(shg);
  shg->nVtx = phg->dist_x[nProc_x];    /* TODO64 - can this exceed 2B? */
  shg->nEdge = phg->dist_y[nProc_y];

  /* The serial hypergraph has a single block in each dimension. */
  shg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  shg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  if (!shg->dist_x || !shg->dist_y) MEMORY_ERROR;

  shg->dist_x[0] = shg->dist_y[0] = 0;
  shg->dist_x[1] = shg->nVtx;
  shg->dist_y[1] = shg->nEdge;

  shg->comm = scomm;

  shg->EdgeWeightDim = phg->EdgeWeightDim;
  shg->VtxWeightDim = phg->VtxWeightDim;
  if (shg->VtxWeightDim && shg->nVtx)
    shg->vwgt = (float *) ZOLTAN_MALLOC(shg->nVtx * shg->VtxWeightDim 
                                                  * sizeof(float));
  if (shg->EdgeWeightDim && shg->nEdge)
    shg->ewgt = (float *) ZOLTAN_MALLOC(shg->nEdge * shg->EdgeWeightDim 
                                                  * sizeof(float));
  /* Fixed vertices */
  shg->bisec_split = phg->bisec_split;
  if (hgp->UseFixedVtx)
    shg->fixed_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  if (hgp->UsePrefPart)
    shg->pref_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  
  /* Allocate arrays for use in gather operations.
   * recv_size, each and disp share one allocation of 3*max_nProc_xy ints. */
  recv_size = (int *) ZOLTAN_MALLOC(3 * max_nProc_xy * sizeof(int));
  each = recv_size + max_nProc_xy;
  disp = each + max_nProc_xy;
 
  /* TODO64 - phg->dist_y[nProc_y] could exceed 2 Billion, NO? */
  send_size = MAX(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x], 
                  phg->dist_y[nProc_y]);
  send_buf = (int *) ZOLTAN_MALLOC(send_size * sizeof(int));
  

  /* One combined check for everything allocated above.  The fixed/pref part
   * arrays are included so they are never written through a NULL pointer. */
  if ((shg->VtxWeightDim && shg->nVtx && !shg->vwgt) ||
      (shg->EdgeWeightDim && shg->nEdge && !shg->ewgt) || !recv_size ||
      (hgp->UseFixedVtx && shg->nVtx && !shg->fixed_part) ||
      (hgp->UsePrefPart && shg->nVtx && !shg->pref_part) ||
      (send_size && !send_buf)) 
    MEMORY_ERROR;
  

  /*************************************************************
   *  1. Gather all non-zeros for vertices in processor column *
   *************************************************************/
  
  if (nProc_y == 1) {
    /* 
     * Don't need a gather; just set pointers appropriately for row-gather
     * in Step 2 below.
     */

    col_nVtx = phg->nVtx;
    col_nEdge = phg->nEdge;
    col_nPin = phg->nPins;
    col_vindex = phg->vindex;
    col_vedge = phg->vedge;
    col_hindex = phg->hindex;
    col_hvertex = phg->hvertex;

    for (i = 0; i < shg->EdgeWeightDim * shg->nEdge; i++)
      shg->ewgt[i] = phg->ewgt[i];
  }

  else {

    /* Gather local size info for each proc in column */

    MPI_Allgather(&(phg->nPins), 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->col_comm);
  
    /* Compute number of vtx, edge, and nnz in column */
    col_nVtx = (int)(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x]);
    col_nEdge = phg->dist_y[nProc_y];   /* SCHEMEA */
    col_nPin = 0;
    for (i = 0; i < nProc_y; i++) {
      col_nPin += recv_size[i];
    }
    
    /* Allocate arrays for column hypergraph */
    col_hindex = (int *) ZOLTAN_CALLOC((col_nEdge+1), sizeof(int));
    col_hvertex = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    col_vindex = (int *) ZOLTAN_CALLOC((col_nVtx+1), sizeof(int));
    col_vedge = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    if (!col_vindex || !col_hindex || 
        (col_nPin && (!col_vedge || !col_hvertex)))
      MEMORY_ERROR;
    
    /* Gather hvertex data for all procs in column */
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex LNO to GNO here. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_y; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(phg->hvertex, phg->nPins, MPI_INT,
                   col_hvertex, recv_size, disp, MPI_INT, phg->comm->col_comm);
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex GNO to LNO here */
  
    /* Gather hindex data for all procs in column */
  
    /* Send per-edge pin counts; they are turned back into offsets below. */
    for (i = 0; i < phg->nEdge; i++)
      send_buf[i] = phg->hindex[i+1] - phg->hindex[i];
  
    /* SCHEMEA can assume a recv for each edge;
     * SCHEMEB needs to gather the number of edges recv'd from each proc. */
  
    for (i = 0; i < nProc_y; i++) 
      each[i] = phg->dist_y[i+1] - phg->dist_y[i];

    disp[0] = 0;  /* Can't use dist_y because it may not be sizeof(int) */
    for (i=1; i < nProc_y; i++){
      disp[i] = disp[i-1] + each[i-1];
    }
  
    /* SCHEMEA can use phg->dist_y for displacement array.
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, phg->nEdge, MPI_INT, 
                   col_hindex, each, disp, MPI_INT, phg->comm->col_comm);
  
    /* Perform prefix sum on col_hindex */
    sum = 0;
    for (i = 0; i < col_nEdge; i++) {
      tmp = col_hindex[i];
      col_hindex[i] = sum;
      sum += tmp;
    }
    col_hindex[col_nEdge] = sum;

    /* Sanity check */
    if (col_hindex[col_nEdge] != col_nPin) {
      printf("%d Sanity check failed:  "
             "col_hindex[col_nEdge] %d != col_nPin %d\n", 
              zz->Proc, col_hindex[col_nEdge], col_nPin);
      exit(-1);
    }
  
    /* Gather edge weights, if any. */
    if (shg->EdgeWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->EdgeWeightDim;
      for (i = 1; i < nProc_y; i++) {
        each[i] *= phg->EdgeWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->ewgt, phg->nEdge*phg->EdgeWeightDim, MPI_FLOAT, 
                     shg->ewgt, each, disp, MPI_FLOAT, phg->comm->col_comm);
    }
   
  
    /* Build the vertex-based arrays of the column hypergraph from the
     * edge-based ones just gathered. */
    Zoltan_HG_Mirror(col_nEdge, col_hindex, col_hvertex, 
                     col_nVtx, col_vindex, col_vedge);
  
  }  /* End column-gather */
  
  /*************************************************************
   *  2. Gather all non-zeros for edges in processor rows      *
   *  All processors in a processor column now have the same   *
   *  hypergraph; we now gather it across rows.                *
   *************************************************************/

  if (nProc_x == 1) {
    /* 
     * Don't need a gather across the row; just set pointers appropriately
     * in shg.
     */
    /* NOTE(review): shg->nPins is not assigned on this path; confirm that
     * callers do not rely on it here. */
    shg->vindex = col_vindex;
    shg->vedge = col_vedge;
    shg->hindex = col_hindex;
    shg->hvertex = col_hvertex;

    /* Copy vwgt and fixed arrays so shg owns this memory */
    for (i = 0; i < shg->VtxWeightDim*shg->nVtx; i++)
      shg->vwgt[i] = phg->vwgt[i];
    if (hgp->UseFixedVtx)
      for (i = 0; i < shg->nVtx; i++)
        shg->fixed_part[i] = phg->fixed_part[i];
    if (hgp->UsePrefPart)
      for (i = 0; i < shg->nVtx; i++)
        shg->pref_part[i] = phg->pref_part[i];
  }

  else {

    /* Gather info about size within the row */

    MPI_Allgather(&col_nPin, 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->row_comm);
  
    tmp = 0;
    for (i = 0; i < nProc_x; i++) 
      tmp += recv_size[i];

    shg->nPins = tmp;
  
    shg->vindex = (int *) ZOLTAN_CALLOC((shg->nVtx+1), sizeof(int));
    shg->vedge = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
    shg->hindex = (int *) ZOLTAN_CALLOC((shg->nEdge+1), sizeof(int));
    shg->hvertex = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
   
    if (!shg->vindex || !shg->hindex ||
        (shg->nPins && (!shg->vedge || !shg->hvertex)))
      MEMORY_ERROR;
    
    /* Gather vedge data for all procs in row */
  
    /* SCHEMEA can send local edge numbers; 
       SCHEMEB requires edge LNO to GNO conversion. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(col_vedge, col_nPin, MPI_INT,
                   shg->vedge, recv_size, disp, MPI_INT, phg->comm->row_comm);
  
    /* Gather vindex data for all procs in row */
  
    /* Send per-vertex pin counts; they are turned back into offsets below. */
    for (i = 0; i < col_nVtx; i++)
      send_buf[i] = col_vindex[i+1] - col_vindex[i];
  
    /* SCHEMEA can assume a recv for each vertex;
     * SCHEMEB would need to gather the number of vtxs recv'd from each proc. */
  
    for (i = 0; i < nProc_x; i++) 
      each[i] = (int)(phg->dist_x[i+1] - phg->dist_x[i]);

    disp[0] = 0;  /* Can't use dist_x, may not be sizeof(int) */
    for (i = 1; i < nProc_x; i++) 
      disp[i] = disp[i-1] + each[i-1];

    /* SCHEMEA can use phg->dist_x as displacement array;
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, col_nVtx, MPI_INT, 
                   shg->vindex, each, disp,
                   MPI_INT, phg->comm->row_comm);

    /* Perform prefix sum on shg->vindex */
    sum = 0;
    for (i = 0; i < shg->nVtx; i++) {
      tmp = shg->vindex[i];
      shg->vindex[i] = sum;
      sum += tmp;
    }
    shg->vindex[shg->nVtx] = sum;
  
    /* Sanity check */
    if (shg->vindex[shg->nVtx] != shg->nPins) {
      printf("%d Sanity check failed:  "
             "shg->vindex %d != nPins %d\n", 
              zz->Proc, shg->vindex[shg->nVtx], shg->nPins);
      exit(-1);
    }
  
    /* Gather fixed array, if any  */
    if (hgp->UseFixedVtx){
  
#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather before gather. phg->fixed =");
      for (i=0; i<phg->nVtx; i++){
        printf(" %d ", phg->fixed_part[i]);
      }
      printf("\n");
#endif

      /* Can use the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      /* fixed_part holds ints, so the MPI datatype must be MPI_INT. */
      MPI_Allgatherv(phg->fixed_part, phg->nVtx, MPI_INT, 
                     shg->fixed_part, each, disp, MPI_INT, phg->comm->row_comm);

#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather after gather. shg->fixed =");
      for (i=0; i<shg->nVtx; i++){
        printf(" %d ", shg->fixed_part[i]);
      }
      printf("\n");
#endif
    }
    /* Gather pref part array, if any  */
    if (hgp->UsePrefPart){
      /* Can use the same each array. */
      /* Need to compute new disp array. */
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      /* pref_part holds ints, so the MPI datatype must be MPI_INT. */
      MPI_Allgatherv(phg->pref_part, phg->nVtx, MPI_INT, 
                     shg->pref_part, each, disp, MPI_INT, phg->comm->row_comm);
    }
    
    /* Gather vertex weights, if any. */
    if (shg->VtxWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->VtxWeightDim;
      for (i = 1; i < nProc_x; i++) {
        each[i] *= phg->VtxWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->vwgt, phg->nVtx*phg->VtxWeightDim, MPI_FLOAT, 
                     shg->vwgt, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
  
    /* Build the edge-based arrays of shg from the vertex-based ones. */
    Zoltan_HG_Mirror(shg->nVtx, shg->vindex, shg->vedge, 
                     shg->nEdge, shg->hindex, shg->hvertex);

  }  /* End row gather */
  
#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, shg, NULL, stdout, "GatherAfter");/* NULL parts for now;
                                                           add non-NULL later */
  Zoltan_PHG_Plot_2D_Distrib(zz, phg);
  Zoltan_PHG_Plot_2D_Distrib(zz, shg);
#endif

End:

  if (ierr < 0) {
    Zoltan_HG_HGraph_Free(*gathered_hg);
    ZOLTAN_FREE(gathered_hg);
  }

  Zoltan_Multifree(__FILE__, __LINE__, 2, &send_buf, 
                                          &recv_size);

  /* The column arrays are temporaries only when both gathers ran:
   * with nProc_y == 1 they alias phg's arrays, and with nProc_x == 1
   * they were handed over to shg above. */
  if (nProc_x > 1 && nProc_y > 1) 
    Zoltan_Multifree(__FILE__, __LINE__, 4, &col_vedge,
                                            &col_vindex,
                                            &col_hvertex,
                                            &col_hindex);
  return ierr;
}
示例#7
0
/*
 * Zoltan_PHG_Redistribute_Hypergraph
 *
 * Moves the hypergraph ohg (distributed over ohg->comm) onto the processor
 * grid described by ncomm, using v2Col/n2Row to pick the destination column
 * of each vertex and the destination row of each net.  Pins, the vertex map,
 * vertex weights, fixed/preferred parts and edge weights are sent with
 * Zoltan_Comm plans; data that arrives only at the first row (or first
 * column) of ncomm is then broadcast along ncomm->col_comm (row_comm).
 * Processes with ncomm->myProc == -1 end up with an empty nhg.
 *
 * ierr accumulates the Zoltan_Comm_Create results with |= and, for
 * processes inside ncomm, is finally overwritten by the result of
 * Zoltan_HG_Create_Mirror.  Allocation failures go through MEMORY_ERROR.
 */
static int Zoltan_PHG_Redistribute_Hypergraph(
    ZZ *zz, 
    PHGPartParams *hgp,     /* Input:  parameters; used only for UseFixedVtx */
    HGraph  *ohg,           /* Input:  Local part of distributed hypergraph */
    int     firstproc,      /* Input:  rank (in ocomm) of the first proc of 
                                       the ncomm*/
    int     *v2Col,         /* Input:  Vertex to processor Column Mapping */
    int     *n2Row,         /* Input:  Net to processor Row Mapping */
    PHGComm *ncomm,         /* Input:  communicators of new distribution */
    HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
    int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                               vertex map from nhg to ohg's local vertex number*/
    int     **vdest         /* Output: allocated with the size nhg->nVtx and
                               stores dest proc in ocomm */
    )
{
    char * yo = "Zoltan_PHG_Redistribute_Hypergraph";
    PHGComm *ocomm = ohg->comm;
    int ierr=ZOLTAN_OK;
    int i, v, n, nPins, nsend, elemsz, nVtx, nEdge;
    int msg_tag = 9999;
    int *proclist=NULL, *sendbuf=NULL;
    int *vno=NULL, *nno=NULL, *dist_x=NULL, *dist_y=NULL,
        *vsn=NULL, *nsn=NULL, *pins=NULL, *cnt=NULL;
    ZOLTAN_COMM_OBJ *plan;    
    
    Zoltan_HG_HGraph_Init (nhg);
    nhg->comm = ncomm;
    
    nhg->dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nhg->dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vno = (int *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(int));
    nno = (int *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(int));

    if (!nhg->dist_x || !nhg->dist_y || !dist_x || !dist_y ||
        !vsn || !nsn || (ohg->nVtx && !vno) || (ohg->nEdge && !nno) ) {
        uprintf(ocomm, " new comm nProcx=%d nProcy=%d nvtx=%d nedge=%d", ncomm->nProc_x, ncomm->nProc_y, ohg->nVtx, ohg->nEdge);
        MEMORY_ERROR;
    }
      
    /* Count local vertices per destination column and nets per dest. row. */
    for (v = 0; v < ohg->nVtx; ++v)
        ++dist_x[v2Col[v]];
    for (n = 0; n < ohg->nEdge; ++n)
        ++dist_y[n2Row[n]];

    /* UVCUVC: CHECK ASSUMPTION
       This code assumes that the objects in the receive buffer of
       Zoltan_Comm_Do function are
         1- in the increasing processor order,
         2- order of the items send by a processor is preserved.
     */
    

    /* compute prefix sum to find new vertex start numbers; for each processor */
    MPI_Scan(dist_x, vsn, ncomm->nProc_x, MPI_INT, MPI_SUM, ocomm->row_comm);
    /* All reduce to compute how many each processor will have */ 
    MPI_Allreduce(dist_x, &(nhg->dist_x[1]), ncomm->nProc_x, MPI_INT, MPI_SUM, 
                  ocomm->row_comm);
    nhg->dist_x[0] = 0;    
    for (i=1; i<=ncomm->nProc_x; ++i) 
        nhg->dist_x[i] += nhg->dist_x[i-1];
    
    MPI_Scan(dist_y, nsn, ncomm->nProc_y, MPI_INT, MPI_SUM, ocomm->col_comm);

    MPI_Allreduce(dist_y, &(nhg->dist_y[1]), ncomm->nProc_y, MPI_INT, MPI_SUM, ocomm->col_comm);
    nhg->dist_y[0] = 0;
    for (i=1; i<=ncomm->nProc_y; ++i)
        nhg->dist_y[i] += nhg->dist_y[i-1];

#ifdef _DEBUG1
    PrintArr(ocomm, "vsn", vsn, ncomm->nProc_x);
    PrintArr(ocomm, "nsn", nsn, ncomm->nProc_y);
#endif
    
    /* find mapping of current LOCAL vertex no (in my node)
       to "new" vertex no LOCAL to dest node*/
    /* Walking backwards while pre-decrementing the inclusive scan keeps the
       local order of the items sent to each destination. */
    for (v = ohg->nVtx-1; v>=0; --v)
        vno[v] = --vsn[v2Col[v]];
    for (n = ohg->nEdge-1; n>=0; --n)
        nno[n] = --nsn[n2Row[n]];

    /* One send buffer is reused for pins, vertex data and edge data, so it
       is sized for the largest item count and the largest element. */
    nsend = MAX(MAX(ohg->nPins, ohg->nVtx), ohg->nEdge);
    elemsz = MAX(MAX(2, ohg->VtxWeightDim), ohg->EdgeWeightDim);
    elemsz = (sizeof(float)>sizeof(int)) ? sizeof(float)*elemsz : sizeof(int)*elemsz;

    proclist = (int *) ZOLTAN_MALLOC(nsend * sizeof(int));
    sendbuf = (int *) ZOLTAN_MALLOC(nsend * elemsz);

    /* Both buffers are written below; do not continue without them. */
    if (nsend && (!proclist || !sendbuf))
        MEMORY_ERROR;

    /* first communicate pins */
    nPins = 0;
    for (v = 0; v < ohg->nVtx; ++v) { 
        for (i = ohg->vindex[v]; i < ohg->vindex[v+1]; ++i) {
#ifdef _DEBUG1
            if ((n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])<0 ||
                (n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])>=ocomm->nProc)
                errexit("vertex %d vedge[%d]=%d n2Row=%d #Proc_x=%d v2Col=%d", v, i, ohg->vedge[i], n2Row[ohg->vedge[i]], ncomm->nProc_x , v2Col[v]);
#endif
            /* Each pin travels as a (new vertex no, new net no) pair. */
            proclist[nPins]   = firstproc + n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v];
            sendbuf[2*nPins]  = vno[v];
            sendbuf[2*nPins+1]= nno[ohg->vedge[i]];
            ++nPins; 
        }
    }
#ifdef _DEBUG1
    if (nPins!=ohg->nPins) {
        uprintf(ocomm, "sanity check failed nPins(%d)!=hg->nPins(%d)\n", nPins, ohg->nPins);
        errexit("terminating");
    }
#endif

    /* From here on nPins is the number of pins this process receives. */
    --msg_tag;
    ierr |= Zoltan_Comm_Create(&plan, ohg->nPins, proclist, ocomm->Communicator,
                               msg_tag, &nPins);

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nPins>1) { /* this processor is not in new comm but receiving data?*/
        uprintf(ocomm, "Something wrong; why I'm receiving data nPins=%d\n", nPins);
        errexit("terminating");
    }
#endif
    
    if (nPins && (pins = (int *) ZOLTAN_MALLOC(nPins * 2 * sizeof(int)))==NULL) 
        MEMORY_ERROR;

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, 2*sizeof(int),
                   (char *) pins);
    Zoltan_Comm_Destroy(&plan);

    /* now communicate vertex map */
    nsend = 0;
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) { 
            proclist[nsend] = firstproc+v2Col[v];
            sendbuf[nsend++] = ohg->vmap[v];
        }
    }
        
    --msg_tag; 
    ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                               msg_tag, &nVtx); 

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nVtx>1) { /* this processor is not in new comm but receiving data?*/ 
        uprintf(ocomm, "Something wrong; why I'm receiving data nVtx=%d\n", nVtx);
        errexit("terminating");
    }
#endif

    /* those are only needed in the first row of ncomm */
    *vmap = *vdest = NULL;  
    if (!ncomm->myProc_y && nVtx &&
        (!(*vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int))) ||
         !(*vdest = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))))
        MEMORY_ERROR;
    
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vmap);

    /* Reuse the same plan to tell each receiver which process sent it. */
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) 
            sendbuf[v] = ocomm->myProc;
    }
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vdest);
        
    if (ncomm->myProc!=-1) { /* I'm in the new comm */
        /* ncomm's first row now bcast to other rows */
        MPI_Bcast(&nVtx, 1, MPI_INT, 0, ncomm->col_comm);
#ifdef _DEBUG1
        if (nVtx!=(nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]))
            errexit("nVtx(%d)!= nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x](%d)", nVtx, nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]);
#endif
        if (nVtx && (nhg->vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))==NULL) 
            MEMORY_ERROR;
        /* nhg starts with an identity vertex map. */
        for (i=0; i<nVtx; ++i)
            nhg->vmap[i] = i;
    }


    /* now communicate vertex weights */
    if (ohg->VtxWeightDim) {
        if (nVtx)
            nhg->vwgt = (float*) ZOLTAN_MALLOC(nVtx*ohg->VtxWeightDim*sizeof(float));
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->vwgt,
                       ohg->VtxWeightDim*sizeof(float), (char *) nhg->vwgt);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->vwgt, nVtx*ohg->VtxWeightDim, MPI_FLOAT, 0, ncomm->col_comm);
    }    

    /* communicate fixed vertices, if any */
    if (hgp->UseFixedVtx) {
        if (nVtx)
            nhg->fixed_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->fixed_part,
                       sizeof(int), (char *) nhg->fixed_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->fixed_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }    
    /* communicate pref parts, if any */
    if (hgp->UsePrefPart) {
        if (nVtx)
            nhg->pref_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->pref_part,
                       sizeof(int), (char *) nhg->pref_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->pref_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }    

    /* this comm plan is no longer needed. */
    Zoltan_Comm_Destroy(&plan);

    
    if (ohg->EdgeWeightDim) { /* now communicate edge weights */
        nsend = 0;
        if (!ocomm->myProc_x)  /* only first column sends to first column of ncomm */
            for (n = 0; n < ohg->nEdge; ++n) 
                proclist[nsend++] = firstproc + n2Row[n]*ncomm->nProc_x;
    
        --msg_tag;
        ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                                   msg_tag, &nEdge);

#ifdef _DEBUG1
        if (ncomm->myProc==-1 && nEdge>1) { /* this processor is not in new comm but receiving data?*/
            uprintf(ocomm, "Something wrong; why I'm receiving data nEdge=%d\n", nEdge);
            errexit("terminating");
        }
#endif
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            MPI_Bcast(&nEdge, 1, MPI_INT, 0, ncomm->row_comm);
#ifdef _DEBUG1
            if (nEdge != (nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]))
            errexit("nEdge(%d)!=nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y](%d)", nEdge, nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
#endif
        }
        
        if (nEdge)
            nhg->ewgt = (float*) ZOLTAN_MALLOC(nEdge*ohg->EdgeWeightDim*sizeof(float));
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->ewgt,
                       ohg->EdgeWeightDim*sizeof(float), (char *) nhg->ewgt);
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            if (nEdge) 
                MPI_Bcast(nhg->ewgt, nEdge*ohg->EdgeWeightDim, MPI_FLOAT, 0, 
                          ncomm->row_comm);
        }

        Zoltan_Comm_Destroy(&plan);
    } else 
        nEdge = (ncomm->myProc==-1) 
                ? 0 
                : nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
    

    if (ncomm->myProc==-1) {
#ifdef _DEBUG1
        if (nPins || nVtx || nEdge)
            errexit("I should not have any data: hey nPins=%d  nVtx=%d  nEdge=%d\n", nPins, nVtx, nEdge);
#endif
        nhg->nEdge = nhg->nVtx = nhg->nPins = 0;
    } else {
        nhg->nEdge = nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
        nhg->nVtx = nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x];
        nhg->nPins = nPins;
    
        /* Unpack the pins received. */
        cnt = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vindex = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vedge = (int *) ZOLTAN_MALLOC(nhg->nPins * sizeof(int));
        
        if (!cnt || !nhg->vindex || (nPins && !nhg->vedge))
            MEMORY_ERROR;

        /* Count the number of pins per vertex */
        for (i = 0; i < nPins; ++i)
            ++cnt[pins[2*i]];
        
        /* Compute prefix sum to represent vindex correctly; cnt[i] then
           becomes the next free slot of vertex i in vedge. */
        for (i = 0; i < nhg->nVtx; ++i)  {
            nhg->vindex[i+1] = nhg->vindex[i] + cnt[i];
            cnt[i] = nhg->vindex[i];
        }

        for (i = 0; i < nPins; ++i) 
            nhg->vedge[cnt[pins[2*i]]++] = pins[2*i+1];
        
        nhg->info               = ohg->info;
        nhg->VtxWeightDim       = ohg->VtxWeightDim;
        nhg->EdgeWeightDim      = ohg->EdgeWeightDim;

        ierr = Zoltan_HG_Create_Mirror(zz, nhg);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
            MEMORY_ERROR;
    }

 End:
    /* Only the temporaries are released here; nhg, *vmap and *vdest are
       left to the caller. */
    Zoltan_Multifree(__FILE__, __LINE__, 10,
                     &proclist, &sendbuf, &pins, &cnt,
                     &vno, &nno, &dist_x, &dist_y, &vsn, &nsn
        );
    
    return ierr;
}
示例#8
0
/*
 * Selects the arrays that should actually be packed for migration.
 *
 * When zz->Migrate.Only_Proc_Changes is not set, or when every object has
 * a destination other than zz->Proc, the input arrays are passed through
 * unchanged and *actual_allocated stays 0.  Otherwise, if at least one
 * object changes processor, new arrays holding only those objects are
 * allocated and *actual_allocated is set to 1.  If no object changes
 * processor, only *actual_num (0) and *actual_allocated (0) are written.
 *
 * Returns ZOLTAN_OK, or ZOLTAN_MEMERR when an allocation fails (the partly
 * allocated arrays are released before returning).
 */
static int actual_arrays(
    ZZ *zz,
    int num_gid_entries,
    int num_lid_entries,
    int num,
    ZOLTAN_ID_PTR gids,
    ZOLTAN_ID_PTR lids,
    int *procs,
    int *to_part,
    int *actual_num,
    ZOLTAN_ID_PTR *actual_gids,
    ZOLTAN_ID_PTR *actual_lids,
    int **actual_procs,
    int **actual_to_part,
    int *actual_allocated
)
{
    char *yo = "actual_arrays";
    int src, dst;
    int kept;

    *actual_allocated = 0;

    if (zz->Migrate.Only_Proc_Changes) {
        /* Count the objects whose destination differs from this processor. */
        kept = 0;
        for (src = 0; src < num; src++) {
            if (procs[src] != zz->Proc)
                kept++;
        }
        *actual_num = kept;
    }
    else {
        /* Pack everything, including objects that stay on this processor. */
        *actual_num = num;
    }

    if (*actual_num == num) {
        /* Nothing to strip: hand the caller's arrays straight back. */
        *actual_gids = gids;
        *actual_lids = lids;
        *actual_procs = procs;
        *actual_to_part = to_part;
        return ZOLTAN_OK;
    }

    if (*actual_num <= 0) {
        /* Nothing leaves this processor; output arrays are left untouched. */
        return ZOLTAN_OK;
    }

    /* Some, but not all, objects move: build compacted copies. */
    *actual_allocated = 1;
    *actual_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, *actual_num);
    *actual_lids = ZOLTAN_MALLOC_LID_ARRAY(zz, *actual_num);
    *actual_procs = (int *) ZOLTAN_MALLOC(sizeof(int) * (*actual_num));
    if (to_part != NULL) {
        *actual_to_part = (int *) ZOLTAN_MALLOC(sizeof(int)*(*actual_num));
    }

    if (*actual_gids == NULL ||
        (num_lid_entries && *actual_lids == NULL) ||
        *actual_procs == NULL ||
        (to_part != NULL && *actual_to_part == NULL)) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory Error.");
        Zoltan_Multifree(__FILE__, __LINE__, 4,
                         actual_gids, actual_lids,
                         actual_procs, actual_to_part);
        return (ZOLTAN_MEMERR);
    }

    dst = 0;
    for (src = 0; src < num; src++) {
        if (procs[src] == zz->Proc)
            continue;   /* stays local: not packed */

        ZOLTAN_SET_GID(zz,
                       *actual_gids + dst*num_gid_entries,
                       gids + src*num_gid_entries);
        if (num_lid_entries) {
            ZOLTAN_SET_LID(zz,
                           *actual_lids + dst*num_lid_entries,
                           lids + src*num_lid_entries);
        }
        (*actual_procs)[dst] = procs[src];
        if (to_part) {
            (*actual_to_part)[dst] = to_part[src];
        }
        dst++;
    }

    return ZOLTAN_OK;
}
示例#9
0
文件: phg.c 项目: askhl/octopus-dfrt2
/*
 * Diagnostic helper: counts and reports "left-alone" vertices, i.e.
 * vertices that have no neighbour in their own part but do have neighbours
 * in other parts.
 *
 *   hg    - local part of the distributed hypergraph
 *   p     - number of parts
 *   parts - part number of each local vertex
 *
 * Per-edge pin counts per part are summed over hgc->row_comm; per-vertex
 * neighbour counts are then summed over hgc->col_comm onto row 0, which
 * prints the first few offenders and the total.  The neighbour counts are
 * over-counted, as the printed message itself notes.
 * zz and hgp are not referenced directly in this function.
 * Returns ierr, which is ZOLTAN_OK unless MEMORY_ERROR was taken.
 */
static int findAndSaveLeftAloneVertices(ZZ *zz, HGraph *hg, int p, 
                                 Partition parts,
                                 PHGPartParams *hgp) 
{
    char *yo="findAndSaveLeftAloneVertices";
    int *lneigh[2]={NULL, NULL}, *neigh[2]={NULL, NULL}, i, j, ierr=ZOLTAN_OK;
    int *lpins=NULL, *pins=NULL;
    PHGComm *hgc=hg->comm;

    if (hg->nEdge && (!(lpins = (int*) ZOLTAN_CALLOC(p * hg->nEdge, sizeof(int))) ||
                      !(pins  = (int*) ZOLTAN_MALLOC(p * hg->nEdge * sizeof(int)))))
        MEMORY_ERROR;

    /* lpins[e*p + k] = number of local pins of edge e that lie in part k */
    for (i = 0; i < hg->nEdge; ++i)
        for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j)
            ++lpins[i*p+parts[hg->hvertex[j]]];
    if (hg->nEdge)
        MPI_Allreduce(lpins, pins, p*hg->nEdge, MPI_INT, MPI_SUM, 
                      hgc->row_comm);
    
    /* Both halves of lneigh/neigh live in one allocation of 2*nVtx ints:
       [0] = neighbours in the vertex's own part, [1] = in other parts. */
    if (hg->nVtx && !(lneigh[0]  = (int*) ZOLTAN_MALLOC(2 * hg->nVtx * sizeof(int))))
        MEMORY_ERROR;
    if (!hgc->myProc_y) 
        if (hg->nVtx && !(neigh[0]  = (int*) ZOLTAN_MALLOC(2 * hg->nVtx * sizeof(int))))
            MEMORY_ERROR;

    if (hg->nVtx) {
        lneigh[1] = &(lneigh[0][hg->nVtx]);
        if (!hgc->myProc_y)
            neigh[1] = &(neigh[0][hg->nVtx]);
    }

    for (i = 0; i < hg->nVtx; ++i) {
        int pno = parts[i];
        lneigh[0][i] = lneigh[1][i] = 0;
        for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
            int edge = hg->vedge[j], k;
            lneigh[0][i] += (pins[edge*p+pno]-1); /* exclude the vertex's itself */
            for (k=0; k<p; ++k)
                if (k!=pno)
                    lneigh[1][i] += pins[edge*p+k];            
        }
    }
    
    /* Only row 0 of each column receives the summed counts. */
    if (hg->nVtx) 
        MPI_Reduce(lneigh[0], neigh[0], 2*hg->nVtx, MPI_INT, MPI_SUM, 0, hgc->col_comm);

    if (!hgc->myProc_y) {
        int alone=0, galone=0;        
        for (i=0; i<hg->nVtx; ++i)
            /* Test the per-vertex counts, not the array pointers. */
            if (!neigh[0][i] && neigh[1][i]) {
                ++alone;
                if (alone<10)
                    uprintf(hgc, "vertex %d is alone in part %d but it has %d neighbours (!overcounted!) on other %d parts\n", i, parts[i], neigh[1][i], p-1);
            }
        MPI_Reduce(&alone, &galone, 1, MPI_INT, MPI_SUM, 0, hgc->row_comm);
        if (!hgc->myProc)
            uprintf(hgc, "There are %d left-alone vertices\n", galone);        
    }
End:
    Zoltan_Multifree(__FILE__,__LINE__, 4, &lpins, &pins, &lneigh[0], &neigh[0]);
    return ierr;
}
示例#10
0
/*
 * Partition a hypergraph by calling the external ParKway library.
 *
 * When Zoltan is compiled without ZOLTAN_PARKWAY this routine only prints an
 * error and returns ZOLTAN_FATAL.  Otherwise it:
 *   - builds integer vertex/edge weight arrays (all ones when the
 *     corresponding weight dimension is 0),
 *   - gives each process a contiguous slice of hg->nVtx / nProc vertices
 *     (the last process also takes the remainder),
 *   - fills ParKway's options array and calls Zoltan_ParaPartKway,
 *   - gathers every process's slice of the result into partvec on all
 *     processes.
 *
 * Returns ierr, which starts as ZOLTAN_OK; the error paths go through
 * ZOLTAN_PARKWAY_ERROR (defined elsewhere; presumably records the code in
 * ierr and jumps to End -- confirm against the macro definition).
 */
int Zoltan_PHG_ParKway(
  ZZ        *zz,
  HGraph    *hg,
  int       nparts,           /* # of desired partitions */
  Partition partvec,          /* Output:  partition assignment vector */
  PHGPartParams *hgp          /* Input: hypergraph parameters */  
)
{
    int ierr = ZOLTAN_OK;
    char *yo = "Zoltan_PHG_ParKway";   /* routine name used in error messages */

#ifndef ZOLTAN_PARKWAY
    /* The two literals are concatenated by the compiler, so the first one
       must end with a space. */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "ParKway method selected but Zoltan is not "
                       "built and linked with ParKway.");
    return ZOLTAN_FATAL;
#else

    int options[29];                      /* ParKway options */
    int *ivwgts = NULL, *iewgts = NULL;   /* ParKway expects integer weights. */
    int *pvector=NULL;                    /* partvec for "local" vertices */
    int cut;                              /* Diagnostics from ParKway */
    double constraint;                    /* imbalance ratio */
    int i, anVtx, nVtx;                   /* counter and local vertex cnt (for first k-1 parts)*/
    PHGComm *hgc=hg->comm;
    int *disp=NULL, *recv_size=NULL;      /* for allgatherv */
    static int seed=1;                    /* incremented on every call so each
                                             call passes a different seed */
    
    /* ParKway expects integer weights; convert if weights are provided. */
    /* NOTE(review): unlike the pvector check further down, this check is not
       guarded by the element count.  If ZOLTAN_MALLOC returns NULL for a
       zero-byte request, a process with no vertices or no edges would be
       reported as out of memory -- confirm which behavior is intended. */
    ivwgts = (int *) ZOLTAN_MALLOC(hg->nVtx  * sizeof(int));
    iewgts = (int *) ZOLTAN_MALLOC(hg->nEdge * sizeof(int));    
    if (!ivwgts || !iewgts)
        ZOLTAN_PARKWAY_ERROR("Memory error.", ZOLTAN_MEMERR);
    
    
    if (hg->VtxWeightDim > 1) { 
        ZOLTAN_PARKWAY_ERROR("ParKway supports Vtx_Weight_Dim == 0 or 1 only.",
                             ZOLTAN_FATAL);
    } else if (hg->VtxWeightDim == 1) 
        scale_round_weights(hg->vwgt, ivwgts, hg->nVtx, hg->VtxWeightDim, 0);
    else 
        for (i=0; i<hg->nVtx; ++i)
            ivwgts[i] = 1;       /* no weights given: unit weight per vertex */

        
    if (hg->EdgeWeightDim > 1) {
        ZOLTAN_PARKWAY_ERROR("ParKway supports Edge_Weight_Dim == 0 or 1 only.",
                             ZOLTAN_FATAL);
    } else if (hg->EdgeWeightDim == 1) 
        scale_round_weights(hg->ewgt, iewgts, hg->nEdge, hg->EdgeWeightDim, 0);
    else
        for (i=0; i<hg->nEdge; ++i)
            iewgts[i] = 1;       /* no weights given: unit weight per edge */
    
    
    
    /* Every process handles anVtx vertices; the last one also takes whatever
       is left over after the integer division. */
    anVtx = hg->nVtx / hgc->nProc;
    nVtx = (hgc->myProc==hgc->nProc-1) ? hg->nVtx-(anVtx*(hgc->nProc-1)) : anVtx;

    pvector = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int));
    disp = (int *) ZOLTAN_MALLOC(hgc->nProc * sizeof(int));
    recv_size = (int *) ZOLTAN_MALLOC(hgc->nProc * sizeof(int));
    if ((nVtx && !pvector) || !disp || !recv_size)
        ZOLTAN_PARKWAY_ERROR("Memory error.", ZOLTAN_MEMERR);


    /* ----- Set ParKway's options --------------- */
    options[0] = 1;//0 -> all options use default, else user define
    options[1] = seed++;//0 -> seed chosen by sprng, else use options[1] as seed
    options[2] = 0;//0 -> no disp info, 1 -> some, 2 -> lots
    options[3] = 1;//0 -> do not write partition to disk, 1 -> do write
    options[4] = 1;//number of parallel runs
    options[5] = 0;//vertex to processor allocation: 0 -> as read in, 1 -> random 2 -> as prescribed in partition file 
    options[6] = 100;//hyperedge length percentile for approx para coarsening and refinement
    options[7] = 1;//increment in percentile options[6]
    options[8] = 200;//numParts*options[5] -> min number of vertices in coarse hypergraph
    options[9] = 7;//[9] and [10] specify reduction ratio in parallel coarsening
    options[10] = 4;//r = [9]/[10]
    options[11] = 3;//vertex visit order: 3 -> random, 1/2 inc/dec by vertex id, 4/5 inc/dec by vertex wt
    options[12] = 3;//divide connectivity by cluster weight/hyperedge length: 0-neither, 1-only cluster, 2-only hedge len, 3-both   
    options[13] = 3;//matching request resolution order: 3 -> random, 2 -> as they arrive 
    options[14] = 1;//number serial partitioning runs
    
    options[15] = 5;//serial partitioning routine, 1-3 RB, 4 khmetis, 5 patoh, see manual
    
    /* Map the user-selected serial partitioner name (case-insensitive) onto
       ParKway's numeric code; any other name is an error. */
    if (!strcasecmp(hgp->parkway_serpart, "patoh"))
        options[15] = 5;
    else if (!strcasecmp(hgp->parkway_serpart, "hmetis"))
        options[15] = 4;
    else if (!strcasecmp(hgp->parkway_serpart, "generic"))
        options[15] = 1;
    else if (!strcasecmp(hgp->parkway_serpart, "genericv"))
        options[15] = 2;
    else if (!strcasecmp(hgp->parkway_serpart, "genericmv"))
        options[15] = 3;
    else {
        ZOLTAN_PARKWAY_ERROR("Invalid ParKway serial partitioner. It should be one of; generic, genericv, genericmv, hmetis, patoh.", ZOLTAN_FATAL);
    }
    
    /* uprintf(hgc, "ParKway serpart='%s'  options[13]=%d\n", hgp->parkway_serpart, options[13]); */
    
    /* NOTE(review): several of the bracketed option indices quoted in the
       comments below do not match the option they appear to describe (e.g.
       "[13] = RB" where the serial partitioner is options[15]); check them
       against the ParKway manual before relying on them. */
    options[16] = 2;//serial coarsening algorithm (only if [15] = RB, see manual)
    options[17] = 2;//num bisection runs in RB (only if [15] = RB, see manual)
    options[18] = 10;//num initial partitioning runs in RB (only if [13] = RB, see manual)
    options[19] = 2;//hmetis_PartKway coarsening option, vals 1-5, see manual (only if [15] = 4)   
    options[20] = 2;//hmetis_PartKway refinement option, vals 0-3, see manual (only if [15] = 4)
    options[21] = 3;//patoh_partition parameter settings, vals 1-3, see manual (only if [15] = 5)
    options[22] = 1;//parallel uncoarsening algorithm, 1 simple, 2 only final V-Cycle, 3 all V-Cycle
    options[23] = 5;//limit on number of V-Cycle iterations (only if [22] = 2/3)
    options[24] = 0;//min allowed gain for V-Cycle (percentage, see manual, only if [21] = 2/3)
    options[25] = 0;//percentage threshold used to reject partitions from a number of runs (see manual)
    options[26] = 0;//reduction in [23] as partitions propagate by factor [24]/100 (see manual)
    options[27] = 100;//early exit criterion in parallel refinement, will exit if see ([25]*num vert)/100 consecutive -ve moves 
    options[28] = 0;//parallel refinement 0->basic, 1->use approx 2->use early exit 3->use approx and early exit  
    
    /* Zoltan's tolerance is a ratio (e.g. 1.05); the value handed to ParKway
       is the excess over 1. */
    constraint = hgp->bal_tol-1.0;
    
    
    /* Each process passes the vertex weights of its own slice, starting at
       offset myProc*anVtx, and receives that slice's parts in pvector. */
    Zoltan_ParaPartKway(nVtx, hg->nEdge, &ivwgts[hgc->myProc*anVtx], iewgts,
                        hg->hindex, hg->hvertex, nparts,
                        constraint, &cut, options, pvector, NULL, hgc->Communicator);
    
/* KDDKDD
   uprintf(hgc, "ParaPartKway cut=%d\n", cut);
*/
    
    
    /* after partitioning Zoltan needs partvec exist on all procs for nProc_x=1 */       
    /* Slice i starts at i*anVtx in partvec. */
    disp[0] = 0; 
    for (i = 1; i < hgc->nProc; ++i)
        disp[i] = disp[i-1] + anVtx;
    
    /* Collect every process's slice length, then the slices themselves. */
    MPI_Allgather (&nVtx, 1, MPI_INT, recv_size, 1, MPI_INT, hgc->Communicator);    
    MPI_Allgatherv(pvector, nVtx, MPI_INT, 
                  partvec, recv_size, disp, MPI_INT, hgc->Communicator);

    
  /* HERE:  Check whether imbalance criteria were met. */

End:

    Zoltan_Multifree(__FILE__,__LINE__, 5, &ivwgts, &iewgts, &pvector, &disp, &recv_size);
    
#endif
  return ierr;
}
示例#11
0
/*
 * Print load-balance statistics for a distributed hypergraph.
 *
 * Gathers each process's local vertex, edge and pin counts (hg->nVtx,
 * hg->nEdge, hg->nPins) to root 0 of hg->comm->Communicator.  The process
 * with comm->myProc == 0 then prints the minimum and maximum of each count
 * and their percentage deviation from the average; when SHOW_DISTMATRIX is
 * defined it also prints the counts laid out as an nProc_y x nProc_x grid.
 *
 * Returns ierr, which starts as ZOLTAN_OK; the allocation failure path goes
 * through MEMORY_ERROR (defined elsewhere; presumably sets ierr and jumps to
 * End -- confirm against the macro definition).
 */
int Zoltan_PHG_LoadBalStat(ZZ *zz, HGraph *hg)
{
    char    *yo = "Zoltan_PHG_LoadBalStat";
    int     ierr=ZOLTAN_OK;
    PHGComm *comm = hg->comm;
    int     *v=NULL, *n=NULL, *p=NULL, x, y, i;
    int     minv=INT_MAX, maxv=-1, minn=INT_MAX, maxn=-1, minp=INT_MAX, maxp=-1;
    double  av=0.0, an=0.0, ap=0.0;

    /* One allocation holds three arrays of nProc ints: v (vertex counts),
       n (edge counts) and p (pin counts). */
    if ((v = (int*) ZOLTAN_MALLOC(3 * comm->nProc * sizeof(int)))==NULL)
        MEMORY_ERROR;
    n = v + comm->nProc;
    p = n + comm->nProc;

    MPI_Gather(&hg->nVtx, 1, MPI_INT, v, 1, MPI_INT, 0, comm->Communicator);
    MPI_Gather(&hg->nEdge, 1, MPI_INT, n, 1, MPI_INT, 0, comm->Communicator);
    MPI_Gather(&hg->nPins, 1, MPI_INT, p, 1, MPI_INT, 0, comm->Communicator);

    /* MPI_Gather fills the receive buffer only on the root, so v/n/p hold
       meaningful data only there.  The statistics are therefore computed
       inside the reporting branch rather than on every process, which would
       read uninitialized memory on the non-root ranks.  (As before, the
       process with myProc == 0 is assumed to be root 0 of the communicator.) */
    if (!comm->myProc) {
        for (i=0; i<comm->nProc; ++i) {
            minv = MIN(minv, v[i]);
            maxv = MAX(maxv, v[i]);
            av += v[i];
            minn = MIN(minn, n[i]);
            maxn = MAX(maxn, n[i]);
            an += n[i];
            minp = MIN(minp, p[i]);
            maxp = MAX(maxp, p[i]);
            ap += p[i];
        }

        /* turn the sums into per-process averages */
        av /= (double) comm->nProc;
        an /= (double) comm->nProc;
        ap /= (double) comm->nProc;

#ifdef SHOW_DISTMATRIX        
        /* One row per y coordinate; each cell shows (vertices, edges, pins)
           of the process at index y*nProc_x + x. */
        printf("Hypergraph distribution:\n     ");
        for (x=0; x<comm->nProc_x; ++x)
            printf("%-33d", x);
        printf("\n");
        for (y=0; y<comm->nProc_y; ++y) {
            printf("%3d: ", y);
            for (x=0; x<comm->nProc_x; ++x) {
                i = y* comm->nProc_x + x;
                printf("H(%7d, %7d, %9d)   ", v[i], n[i], p[i]);  
            }
            printf("\n");
            printf("     ");
            /* Second line of the row: optional markers under the cells that
               hold a minimum or maximum value. */
            for (x=0; x<comm->nProc_x; ++x) {
                i = y* comm->nProc_x + x;
                printf("  ");
#ifdef SHOW_MINMAXV
                if (v[i]==minv)
                    printf("vvvvvvv  ");                
                else if (v[i]==maxv)
                    printf("^^^^^^^  ");                
                else
#endif
                    printf("         ");
#ifdef SHOW_MINMAXN                
                if (n[i]==minn)
                    printf("<<<<<<<  ");                
                else if (n[i]==maxn)
                    printf(">>>>>>>  ");                
                else
#endif
                    printf("         ");
#ifdef SHOW_MINMAXP                
                if (p[i]==minp)
                    printf("---------    ");                
                else if (p[i]==maxp)
                    printf("+++++++++    ");                
                else
#endif
                    printf("             ");
            }
            printf("\n");             
        }
#endif
        printf("Min:   (%7d, %7d, %9d)    Max: (%7d, %7d, %9d)\n", minv, minn, minp, maxv, maxn, maxp);
        /* Imbalance = percentage distance of the min (first triple) and of
           the max (second triple) from the average. */
        printf("Imbal: (%7.2f, %7.2f, %9.2f)         (%7.2f, %7.2f, %9.2f)\n", 100.0*(av-minv)/av, 100.0*(an-minn)/an, 100.0*(ap-minp)/ap, 100.0*(maxv-av)/av, 100.0*(maxn-an)/an, 100.0*(maxp-ap)/ap);        
    }
 End:
    /* n and p point into v's allocation, so only v is released. */
    Zoltan_Multifree(__FILE__, __LINE__, 1, &v);
                         
    return ierr;
}
示例#12
0
/*  Main partitioning function for hypergraph partitioning.
 *
 *  Runs one multilevel V-cycle:
 *    1. Coarsening: while the hypergraph is larger than hg->redl and the
 *       previous level shrank it enough (COARSEN_FRACTION_LIMIT), compute a
 *       matching, build the coarser hypergraph and push a new VCycle level.
 *       With more than one processor and hgp->ProRedL > 0, once the global
 *       pin count drops below ProRedL times the last recorded count the
 *       coarse hypergraph is redistributed through Zoltan_PHG_Redistribute;
 *       processes left out of the new communicator (hgc->myProc < 0) jump
 *       straight to the uncoarsening phase.
 *    2. Coarse partitioning of the coarsest hypergraph.
 *    3. Uncoarsening: walking from the coarsest level back to the input
 *       hypergraph, refine the current level's partition and project it onto
 *       the next finer level (via the matching's comm plan, or via a fresh
 *       comm plan when the level was produced by redistribution).
 *
 *  Returns the last Zoltan error code recorded in err (ZOLTAN_OK or
 *  ZOLTAN_WARN on success), or ZOLTAN_MEMERR when an allocation fails.
 *
 *  NOTE(review): several allocation-failure paths below return directly
 *  without releasing the VCycle levels built so far.
 */
int Zoltan_PHG_Partition (
  ZZ *zz,               /* Zoltan data structure */
  HGraph *hg,           /* Input hypergraph to be partitioned */
  int p,                /* Input:  number partitions to be generated */
  float *part_sizes,    /* Input:  array of length p containing percentages
                           of work to be assigned to each partition */
  Partition parts,      /* Input:  initial partition #s; aligned with vtx 
                           arrays. 
                           Output:  computed partition #s */
  PHGPartParams *hgp)   /* Input:  parameters for hgraph partitioning. */
{

  PHGComm *hgc = hg->comm;
  VCycle  *vcycle=NULL, *del=NULL;
  int  i, err = ZOLTAN_OK, middle;
  ZOLTAN_GNO_TYPE origVpincnt; /* for processor reduction test */
  ZOLTAN_GNO_TYPE prevVcnt     = 2*hg->dist_x[hgc->nProc_x]; /* initialized so that the */
  ZOLTAN_GNO_TYPE prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; /* while loop will be entered
				 		               before any coarsening */
  ZOLTAN_GNO_TYPE tot_nPins, local_nPins;
  MPI_Datatype zoltan_gno_mpi_type;
  char *yo = "Zoltan_PHG_Partition";
  int do_timing = (hgp->use_timers > 1);
  int fine_timing = (hgp->use_timers > 2);
  /* per-level timers are only used when no processor reduction is requested */
  int vcycle_timing = (hgp->use_timers > 4 && hgp->ProRedL == 0);
  short refine = 0;
  struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
  int reset_geometric_matching = 0;
  char reset_geometric_string[4];   /* holds "rcb" or "rib" plus the NUL */

  ZOLTAN_TRACE_ENTER(zz, yo);

  zoltan_gno_mpi_type = Zoltan_mpi_gno_type();
    
  if (do_timing) {
    /* Timers are created once (index < 0 means "not yet created"). */
    if (timer->vcycle < 0) 
      timer->vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle");
    if (timer->procred < 0) 
      timer->procred = Zoltan_Timer_Init(zz->ZTime, 0, "Processor Reduction");
    if (timer->match < 0) 
      timer->match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching");
    if (timer->coarse < 0) 
      timer->coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening");
    if (timer->coarsepart < 0)
      timer->coarsepart = Zoltan_Timer_Init(zz->ZTime, 1,
                                           "Coarse_Partition");
    if (timer->refine < 0) 
      timer->refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement");
    if (timer->project < 0) 
      timer->project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up");

    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

  /* Global pin count of the input hypergraph; baseline for the processor
     reduction test inside the coarsening loop. */
  local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;

  MPI_Allreduce(&local_nPins,&tot_nPins,1,zoltan_gno_mpi_type,MPI_SUM,hgc->Communicator);

  origVpincnt = tot_nPins;

  /* Top (finest) level of the V-cycle wraps the caller's hg and parts. */
  if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) {
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  /* For geometric coarsening, hgp->matching pointer and string are reset
   * after geometric_levels of coarsening.  Will need to reset them after
   * this vcycle is completed.  Capture that fact now!  */
  if (!strcasecmp(hgp->redm_str, "rcb") || !strcasecmp(hgp->redm_str, "rib")) {
    reset_geometric_matching = 1;
    strcpy(reset_geometric_string, hgp->redm_str);
  }

  /****** Coarsening ******/    
#define COARSEN_FRACTION_LIMIT 0.9  /* Stop if we don't make much progress */
  while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > (ZOLTAN_GNO_TYPE)hg->redl)
	 && ((hg->dist_x[hgc->nProc_x] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) /* prevVcnt initialized to 2*hg->dist_x[hgc->nProc_x] */
	     || (hg->dist_y[hgc->nProc_y] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) /* prevVedgecnt initialized to 2*hg->dist_y[hgc->nProc_y] */
    && hg->dist_y[hgc->nProc_y] && hgp->matching) {
      ZOLTAN_GNO_TYPE *match = NULL;
      VCycle *coarser=NULL, *redistributed=NULL;
      PHGComm *new_hgc = NULL;   /* communicator descriptor for a
                                    redistributed level; becomes hgc only
                                    once that level has been created */
        
      prevVcnt     = hg->dist_x[hgc->nProc_x];
      prevVedgecnt = hg->dist_y[hgc->nProc_y];

#ifdef _DEBUG      
      /* UVC: load balance stats */
      Zoltan_PHG_LoadBalStat(zz, hg);
#endif
      
      if (hgp->output_level >= PHG_DEBUG_LIST) {
          uprintf(hgc,
                  "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
                  hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
                  hgp->redm_str,
                  hgp->coarsepartition_str, hgp->refinement_str, p);
          if (hgp->output_level > PHG_DEBUG_LIST) {
              err = Zoltan_HG_Info(zz, hg);
              if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
                  goto End;
          }
      }
      if (hgp->output_level >= PHG_DEBUG_PLOT)
        Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
         "coarsening plot");

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->match, hgc->Communicator);
      }
      if (vcycle_timing) {
        if (vcycle->timer_match < 0) {
          char str[80];
          sprintf(str, "VC Matching %d", hg->info);
          vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match,
                           hgc->Communicator);
      }

      /* Allocate and initialize Matching Array */
      if (hg->nVtx && !(match = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC (hg->nVtx*sizeof(ZOLTAN_GNO_TYPE)))) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_MEMERR;
      }
      /* every vertex starts out matched to itself */
      for (i = 0; i < hg->nVtx; i++)
        match[i] = i;
        
      /* Calculate matching (packing or grouping) */

      err = Zoltan_PHG_Matching (zz, hg, match, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
        ZOLTAN_FREE (&match);
        goto End;
      }
      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match,
                          hgc->Communicator);

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->match, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->coarse, hgc->Communicator);
      }

      if (vcycle_timing) {
        if (vcycle->timer_coarse < 0) {
          char str[80];
          sprintf(str, "VC Coarsening %d", hg->info);
          vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse,
                           hgc->Communicator);
      }
            
      if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) {
        ZOLTAN_FREE (&match);
        ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL.");
        /* err still holds the OK/WARN from matching; report the failure the
           same way the first newVCycle check does. */
        err = ZOLTAN_MEMERR;
        goto End;
      }

      /* Construct coarse hypergraph and LevelMap */
      err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap,
       &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, 
       &vcycle->comm_plan, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) 
        goto End;

      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse,
                          hgc->Communicator);
        
      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarse, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }

      ZOLTAN_FREE (&match);

      if ((err=allocVCycle(coarser))!= ZOLTAN_OK)
        goto End;
      /* the coarser level becomes the current level */
      vcycle = coarser;
      hg = vcycle->hg;

      if (hgc->nProc > 1 && hgp->ProRedL > 0) {
        local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;
	MPI_Allreduce(&local_nPins, &tot_nPins, 1, zoltan_gno_mpi_type, MPI_SUM,
		      hgc->Communicator);

	if (tot_nPins < (ZOLTAN_GNO_TYPE)(hgp->ProRedL * origVpincnt + 0.5)) {
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->procred, hgc->Communicator);
	  }
	  /* redistribute to half the processors */
	  origVpincnt = tot_nPins; /* update for processor reduction test */

	  if(hg->nVtx&&!(hg->vmap=(int*)ZOLTAN_MALLOC(hg->nVtx*sizeof(int)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: hg->vmap");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; i++)
	    hg->vmap[i] = i;

	  /* upper end of the processor range handed to
	     Zoltan_PHG_Redistribute below */
	  middle = (int)((float) (hgc->nProc-1) * hgp->ProRedL);

	  if (hgp->nProc_x_req!=1&&hgp->nProc_y_req!=1) { /* Want 2D decomp */
	    if ((middle+1) > SMALL_PRIME && Zoltan_PHG_isPrime(middle+1))
	      --middle; /* if it was prime just use one less #procs (since
			   it should be bigger than SMALL_PRIME it is safe to
			   decrement) */
	  }

	  /* Keep hgc pointing at the current communicator until the
	     redistributed level exists: the End: path still dereferences
	     hgc, so it must never be left freed. */
	  if (!(new_hgc = (PHGComm*) ZOLTAN_MALLOC (sizeof(PHGComm)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: PHGComm");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  if (!(redistributed=newVCycle(zz,NULL,NULL,vcycle,vcycle_timing))) {
	    ZOLTAN_FREE (&new_hgc);
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "redistributed is NULL.");
	    err = ZOLTAN_MEMERR;   /* do not report this failure as success */
	    goto End;
	  }
	  hgc = new_hgc;

	  Zoltan_PHG_Redistribute(zz,hgp,hg,0,middle,hgc, redistributed->hg,
				  &vcycle->vlno,&vcycle->vdest);
	  if (hgp->UseFixedVtx || hgp->UsePrefPart)
            redistributed->hg->bisec_split = hg->bisec_split;

	  if ((err=allocVCycle(redistributed))!= ZOLTAN_OK)
	    goto End;
	  vcycle = redistributed;

	  if (hgc->myProc < 0)
	    /* I'm not in the redistributed part so I should go to uncoarsening
	       refinement and wait */ {
	    if (fine_timing) {
	      if (timer->cpgather < 0)
		timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
	      if (timer->cprefine < 0)
		timer->cprefine =Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
	      if (timer->cpart < 0)
		timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");
	    }
	    if (do_timing) {
	      ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	      ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	    }
	    goto Refine;
	  }

	  hg = vcycle->hg;
	  hg->redl = hgp->redl; /* not set with hg creation */
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	  }
	}
      }
  }

  if (hgp->output_level >= PHG_DEBUG_LIST) {
    uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
     hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
     hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p);
    if (hgp->output_level > PHG_DEBUG_LIST) {
      err = Zoltan_HG_Info(zz, hg);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
        goto End;
    }
  }
  if (hgp->output_level >= PHG_DEBUG_PLOT)
    Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
     "coarsening plot");

  /* free array that may have been allocated in matching */
  if (hgp->vtx_scal) {
    hgp->vtx_scal_size = 0;
    ZOLTAN_FREE(&(hgp->vtx_scal));
  }

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->coarsepart, hgc->Communicator);
  }

  /****** Coarse Partitioning ******/

  err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp);

  if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
    goto End;

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarsepart, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

Refine:
  /* del remembers the coarsest level so the cleanup at End: can walk the
     whole chain from there. */
  del = vcycle;
  refine = 1;

  /****** Uncoarsening/Refinement ******/
  while (vcycle) {
    VCycle *finer = vcycle->finer;
    hg = vcycle->hg;

    /* Refinement is skipped right after undoing a redistribution
       (refine == 0) and on processes outside the current communicator. */
    if (refine && hgc->myProc >= 0) {
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->refine, hgc->Communicator);
      }
      if (vcycle_timing) {
	if (vcycle->timer_refine < 0) {
	  char str[80];
	  sprintf(str, "VC Refinement %d", hg->info);
	  vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str);
	}
	ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine,
			   hgc->Communicator);
      }

      err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp);
        
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->refine, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }
      if (vcycle_timing)
	ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine,
			  hgc->Communicator);

                          
      if (hgp->output_level >= PHG_DEBUG_LIST)     
	uprintf(hgc, 
		"FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n",
		hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
		hgp->redm_str,
		hgp->coarsepartition_str, hgp->refinement_str, p,
		Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, 0, p, 
                                           vcycle->Part),
		Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err));

      if (hgp->output_level >= PHG_DEBUG_PLOT)
	Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part,
			"partitioned plot");
    }

    if (finer)  {
      int *rbuffer;
            
      /* Project coarse partition to fine partition */
      if (finer->comm_plan) {
	/* finer was coarsened by matching: project through LevelMap and the
	   matching's communication plan */
	refine = 1;
	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->project, hgc->Communicator);
	}
	if (vcycle_timing) {
	  if (vcycle->timer_project < 0) {
	    char str[80];
	    sprintf(str, "VC Project Up %d", hg->info);
	    vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str);
	  }
	  ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project,
			     hgc->Communicator);
	}
        
	/* easy to assign partitions to internal matches */
	for (i = 0; i < finer->hg->nVtx; i++)
	  if (finer->LevelMap[i] >= 0)   /* if considers only the local vertices */
	    finer->Part[i] = vcycle->Part[finer->LevelMap[i]];
          
	/* now that the coarse partition assignments have been propagated */
	/* upward to the finer level for the local vertices, we need to  */    
	/* fill the LevelData (matched pairs of a local vertex with a    */
	/* off processor vertex) with the partition assignment of the    */
	/* local vertex - can be done totally in the finer level!        */    
	/* (the loop advances two entries per iteration: the ++i in the body
	   plus the i++ in the header) */
	for (i = 0; i < finer->LevelCnt; i++)  {
	  ++i;          /* skip over off processor lno */
	  finer->LevelData[i] = finer->Part[finer->LevelData[i]]; 
	}
            
	/* allocate rec buffer to exchange LevelData information */
	rbuffer = NULL;
	if (finer->LevelSndCnt > 0)  {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int));
	  if (!rbuffer)    {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}       
      
	/* get partition assignments from owners of externally matched vtxs */  
	Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i);
	Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, 
         (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer);

	/* process data to assign partitions to external matches */
	for (i = 0; i < 2 * finer->LevelSndCnt;)  {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];      
	  finer->Part[lno] = partition;         
	}

	ZOLTAN_FREE (&rbuffer);                  
	Zoltan_Comm_Destroy (&finer->comm_plan);                   

	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->project, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	}
	if (vcycle_timing)
	  ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project,
			    hgc->Communicator);
      } else {
	/* finer has no matching comm plan: this level was produced by
	   redistribution, so send the partition back to the processes that
	   held the hypergraph before it */
	int *sendbuf = NULL, size;
	refine = 0;
	/* ints local and partition numbers */
	if (finer->vlno) {
	  sendbuf = (int*) ZOLTAN_MALLOC (2 * hg->nVtx * sizeof(int));
	  if (!sendbuf) {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; ++i) {
	    sendbuf[2 * i] = finer->vlno[i];     /* assign local numbers */
	    sendbuf[2 * i + 1] = vcycle->Part[i];/* assign partition numbers */
	  }
	}

	/* release the descriptor allocated for the redistributed level */
	ZOLTAN_FREE (&hgc);
	hgc = finer->hg->comm; /* updating hgc is required when the processors
				   change */
	/* Create comm plan to unredistributed processors */
	err = Zoltan_Comm_Create(&finer->comm_plan, finer->vlno ? hg->nVtx : 0,
				 finer->vdest, hgc->Communicator, COMM_TAG+2,
				 &size);

	if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
	  ZOLTAN_PRINT_ERROR(hgc->myProc, yo, "Zoltan_Comm_Create failed.");
	  goto End;
	}

	/* allocate rec buffer to exchange sendbuf information */
	rbuffer = NULL;
	if (finer->hg->nVtx) {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->hg->nVtx * sizeof(int));

	  if (!rbuffer) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}

	/* Use plan to send partitions to the unredistributed processors */

	Zoltan_Comm_Do(finer->comm_plan, COMM_TAG+3, (char *) sendbuf,
		       2*sizeof(int), (char *) rbuffer);

	/* share the received pairs along the processor column */
	MPI_Bcast(rbuffer, 2*finer->hg->nVtx, MPI_INT, 0, hgc->col_comm);
	
	/* process data to assign partitions to unredistributed processors */
	for (i = 0; i < 2 * finer->hg->nVtx;) {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];
	  finer->Part[lno] = partition;
	}

	if (finer->vlno)
	  ZOLTAN_FREE (&sendbuf);

	ZOLTAN_FREE (&rbuffer);
	Zoltan_Comm_Destroy (&finer->comm_plan);
      }
    }

    vcycle = finer;
  }       /* while (vcycle) */
    
End:
  /* Release every level from the coarsest (del) up to the top level.
     NOTE(review): del is only assigned at Refine:, so a "goto End" taken
     before that label arrives here with del == NULL and none of the levels
     built so far are released. */
  vcycle = del;
  while (vcycle) {
    if (vcycle_timing) {
      Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout);
      Zoltan_Timer_Destroy(&vcycle->timer);
    }
    if (vcycle->finer) {   /* cleanup by level */
      Zoltan_HG_HGraph_Free (vcycle->hg);

      if (vcycle->LevelData)
	Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg);
      else if (vcycle->vlno)
	Zoltan_Multifree (__FILE__, __LINE__, 5, &vcycle->Part, &vcycle->vdest,
			  &vcycle->vlno, &vcycle->LevelMap, &vcycle->hg);
      else
	Zoltan_Multifree (__FILE__, __LINE__, 3, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->hg);
    }
    else                   /* cleanup top level */
      /* the top level's hg and Part were supplied to newVCycle by this
         function's caller, so they are not freed here */
      Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap,
                        &vcycle->LevelData);
    del = vcycle;
    vcycle = vcycle->finer;
    ZOLTAN_FREE(&del);
  }

  /* restore the geometric matching method captured before coarsening */
  if (reset_geometric_matching) {
    strcpy(hgp->redm_str, reset_geometric_string);
    Zoltan_PHG_Set_Matching_Fn(hgp);
  }

  if (do_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
  ZOLTAN_TRACE_EXIT(zz, yo) ;
  return err;
}
示例#13
0
int Zoltan_Order(
    ZZ *zz,               /* Zoltan structure */
    int num_gid_entries, /* # of entries for a global id */
    int num_obj,		/* Number of objects to order */
    ZOLTAN_ID_PTR gids,   /* List of global ids (local to this proc) */
    /* The application must allocate enough space */
    int *rank,            /* rank[i] is the rank of gids[i] */
    int *iperm            /* iperm[rank[i]]=i, only for sequential ordering */
)
{
    /*
     * Main user-call for ordering.
     * Input:
     *   zz, a Zoltan structure with appropriate function pointers set.
     *   gids, a list of global ids or enough space to store such a list
     *   lids, a list of local ids or enough space to store such a list
     * Output:
     *   num_gid_entries
     *   num_lid_entries
     *   gids, a list of global ids (filled in if empty on entry)
     *   lids, a list of local ids (filled in if empty on entry)
     *   rank, rank[i] is the global rank of gids[i]
     * Return values:
     *   Zoltan error code.
     */

    char *yo = "Zoltan_Order";
    int ierr;
    double start_time, end_time;
    double order_time[2] = {0.0,0.0};
    char msg[256];
    int comm[2],gcomm[2];
    ZOLTAN_ORDER_FN *Order_fn;
    struct Zoltan_Order_Options opt;
    int * vtxdist = NULL;
    ZOLTAN_ID_PTR local_gids=NULL, lids=NULL;
    int local_num_obj;
    int *local_rank = NULL, *local_iperm=NULL;
    struct Zoltan_DD_Struct *dd = NULL;


    ZOLTAN_TRACE_ENTER(zz, yo);

    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
        Zoltan_Print_Key_Params(zz);

    start_time = Zoltan_Time(zz->Timer);

    /*
     * Compute Max number of array entries per ID over all processors.
     * This is a sanity-maintaining step; we don't want different
     * processors to have different values for these numbers.
     */
    comm[0] = zz->Num_GID;
    comm[1] = zz->Num_LID;
    MPI_Allreduce(comm, gcomm, 2, MPI_INT, MPI_MAX, zz->Communicator);
    zz->Num_GID = gcomm[0];

    if (num_gid_entries != zz->Num_GID) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "num_gid_entries doesn't have the good value");
        return (ZOLTAN_FATAL);
    }


    zz->Order.nbr_objects = num_obj;
    zz->Order.rank = rank;
    zz->Order.iperm = iperm;
    zz->Order.gids = gids;
    zz->Order.lids = lids;
    zz->Order.start = NULL;
    zz->Order.ancestor = NULL;
    zz->Order.leaves = NULL;
    zz->Order.nbr_leaves = 0;
    zz->Order.nbr_blocks = 0;

    /*
     *  Return if this processor is not in the Zoltan structure's
     *  communicator.
     */

    if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_OK);
    }

    /*
     *  Get ordering options from parameter list.
     */

    /* Set default parameter values */
    strncpy(opt.method, "PARMETIS", MAX_PARAM_STRING_LEN);
#ifdef HAVE_MPI
    strncpy(opt.order_type, "DIST", MAX_PARAM_STRING_LEN);
#else
    strncpy(opt.order_type, "SERIAL", MAX_PARAM_STRING_LEN);
#endif /* HAVE_MPI */

    opt.use_order_info = 0;
    opt.start_index = 0;
    opt.reorder = 0;

    Zoltan_Bind_Param(Order_params, "ORDER_METHOD", (void *) opt.method);
    Zoltan_Bind_Param(Order_params, "ORDER_TYPE",   (void *) opt.order_type);
    Zoltan_Bind_Param(Order_params, "ORDER_START_INDEX", (void *) &opt.start_index);
    Zoltan_Bind_Param(Order_params, "REORDER",      (void *) &opt.reorder);
    Zoltan_Bind_Param(Order_params, "USE_ORDER_INFO", (void *) &opt.use_order_info);

    Zoltan_Assign_Param_Vals(zz->Params, Order_params, zz->Debug_Level,
                             zz->Proc, zz->Debug_Proc);

    zz->Order.start_index = opt.start_index;

    /*
     *  Check that the user has allocated space for the return args.
     */
    if (!(gids && rank)) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input argument is NULL. Please allocate all required arrays before calling this routine.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    /*
     *  Find the selected method.
     */

    if (!strcmp(opt.method, "NONE")) {
        if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Ordering method selected == NONE; no ordering performed\n");

        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_WARN);
    }
#ifdef ZOLTAN_PARMETIS
    else if (!strcmp(opt.method, "NODEND")) {
        Order_fn = Zoltan_ParMetis_Order;
    }
    else if (!strcmp(opt.method, "METIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        /* Set ORDER_METHOD to NODEND and ORDER_TYPE to LOCAL */
        strcpy(opt.method, "NODEND");
        strcpy(opt.order_type, "LOCAL");
    }
    else if (!strcmp(opt.method, "PARMETIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        /* Set ORDER_METHOD to NODEND and ORDER_TYPE to LOCAL */
        strcpy(opt.method, "NODEND");
        strcpy(opt.order_type, "GLOBAL");
    }
#endif /* ZOLTAN_PARMETIS */
#ifdef ZOLTAN_SCOTCH
    else if (!strcmp(opt.method, "SCOTCH")) {
        Order_fn = Zoltan_Scotch_Order;
        /* Set ORDER_METHOD to NODEND and ORDER_TYPE to LOCAL */
        strcpy(opt.method, "NODEND");
        /*     strcpy(opt.order_type, "GLOBAL"); */
    }
#endif /* ZOLTAN_SCOTCH */
    else {
        fprintf(stderr, "%s\n", opt.method);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unknown ordering method");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    if (!strcmp(opt.order_type, "GLOBAL"))
        strcpy (opt.order_type, "DIST");
    if (!strcmp(opt.order_type, "LOCAL"))
        strcpy (opt.order_type, "SERIAL");

    strcpy(zz->Order.order_type, opt.order_type);


    /*
     *  Construct the heterogenous machine description.
     */

    ierr = Zoltan_Build_Machine_Desc(zz);

    if (ierr == ZOLTAN_FATAL) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ierr);
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done machine description");

    /*
     * Call the actual ordering function.
     * Compute gid according to the local graph.
     */


    if (zz->Get_Num_Obj != NULL) {
        local_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Get_Num_Obj.");
            return (ierr);
        }
    }
    else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register ZOLTAN_NUM_OBJ_FN.");
        return (ZOLTAN_FATAL);
    }

    local_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, local_num_obj);
    local_rank = (int*) ZOLTAN_MALLOC(local_num_obj*sizeof(int));
    local_iperm = (int*) ZOLTAN_MALLOC(local_num_obj*sizeof(int));

    lids = ZOLTAN_MALLOC_LID_ARRAY(zz, local_num_obj);

    ierr = (*Order_fn)(zz, local_num_obj, local_gids, lids, local_rank, local_iperm, &opt);
    ZOLTAN_FREE(&lids);

    if (ierr) {
        sprintf(msg, "Ordering routine returned error code %d.", ierr);
        if (ierr == ZOLTAN_WARN) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
        } else {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
            Zoltan_Multifree(__FILE__, __LINE__, 3,
                             &local_gids, &local_rank, &local_iperm);
            ZOLTAN_TRACE_EXIT(zz, yo);
            return (ierr);
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done ordering");

    /*   Compute inverse permutation if necessary */
    if ((!(opt.return_args & RETURN_RANK) && (rank != NULL))
            || (!(opt.return_args & RETURN_IPERM) && (iperm != NULL))) {
        ierr = Zoltan_Get_Distribution(zz, &vtxdist);
        if (ierr) {
            /* Error */
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Get_Distribution.\n");
            return (ierr);
        }

        if (!(opt.return_args & RETURN_RANK) && (rank != NULL)) {
            /* Compute rank from iperm */
            ZOLTAN_TRACE_DETAIL(zz, yo, "Inverting permutation");
            Zoltan_Inverse_Perm(zz, local_iperm, local_rank, vtxdist, opt.order_type, opt.start_index);
        }
        else if (!(opt.return_args & RETURN_IPERM) && (iperm != NULL)) {
            /* Compute iperm from rank */
            ZOLTAN_TRACE_DETAIL(zz, yo, "Inverting permutation");
            Zoltan_Inverse_Perm(zz, local_rank, local_iperm, vtxdist, opt.order_type, opt.start_index);
        }
        ZOLTAN_FREE(&vtxdist);
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done Invert Permutation");


    /* TODO: Use directly the "graph" structure to avoid to duplicate things. */
    /* I store : GNO, rank, iperm */
    ierr = Zoltan_DD_Create (&dd, zz->Communicator, zz->Num_GID, (local_rank==NULL)?0:1, (local_iperm==NULL)?0:1, local_num_obj, 0);
    /* Hope a linear assignment will help a little */
    Zoltan_DD_Set_Neighbor_Hash_Fn1(dd, local_num_obj);
    /* Associate all the data with our xGNO */
    Zoltan_DD_Update (dd, local_gids, (ZOLTAN_ID_PTR)local_rank, (ZOLTAN_ID_PTR) local_iperm, NULL, local_num_obj);

    ZOLTAN_FREE(&local_gids);
    ZOLTAN_FREE(&local_rank);
    ZOLTAN_FREE(&local_iperm);

    Zoltan_DD_Find (dd, gids, (ZOLTAN_ID_PTR)rank, (ZOLTAN_ID_PTR)iperm, NULL,
                    num_obj, NULL);
    Zoltan_DD_Destroy(&dd);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done Registering results");


    end_time = Zoltan_Time(zz->Timer);
    order_time[0] = end_time - start_time;

    if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
        int i;
        Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
        printf("ZOLTAN: rank for ordering on Proc %d\n", zz->Proc);
        for (i = 0; i < num_obj; i++) {
            printf("GID = ");
            ZOLTAN_PRINT_GID(zz, &(gids[i*(num_gid_entries)]));
            printf(", rank = %3d\n", rank[i]);
        }
        printf("\n");
        Zoltan_Print_Sync_End(zz->Communicator, TRUE);
    }


    /* Print timing info */
    if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
        if (zz->Proc == zz->Debug_Proc) {
            printf("ZOLTAN Times:  \n");
        }
        Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, order_time[0],
                            "ZOLTAN     Balance:     ");
    }

    ZOLTAN_TRACE_EXIT(zz, yo);
    if (ierr)
        return (ierr);
    else
        return (ZOLTAN_OK);
}
示例#14
0
/*
 * Fill map[0..n-1] with a simple contiguous block assignment of n items
 * to nparts owners: item i is assigned to (int) (i / (n / nparts)), with
 * the quotient evaluated in floating point.
 */
static void phg_linear_block_map(int *map, int n, int nparts)
{
    float frac = (float) n / (float) nparts;
    int   i;

    for (i = 0; i < n; ++i) {
        int owner = (int) ((float) i / frac);

        /* (float) i is not exact for i > 2^24 and may round up to n, in
           which case the quotient reaches nparts; keep the owner in
           [0, nparts-1]. */
        if (nparts > 0 && owner >= nparts)
            owner = nparts - 1;
        map[i] = owner;
    }
}

/*
 * Redistribute the hypergraph ohg onto the processors lo..hi (inclusive)
 * of ohg's communicator.
 *
 * A new MPI communicator containing ranks lo..hi is created, ncomm is
 * filled in for it, every local vertex/edge of ohg is assigned to a
 * processor column/row of ncomm with a linear block map, and
 * Zoltan_PHG_Redistribute_Hypergraph performs the actual move.
 *
 * Returns ZOLTAN_FATAL if ohg's communicator has a single processor,
 * ZOLTAN_MEMERR if a work array cannot be allocated, the error code of
 * Zoltan_PHG_Set_2D_Proc_Distrib or Zoltan_PHG_Redistribute_Hypergraph
 * if one of them fails, and otherwise ZOLTAN_OK or ZOLTAN_WARN.
 */
int Zoltan_PHG_Redistribute(
  ZZ *zz, 
  PHGPartParams *hgp,     /* Input: parameters; used only for user's
                             request of nProc_x and nProc_y */
  HGraph  *ohg,           /* Input: Local part of distributed hypergraph */
  int     lo, int hi,     /* Input: range of proc ranks (inclusive)
                             to be included in new communicator: ncomm */
  PHGComm *ncomm,         /* Output: Communicators of new distribution */
  HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
  int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                             vertex map from nhg to ohg's local vertex number*/
  int     **vdest         /* Output: allocated with the size nhg->nVtx and
                             stores dest proc in ocomm */
    )   
{
    char * yo = "Zoltan_PHG_Redistribute";
    PHGComm *ocomm = ohg->comm;
    int     *v2Col = NULL, *n2Row = NULL, *ranks = NULL;
    int     ierr = ZOLTAN_OK, rerr, i;
    int     reqx=hgp->nProc_x_req, reqy=hgp->nProc_y_req;
    MPI_Group allgrp, newgrp;
    MPI_Comm  nmpicomm;

    if (ocomm->nProc==1){
        errexit("%s: ocomm->nProc==1", yo);
        return ZOLTAN_FATAL;
    }

    /* List of ranks lo..hi; allocated before any MPI object is created
       so that a failure here leaves nothing to release. */
    ranks = (int *) ZOLTAN_MALLOC(ocomm->nProc * sizeof(int));
    if (!ranks) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
        return ZOLTAN_MEMERR;
    }
    for (i=lo; i<=hi; ++i)
        ranks[i-lo] = i;

    /* create a new communicator for procs[lo..hi] */
    MPI_Comm_group(ocomm->Communicator, &allgrp);
    MPI_Group_incl(allgrp, hi-lo+1, ranks, &newgrp);
    MPI_Comm_create(ocomm->Communicator, newgrp, &nmpicomm);
    MPI_Group_free(&newgrp);
    MPI_Group_free(&allgrp);   
    ZOLTAN_FREE(&ranks);

    /* Honor the user's requested processor grid only when it is
       one-dimensional; otherwise pass -1/-1 for both dimensions. */
    if (reqx != 1 && reqy != 1)
        reqx = reqy = -1;
    
    /* fill ncomm */
    ierr = Zoltan_PHG_Set_2D_Proc_Distrib(ocomm->zz, nmpicomm, 
                                          ocomm->myProc-lo, hi-lo+1, 
                                          reqx, reqy, ncomm);
    if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
        /* ncomm->nProc_x / nProc_y are read below; do not use them if
           ncomm could not be set up. */
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                   "Error returned from Zoltan_PHG_Set_2D_Proc_Distrib.");
        return ierr;
    }
    
    v2Col = (int *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(int));    
    n2Row = (int *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(int));
    /* A NULL result is only an error when a non-empty array was requested. */
    if ((ohg->nVtx && !v2Col) || (ohg->nEdge && !n2Row)) {
        Zoltan_Multifree(__FILE__, __LINE__, 2,
                         &v2Col, &n2Row);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
        return ZOLTAN_MEMERR;
    }

    /* UVC: TODO very simple straight forward partitioning right now;
       later we can implement a more "load balanced", or smarter
       mechanisms */
    phg_linear_block_map(v2Col, ohg->nVtx,  ncomm->nProc_x);
    phg_linear_block_map(n2Row, ohg->nEdge, ncomm->nProc_y);

    /* Keep a warning from the setup step unless the redistribution
       itself reports something; status codes are not bit flags, so they
       are not OR-ed together. */
    rerr = Zoltan_PHG_Redistribute_Hypergraph(zz, ohg, lo, v2Col, n2Row, ncomm, nhg, vmap, vdest);
    if (rerr != ZOLTAN_OK)
        ierr = rerr;

    Zoltan_Multifree(__FILE__, __LINE__, 2,
                     &v2Col, &n2Row);
    
    return ierr;
}
示例#15
0
/*  Main partitioning function for hypergraph partitioning.
 *
 *  Performs one multilevel V-cycle on hg:
 *    1. Coarsening: while the coarsening conditions hold, compute a
 *       matching, build a coarser hypergraph from it and push a new
 *       VCycle level whose 'finer' link points at the previous level.
 *    2. Coarse partitioning of the coarsest hypergraph.
 *    3. Uncoarsening: refine the partition on each level, then project
 *       it onto the next finer level (locally through LevelMap, and
 *       through the level's communication plan for externally matched
 *       vertices).
 *  All VCycle levels are released at End, on success and on error.
 *
 *  Returns ZOLTAN_MEMERR when a VCycle level or work buffer cannot be
 *  allocated, the error code of the first stage that fails, and
 *  otherwise the status left in err by the last call made.
 */
int Zoltan_PHG_Partition (
  ZZ *zz,               /* Zoltan data structure */
  HGraph *hg,           /* Input hypergraph to be partitioned */
  int p,                /* Input:  number partitions to be generated */
  float *part_sizes,    /* Input:  array of length p containing percentages
                           of work to be assigned to each partition */
  Partition parts,      /* Input:  initial partition #s; aligned with vtx 
                           arrays. 
                           Output:  computed partition #s */
  PHGPartParams *hgp,   /* Input:  parameters for hgraph partitioning. */
  int level)            /* Not referenced in this function body. */
{

  PHGComm *hgc = hg->comm;
  VCycle  *vcycle=NULL, *del=NULL;
  int  i, err = ZOLTAN_OK;
  int  prevVcnt     = 2*hg->dist_x[hgc->nProc_x];
  int  prevVedgecnt = 2*hg->dist_y[hgc->nProc_y];
  char *yo = "Zoltan_PHG_Partition";
  static int timer_match = -1,    /* Timers for various stages */
             timer_coarse = -1,   /* Declared static so we can accumulate */
             timer_refine = -1,   /* times over calls to Zoltan_PHG_Partition */
             timer_coarsepart = -1,
             timer_project = -1,
             timer_vcycle = -1;   /* times everything in Vcycle not included
                                     in above timers */
  int do_timing = (hgp->use_timers > 1);
  int vcycle_timing = (hgp->use_timers > 4);

  ZOLTAN_TRACE_ENTER(zz, yo);
    
  if (do_timing) {
    if (timer_vcycle < 0) 
      timer_vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle");
    if (timer_match < 0) 
      timer_match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching");
    if (timer_coarse < 0) 
      timer_coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening");
    if (timer_coarsepart < 0)
      timer_coarsepart = Zoltan_Timer_Init(zz->ZTime, 1,
                                           "Coarse_Partition");
    if (timer_refine < 0) 
      timer_refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement");
    if (timer_project < 0) 
      timer_project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up");

    ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
  }

  /* Top level of the V-cycle: wraps the caller's hypergraph and
     partition array. */
  if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) {
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL.");
    /* Leave through End so the running timer is stopped and the trace
       exit is recorded. */
    err = ZOLTAN_MEMERR;
    goto End;
  }

  /****** Coarsening ******/    
#define COARSEN_FRACTION_LIMIT 0.9  /* Stop if we don't make much progress */
  /* Continue while a reduction limit is set and not yet reached, the
     last step shrank the vertex or edge count below the fraction limit,
     there are still edges, and a matching method is selected. */
  while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > hg->redl)
    && ((hg->dist_x[hgc->nProc_x] < (int) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5))
     || (hg->dist_y[hgc->nProc_y] < (int) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5)))
    && hg->dist_y[hgc->nProc_y] && hgp->matching) {
      int *match = NULL;
      VCycle *coarser=NULL;
        
      prevVcnt     = hg->dist_x[hgc->nProc_x];
      prevVedgecnt = hg->dist_y[hgc->nProc_y];

#ifdef _DEBUG      
      /* UVC: load balance stats */
      Zoltan_PHG_LoadBalStat(zz, hg);
#endif
      
      if (hgp->output_level >= PHG_DEBUG_LIST) {
          uprintf(hgc,
                  "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
                  hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
                  hgp->redm_str,
                  hgp->coarsepartition_str, hgp->refinement_str, p);
          if (hgp->output_level > PHG_DEBUG_LIST) {
              err = Zoltan_HG_Info(zz, hg);
              if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
                  goto End;
          }
      }
      if (hgp->output_level >= PHG_DEBUG_PLOT)
        Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
         "coarsening plot");

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_match, hgc->Communicator);
      }
      if (vcycle_timing) {
        if (vcycle->timer_match < 0) {
          char str[80];
          sprintf(str, "VC Matching %d", hg->info);
          vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match,
                           hgc->Communicator);
      }

      /* Allocate and initialize Matching Array */
      if (hg->nVtx && !(match = (int*) ZOLTAN_MALLOC (hg->nVtx*sizeof(int)))) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array");
        /* Go through End so the levels built so far are released. */
        err = ZOLTAN_MEMERR;
        goto End;
      }
      for (i = 0; i < hg->nVtx; i++)
        match[i] = i;
        
      /* Calculate matching (packing or grouping) */
      err = Zoltan_PHG_Matching (zz, hg, match, hgp);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
        ZOLTAN_FREE ((void**) &match);
        goto End;
      }
      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match,
                          hgc->Communicator);

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_match, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_coarse, hgc->Communicator);
      }

      if (vcycle_timing) {
        if (vcycle->timer_coarse < 0) {
          char str[80];
          sprintf(str, "VC Coarsening %d", hg->info);
          vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse,
                           hgc->Communicator);
      }
            
      if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) {
        ZOLTAN_FREE ((void**) &match);
        ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL.");
        /* err still holds the (successful) matching status here; report
           the failure instead of returning success. */
        err = ZOLTAN_MEMERR;
        goto End;
      }

      /* Construct coarse hypergraph and LevelMap */
      err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap,
       &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, 
       &vcycle->comm_plan, hgp);
      /* NOTE(review): on the two error exits below, match and coarser are
         not released; the state of coarser->hg after a failed coarsening
         or allocVCycle is not visible from this function -- confirm
         before adding cleanup for them. */
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) 
        goto End;

      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse,
                          hgc->Communicator);
        
      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarse, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
      }

      ZOLTAN_FREE ((void**) &match);

      if ((err=allocVCycle(coarser))!= ZOLTAN_OK)
        goto End;
      /* Descend: the coarser level becomes the current one. */
      vcycle = coarser;
      hg = vcycle->hg;
  }

  if (hgp->output_level >= PHG_DEBUG_LIST) {
    uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
     hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
     hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p);
    if (hgp->output_level > PHG_DEBUG_LIST) {
      err = Zoltan_HG_Info(zz, hg);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
        goto End;
    }
  }
  if (hgp->output_level >= PHG_DEBUG_PLOT)
    Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
     "coarsening plot");

  /* free array that may have been allocated in matching */
  if (hgp->vtx_scal) ZOLTAN_FREE(&(hgp->vtx_scal));

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer_coarsepart, hgc->Communicator);
  }

  /****** Coarse Partitioning ******/
  err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp);
  if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
    goto End;

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarsepart, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
  }

  /* Remember the coarsest level: cleanup at End walks from it towards
     the top through the 'finer' links. */
  del = vcycle;
  /****** Uncoarsening/Refinement ******/
  while (vcycle) {
    VCycle *finer = vcycle->finer;
    hg = vcycle->hg;

    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_refine, hgc->Communicator);
    }
    if (vcycle_timing) {
      if (vcycle->timer_refine < 0) {
        char str[80];
        sprintf(str, "VC Refinement %d", hg->info);
        vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str);
      }
      ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine,
                         hgc->Communicator);
    }

    err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp);
        
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_refine, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
    }
    if (vcycle_timing)
      ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine,
                        hgc->Communicator);

    /* Stop on a refinement failure; otherwise err would be overwritten
       by the next level and the failure silently lost. */
    if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
      goto End;

                          
    /* Note: Zoltan_PHG_Compute_ConCut writes its status into err. */
    if (hgp->output_level >= PHG_DEBUG_LIST)     
      uprintf(hgc, 
              "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n",
              hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
              hgp->redm_str,
              hgp->coarsepartition_str, hgp->refinement_str, p,
              Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, p, vcycle->Part),
              Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err));

    if (hgp->output_level >= PHG_DEBUG_PLOT)
      Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part,
       "partitioned plot");
        
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_project, hgc->Communicator);
    }
    if (vcycle_timing) {
      if (vcycle->timer_project < 0) {
        char str[80];
        sprintf(str, "VC Project Up %d", hg->info);
        vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str);
      }
      ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project,
                         hgc->Communicator);
    }

    /* Project coarse partition to fine partition */
    if (finer)  { 
      int *rbuffer;
            
      /* easy to undo internal matches */
      for (i = 0; i < finer->hg->nVtx; i++)
        if (finer->LevelMap[i] >= 0)
          finer->Part[i] = vcycle->Part[finer->LevelMap[i]];
          
      /* fill sendbuffer with part data for external matches I owned */    
      /* LevelData is walked two ints at a time (the loop's i++ plus the
         ++i below); only the second int of each pair is rewritten. */
      for (i = 0; i < finer->LevelCnt; i++)  {
        ++i;          /* skip return lno */
        finer->LevelData[i] = finer->Part[finer->LevelData[i]]; 
      }
            
      /* allocate rec buffer */
      rbuffer = NULL;
      if (finer->LevelSndCnt > 0)  {
        rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int));
        if (!rbuffer)    {
          ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
          /* Go through End so all levels are released. */
          err = ZOLTAN_MEMERR;
          goto End;
        }
      }       
      
      /* get partition assignments from owners of externally matched vtxs */  
      /* i only receives an output of Zoltan_Comm_Resize here; it is
         reset by the loop below. */
      Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i);
      Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, 
       (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer);

      /* process data to undo external matches */
      /* rbuffer holds (local vertex number, partition) pairs. */
      for (i = 0; i < 2 * finer->LevelSndCnt;)  {
        int lno, partition;
        lno       = rbuffer[i++];
        partition = rbuffer[i++];      
        finer->Part[lno] = partition;         
      }

      ZOLTAN_FREE (&rbuffer);                  
      Zoltan_Comm_Destroy (&finer->comm_plan);                   
    }
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_project, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
    }
    if (vcycle_timing)
      ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project,
                        hgc->Communicator);

    vcycle = finer;
  }       /* while (vcycle) */
    
End:
  /* del is the coarsest level once refinement has started.  If an error
     jumped here earlier, del is still NULL and vcycle is the coarsest
     level built so far, so start the cleanup from vcycle in that case
     rather than skipping it.
     NOTE(review): comm_plan of levels that were never projected is not
     destroyed here -- confirm whether Zoltan_Comm_Destroy accepts a
     NULL plan before adding it. */
  if (del)
    vcycle = del;
  while (vcycle) {
    if (vcycle_timing) {
      Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout);
      Zoltan_Timer_Destroy(&vcycle->timer);
    }
    if (vcycle->finer) {   /* cleanup by level */
      Zoltan_HG_HGraph_Free (vcycle->hg);
      Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part, &vcycle->LevelMap,
                        &vcycle->LevelData, &vcycle->hg);
    }
    else                   /* cleanup top level */
      /* The top level's hg and Part are the caller's arguments and are
         not freed. */
      Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap,
                        &vcycle->LevelData);
    del = vcycle;
    vcycle = vcycle->finer;
    ZOLTAN_FREE(&del);
  }

  if (do_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
  ZOLTAN_TRACE_EXIT(zz, yo) ;
  return err;
}
示例#16
0
int Zoltan_Migrate(
    ZZ *zz,                      /* Zoltan structure.                  */
    int num_import,              /* Number of non-local objects assigned to the
                                  processor in the new decomposition.        */
    ZOLTAN_ID_PTR import_global_ids, /* Array of global IDs for non-local objects
                                  assigned to this processor in the new
                                  decomposition; this field can be NULL if
                                  the application doesn't provide import IDs.*/
    ZOLTAN_ID_PTR import_local_ids,  /* Array of local IDs for non-local objects
                                  assigned to the processor in the new
                                  decomposition; this field can be NULL if the
                                  application does not provide import IDs.   */
    int *import_procs,           /* Array of processor IDs of processors owning
                                  the non-local objects that are assigned to
                                  this processor in the new decomposition; this
                                  field can be NULL if the application does
                                  not provide import IDs.                    */
    int *import_to_part,         /* Array of partition numbers to which imported
                                  objects should be assigned.                */
    int num_export,              /* Number of objs to be exported
                                  to other processors to establish the new
                                  decomposition.                             */
    ZOLTAN_ID_PTR export_global_ids, /* Array of global IDs of
                                  objects to be exported to other processors
                                  to establish the new decomposition.        */
    ZOLTAN_ID_PTR export_local_ids,  /* Array of local IDs of
                                  objects to be exported to other processors
                                  to establish the new decomposition.        */
    int *export_procs,           /* Array of processor IDs
                                  to which objects will be exported
                                  to establish the new decomposition.        */
    int *export_to_part          /* Array of partition numbers to which exported
                                  objects should be assigned.                */
)
{
    /*
     *  Routine to help perform migration.  If migration pre-processing routine
     *  (ZOLTAN_PRE_MIGRATE_FN) is specified, this routine first calls that fn.
     *  It then calls a function to obtain the size of the migrating objects
     *  (ZOLTAN_OBJ_SIZE_FN).  The routine next calls an application-specified
     *  object packing routine (ZOLTAN_PACK_OBJ_FN) for each object
     *  to be exported.  It develops the needed communication map to move the
     *  objects to other processors.  It performs the communication according
     *  to the map, and then calls an application-specified object unpacking
     *  routine (ZOLTAN_UNPACK_OBJ_FN) for each object imported.
     */

    char *yo = "Zoltan_Migrate";
    int num_gid_entries, num_lid_entries;  /* lengths of global & local ids */
    int *sizes = NULL;       /* sizes (in bytes) of the object data for export. */
    int id_size;             /* size (in bytes) of ZOLTAN_GID + padding for
                            alignment                                       */
    int tag_size;            /* size (in bytes) of ZOLTAN_GID + one int
                            (for message size) */
    char *export_buf = NULL; /* buffer for packing export data.                 */
    char *import_buf = NULL; /* buffer for receiving imported data.             */
    char *tmp;               /* temporary pointer into buffers.                 */
    int i;                   /* loop counter.                                   */
    int tmp_size;            /* size of a single object's data.                 */
    int *idx = NULL;         /* index used for multi-fn packs and unpacks.      */
    int idx_cnt = 0;         /* index counter for idx array.                    */
    ZOLTAN_ID_PTR tmp_id = NULL; /* pointer to storage for a global ID in comm
                                buf  */
    ZOLTAN_ID_PTR lid;       /* temporary pointer to a local ID; used to pass
                            NULL to query functions when NUM_LID_ENTRIES=0. */
    ZOLTAN_COMM_OBJ *imp_plan = NULL; /* Comm obj built from import lists. */
    ZOLTAN_COMM_OBJ *exp_plan = NULL; /* Comm obj built from export lists. */
    int msgtag, msgtag2;     /* Tags for communication routines                 */
    int total_send_size;     /* Total size of outcoming message (in #items)     */
    int total_recv_size;     /* Total size of incoming message (in #items)      */
    int aligned_int;         /* size of an int padded for alignment             */
    int dest;                /* temporary destination partition.                */
    int include_parts = 0;   /* flag indicating whether partition info is
                            provided */
    int ierr = ZOLTAN_OK;
    int actual_num_exp = 0;
    int actual_exp_allocated = 0;
    ZOLTAN_ID_PTR actual_exp_gids = NULL;    /* Arrays containing only objs to  */
    ZOLTAN_ID_PTR actual_exp_lids = NULL;    /* actually be packed.  Objs that  */
    int *actual_exp_procs = NULL;            /* are changing partition but not  */
    int *actual_exp_to_part = NULL;          /* processor may not be included.  */
    int actual_num_imp = 0;
    int actual_imp_allocated = 0;
    ZOLTAN_ID_PTR actual_imp_gids = NULL;    /* Arrays containing only objs to  */
    ZOLTAN_ID_PTR actual_imp_lids = NULL;    /* actually be imported. Objs that  */
    int *actual_imp_procs = NULL;            /* are changing partition but not  */
    int *actual_imp_to_part = NULL;          /* processor may not be included.  */

    ZOLTAN_TRACE_ENTER(zz, yo);

    /*
     *  Return if this processor is not in the Zoltan structure's
     *  communicator.
     */

    if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        goto End;
    }

    /*
     *  Check that all procs use the same id types.
     */

    ierr = check_input(zz,
                       ((num_export >= 0 && export_to_part) ||
                        (num_import >= 0 && import_to_part)),
                       &include_parts);
    if (ierr != ZOLTAN_OK)
        goto End;

    num_gid_entries = zz->Num_GID;
    num_lid_entries = zz->Num_LID;

    /*
     *  Check that all necessary query functions are available.
     */

    if (zz->Get_Obj_Size == NULL && zz->Get_Obj_Size_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_OBJ_SIZE_FN or ZOLTAN_OBJ_SIZE_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Pack_Obj == NULL && zz->Pack_Obj_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_PACK_OBJ_FN or ZOLTAN_PACK_OBJ_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Unpack_Obj == NULL && zz->Unpack_Obj_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_UNPACK_OBJ_FN or ZOLTAN_UNPACK_OBJ_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }


    if (num_export >= 0) {

        /* Build the actual export arrays */
        ierr = actual_arrays(zz, num_gid_entries, num_lid_entries,
                             num_export, export_global_ids, export_local_ids,
                             export_procs, export_to_part,
                             &actual_num_exp, &actual_exp_gids, &actual_exp_lids,
                             &actual_exp_procs, &actual_exp_to_part,
                             &actual_exp_allocated);
        if (ierr < 0)
            goto End;

        /* Compute communication map based on actual exports.  */

        msgtag = 32767;
        ierr = Zoltan_Comm_Create(&exp_plan, actual_num_exp, actual_exp_procs,
                                  zz->Communicator, msgtag, &actual_num_imp);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Error returned from Zoltan_Comm_Create.");
            goto End;
        }
    }

    else if (num_import >= 0) {

        /* Build the actual import arrays */
        ierr = actual_arrays(zz, num_gid_entries, num_lid_entries,
                             num_import, import_global_ids, import_local_ids,
                             import_procs, import_to_part,
                             &actual_num_imp, &actual_imp_gids, &actual_imp_lids,
                             &actual_imp_procs, &actual_imp_to_part,
                             &actual_imp_allocated);
        if (ierr < 0)
            goto End;

        /* Compute communication map based on imports.  */
        msgtag = 32767;
        ierr = Zoltan_Comm_Create(&imp_plan, actual_num_imp, actual_imp_procs,
                                  zz->Communicator, msgtag, &actual_num_exp);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Error returned from Zoltan_Comm_Create.");
            goto End;
        }

        /* Compute actual export lists for packing objects */
        if (actual_num_exp > 0) {
            actual_exp_allocated = 1;
            actual_exp_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, actual_num_exp);
            actual_exp_lids = ZOLTAN_MALLOC_LID_ARRAY(zz, actual_num_exp);
            actual_exp_procs = (int *) ZOLTAN_MALLOC(sizeof(int) * actual_num_exp);
            if (include_parts)
                actual_exp_to_part = (int *) ZOLTAN_MALLOC(sizeof(int)*actual_num_exp);
            if (actual_exp_gids == NULL ||
                    (num_lid_entries && actual_exp_lids == NULL) ||
                    actual_exp_procs == NULL ||
                    (import_to_part != NULL && actual_exp_to_part == NULL)) {
                Zoltan_Multifree(__FILE__, __LINE__, 4,
                                 &actual_exp_gids, &actual_exp_lids,
                                 &actual_exp_procs, &actual_exp_to_part);
                ierr = ZOLTAN_MEMERR;
                goto End;
            }
        }

        msgtag2 = 32766;
        ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_gids,
                              (int) (sizeof(ZOLTAN_ID_TYPE)*(num_gid_entries)),
                              (char *) actual_exp_gids);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
            goto End;
        }

        if (num_lid_entries) {
            msgtag2--;
            ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_lids,
                                  (int) (sizeof(ZOLTAN_ID_TYPE)*num_lid_entries),
                                  (char *) actual_exp_lids);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
                goto End;
            }
        }

        Zoltan_Comm_Info(imp_plan, NULL, NULL, NULL, NULL, NULL, NULL,
                         NULL, NULL, NULL, NULL, NULL, actual_exp_procs, NULL);

        if (include_parts) {
            msgtag2--;
            ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_to_part,
                                  (int) sizeof(int), (char *) actual_exp_to_part);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
                goto End;
            }
        }

        /* Create inverse plan (i.e., plan based on exports) so can set
         * variable sizes.
         * (Zoltan_Comm_Do_Reverse(imp_plan, ...) allows sending variable
         * but does not tell how large to allocate receive buffer.
         */
        ierr = Zoltan_Comm_Invert_Plan(&imp_plan);
        exp_plan = imp_plan;
        imp_plan = NULL;
    }
    else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Import or export lists needed.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Migrate.Pre_Migrate_PP != NULL) {
        zz->Migrate.Pre_Migrate_PP(zz->Migrate.Pre_Migrate_PP_Data,
                                   num_gid_entries, num_lid_entries,
                                   num_import, import_global_ids,
                                   import_local_ids, import_procs, import_to_part,
                                   num_export, export_global_ids,
                                   export_local_ids, export_procs, export_to_part,
                                   &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_PRE_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Pre_Migrate != NULL) {
        zz->Migrate.Pre_Migrate(zz->Migrate.Pre_Migrate_Data,
                                num_gid_entries, num_lid_entries,
                                num_import, import_global_ids,
                                import_local_ids, import_procs,
                                num_export, export_global_ids,
                                export_local_ids, export_procs,
                                &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_PRE_MIGRATE_FN function.");
            goto End;
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done pre-migration processing");

    id_size = Zoltan_Align(num_gid_entries * sizeof(ZOLTAN_ID_TYPE));
    /* Note that alignment is not strictly necessary
       when ZOLTAN_ID_TYPE is int or unsigned int. */
    aligned_int = Zoltan_Align(sizeof(int));
    tag_size = id_size + aligned_int;

    /*
     * For each object, allow space for its global ID and its data plus
     * one int (for the object data size).
     * Zoltan will pack the global IDs; the application must pack the data
     * through the pack routine.  Zoltan needs the global IDs for unpacking,
     * as the order of the data received during communication is not
     * necessarily the same order as import_global_ids[].
     * Zoltan also needs to communicate the sizes of the objects because
     * only the sender knows the size of each object.
     */
    if (actual_num_exp > 0) {
        sizes = (int *) ZOLTAN_MALLOC(actual_num_exp * sizeof(int));
        if (!sizes) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }

        if (zz->Get_Obj_Size_Multi != NULL) {
            zz->Get_Obj_Size_Multi(zz->Get_Obj_Size_Multi_Data,
                                   num_gid_entries, num_lid_entries, actual_num_exp,
                                   actual_exp_gids, actual_exp_lids, sizes, &ierr);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_OBJ_SIZE_MULTI function.");
                goto End;
            }
        }
        else {
            for (i = 0; i < actual_num_exp; i++) {
                lid = (num_lid_entries ? &(actual_exp_lids[i*num_lid_entries]) : NULL);
                sizes[i] = zz->Get_Obj_Size(zz->Get_Obj_Size_Data,
                                            num_gid_entries, num_lid_entries,
                                            &(actual_exp_gids[i*num_gid_entries]),
                                            lid, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_OBJ_SIZE function.");
                    goto End;
                }
            }
        }

        total_send_size = 0;

        for (i = 0; i < actual_num_exp; i++) {
            sizes[i] = Zoltan_Align(sizes[i]);
            total_send_size += sizes[i] + tag_size;
        }
        export_buf = (char *) ZOLTAN_CALLOC(total_send_size, sizeof(char));
        if (!export_buf) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }

        if (zz->Pack_Obj_Multi != NULL) {
            /* Allocate an index array for ZOLTAN_PACK_OBJ_MULTI_FN. */
            idx = (int *) ZOLTAN_MALLOC(actual_num_exp * sizeof(int));
            if (!idx) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
                ierr = ZOLTAN_MEMERR;
                goto End;
            }
        }

        /*
         *  Pack the objects for export.
         */

        idx_cnt = 0;
        tmp = export_buf;
        for (i = 0; i < actual_num_exp; i++) {

            /* Pack the object's global ID */
            tmp_id = (ZOLTAN_ID_PTR) tmp;
            ZOLTAN_SET_GID(zz, tmp_id, &(actual_exp_gids[i*num_gid_entries]));
            tmp += id_size;

            /* Pack the object's size */
            *((int *)tmp) = sizes[i];
            tmp += aligned_int;

            /* If using ZOLTAN_PACK_OBJ_MULTI_FN, build the index array. */
            idx_cnt += tag_size;
            if (idx != NULL) {
                idx[i] = idx_cnt;
            }
            tmp += sizes[i];
            idx_cnt += sizes[i];
        }

        if (zz->Pack_Obj_Multi != NULL) {
            if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                printf("[%1d] DEBUG in %s: Packing objects with multi-pack\n",
                       zz->Proc, yo);
            }
            zz->Pack_Obj_Multi(zz->Pack_Obj_Multi_Data,
                               num_gid_entries, num_lid_entries, actual_num_exp,
                               actual_exp_gids, actual_exp_lids,
                               (actual_exp_to_part!=NULL ? actual_exp_to_part
                                : actual_exp_procs),
                               sizes, idx, export_buf, &ierr);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_PACK_OBJ_MULTI function.");
                goto End;
            }
        }
        else {
            tmp = export_buf + tag_size;
            for (i = 0; i < actual_num_exp; i++) {
                if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                    printf("[%1d] DEBUG in %s: Packing object with gid ", zz->Proc, yo);
                    ZOLTAN_PRINT_GID(zz, &(actual_exp_gids[i*num_gid_entries]));
                    printf("size = %d bytes\n", sizes[i]);
                }

                /* Pack the object's data */
                lid = (num_lid_entries ? &(actual_exp_lids[i*num_lid_entries]) : NULL);
                dest = (actual_exp_to_part != NULL ? actual_exp_to_part[i]
                        : actual_exp_procs[i]);
                zz->Pack_Obj(zz->Pack_Obj_Data,
                             num_gid_entries, num_lid_entries,
                             &(actual_exp_gids[i*num_gid_entries]), lid, dest,
                             sizes[i], tmp, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_PACK_OBJ function.");
                    goto End;
                }
                tmp += sizes[i] + tag_size;
            }
        }
        ZOLTAN_FREE(&idx);
        tmp_id = NULL;
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done packing objects");


    /* Modify sizes[] to contain message sizes, not object sizes */
    for (i=0; i<actual_num_exp; i++) {
        sizes[i] += tag_size;
    }

    msgtag--;
    ierr = Zoltan_Comm_Resize(exp_plan, sizes, msgtag, &total_recv_size);
    if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Resize.");
        goto End;
    }

    if (actual_num_imp > 0) {
        import_buf = (char *) ZOLTAN_MALLOC(total_recv_size);
        if (!import_buf) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }
    }

    /*
     *  Send the export data using the communication plan.
     */

    msgtag2 = 32765;
    ierr = Zoltan_Comm_Do(exp_plan, msgtag2, export_buf, 1, import_buf);
    if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
        goto End;
    }

    /*
     *  Free whatever memory we can.
     */

    Zoltan_Comm_Destroy(&exp_plan);
    ZOLTAN_FREE(&export_buf);
    ZOLTAN_FREE(&sizes);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done communication");

    /*
     *  Perform application-specified processing before unpacking the data.
     */
    if (zz->Migrate.Mid_Migrate_PP != NULL) {
        zz->Migrate.Mid_Migrate_PP(zz->Migrate.Mid_Migrate_PP_Data,
                                   num_gid_entries, num_lid_entries,
                                   num_import, import_global_ids,
                                   import_local_ids, import_procs, import_to_part,
                                   num_export, export_global_ids,
                                   export_local_ids, export_procs, export_to_part,
                                   &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_MID_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Mid_Migrate != NULL) {
        zz->Migrate.Mid_Migrate(zz->Migrate.Mid_Migrate_Data,
                                num_gid_entries, num_lid_entries,
                                num_import, import_global_ids,
                                import_local_ids, import_procs,
                                num_export, export_global_ids,
                                export_local_ids, export_procs,
                                &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_MID_MIGRATE_FN function.");
            goto End;
        }
    }

    /*
     *  Unpack the object data.
     */

    if (actual_num_imp > 0) {

        if (zz->Unpack_Obj_Multi != NULL) {

            /* Allocate and fill input arrays for Unpack_Obj_Multi. */
            sizes = (int *) ZOLTAN_MALLOC(actual_num_imp * sizeof(int));
            tmp_id = (ZOLTAN_ID_PTR) ZOLTAN_MALLOC_GID_ARRAY(zz, actual_num_imp);
            idx = (int *) ZOLTAN_MALLOC(actual_num_imp * sizeof(int));
            if (!sizes || !tmp_id || !idx) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
                ierr = ZOLTAN_MEMERR;
                goto End;
            }

            tmp = import_buf;
            idx_cnt = 0;
            for (i = 0; i < actual_num_imp; i++) {

                /* Unpack the object's global ID */
                ZOLTAN_SET_GID(zz, &(tmp_id[i*num_gid_entries]), (ZOLTAN_ID_PTR) tmp);
                tmp += id_size;

                /* Unpack the object's size */
                sizes[i] = *((int *)tmp);
                tmp += aligned_int;

                /* If using ZOLTAN_UNPACK_OBJ_MULTI_FN, build the index array. */
                idx_cnt += tag_size;
                if (idx != NULL) {
                    idx[i] = idx_cnt;
                }

                tmp += sizes[i];
                idx_cnt += sizes[i];
            }

            if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                printf("[%1d] DEBUG in %s: Unpacking objects with multi-fn\n",
                       zz->Proc,yo);
            }
            zz->Unpack_Obj_Multi(zz->Unpack_Obj_Multi_Data, num_gid_entries,
                                 actual_num_imp, tmp_id, sizes, idx, import_buf, &ierr);
            ZOLTAN_FREE(&import_buf);
            ZOLTAN_FREE(&sizes);
            ZOLTAN_FREE(&tmp_id);
            ZOLTAN_FREE(&idx);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_UNPACK_OBJ_MULTI_FN.");
                goto End;
            }
        }
        else {
            tmp = import_buf;
            for (i = 0; i < actual_num_imp; i++) {
                tmp_size = *((int *)(tmp + id_size));
                if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                    printf("[%1d] DEBUG in %s: Unpacking object with gid ", zz->Proc, yo);
                    ZOLTAN_PRINT_GID(zz, (ZOLTAN_ID_PTR)tmp);
                    printf("size = %d bytes\n", tmp_size);
                }

                /* Unpack the object's data */

                zz->Unpack_Obj(zz->Unpack_Obj_Data, num_gid_entries,
                               (ZOLTAN_ID_PTR) tmp, tmp_size,
                               tmp + tag_size, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_UNPACK_OBJ_FN.");
                    goto End;
                }
                tmp += (tmp_size + tag_size);
            }
            ZOLTAN_FREE(&import_buf);
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done unpacking objects");

    if (zz->Migrate.Post_Migrate_PP != NULL) {
        zz->Migrate.Post_Migrate_PP(zz->Migrate.Post_Migrate_PP_Data,
                                    num_gid_entries, num_lid_entries,
                                    num_import, import_global_ids,
                                    import_local_ids, import_procs, import_to_part,
                                    num_export, export_global_ids,
                                    export_local_ids, export_procs, export_to_part,
                                    &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_POST_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Post_Migrate != NULL) {
        zz->Migrate.Post_Migrate(zz->Migrate.Post_Migrate_Data,
                                 num_gid_entries, num_lid_entries,
                                 num_import, import_global_ids,
                                 import_local_ids, import_procs,
                                 num_export, export_global_ids,
                                 export_local_ids, export_procs,
                                 &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_POST_MIGRATE_FN function.");
            goto End;
        }
    }

End:

    if (actual_exp_allocated) {
        Zoltan_Multifree(__FILE__, __LINE__, 4,
                         &actual_exp_gids, &actual_exp_lids,
                         &actual_exp_procs, &actual_exp_to_part);
    }
    if (actual_imp_allocated) {
        Zoltan_Multifree(__FILE__, __LINE__, 4,
                         &actual_imp_gids, &actual_imp_lids,
                         &actual_imp_procs, &actual_imp_to_part);
    }

    if (ierr < 0) {
        if (exp_plan) Zoltan_Comm_Destroy(&exp_plan);
        Zoltan_Multifree(__FILE__, __LINE__, 5,
                         &import_buf, &tmp_id, &sizes, &idx, &export_buf);
    }
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
}
示例#17
0
/*
 * Build the missing half of a hypergraph's two index/data array pairs.
 *
 * Exactly one of the pairs (hindex, hvertex) or (vindex, vedge) must already
 * be present in hg (an empty pair is accepted when the matching count is
 * zero) and the other pair must be entirely absent.  The absent pair is
 * allocated, stored in hg, and filled in by Zoltan_HG_Mirror().
 *
 * Returns:
 *   ZOLTAN_OK      the mirror arrays were allocated and Zoltan_HG_Mirror ran;
 *   ZOLTAN_MEMERR  an allocation failed (both new arrays are released and the
 *                  corresponding hg fields are handed to Zoltan_Multifree);
 *   ZOLTAN_FATAL   hg is NULL or does not satisfy the precondition above.
 */
int Zoltan_HG_Create_Mirror (
  ZZ *zz,
  HGraph *hg
)
{
  char *yo = "Zoltan_HG_Create_Mirror";
  int src_len, dst_len;         /* row counts of the existing / new index    */
  int *src_index, *src_data;    /* the pair that already exists              */
  int **dst_index, **dst_data;  /* hg fields that will receive the new pair  */
  int from_edges, from_vertices;

  ZOLTAN_TRACE_ENTER(zz, yo);

  /* Classify the input: which pair do we have, and is the other one empty? */
  from_edges = hg
            && (hg->nEdge == 0 || hg->hindex)
            && (hg->nPins == 0 || hg->hvertex)
            && !hg->vindex && !hg->vedge;

  from_vertices = !from_edges && hg
            && (hg->nVtx == 0 || hg->vindex)
            && (hg->nPins == 0 || hg->vedge)
            && !hg->hindex && !hg->hvertex;

  if (!from_edges && !from_vertices) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input error.");
    return ZOLTAN_FATAL;  /* unable to proceed */
  }

  if (from_edges) {
    ZOLTAN_TRACE_DETAIL(zz, yo, "Have hindex; building vindex.");

    src_len   = hg->nEdge;
    src_index = hg->hindex;
    src_data  = hg->hvertex;
    dst_len   = hg->nVtx;
    dst_index = &hg->vindex;
    dst_data  = &hg->vedge;
  }
  else {
    ZOLTAN_TRACE_DETAIL(zz, yo, "Have vindex; building hindex.");

    src_len   = hg->nVtx;
    src_index = hg->vindex;
    src_data  = hg->vedge;
    dst_len   = hg->nEdge;
    dst_index = &hg->hindex;
    dst_data  = &hg->hvertex;
  }

  /* The new index has one entry per output row plus one; the new data
   * array has one entry per pin. */
  *dst_index = (int*) ZOLTAN_MALLOC((dst_len+1) * sizeof(int));
  *dst_data  = (int*) ZOLTAN_MALLOC(hg->nPins * sizeof(int));

  /* A NULL data array is only an error when there are pins to store. */
  if (*dst_index == NULL || (hg->nPins > 0 && *dst_data == NULL)) {
    Zoltan_Multifree (__FILE__, __LINE__, 2, dst_index, dst_data);
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  Zoltan_HG_Mirror(src_len, src_index, src_data,
                    dst_len, *dst_index, *dst_data);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ZOLTAN_OK;
}
示例#18
0
/*
 * Redistribute the local part of a distributed hypergraph onto the
 * processors lo..hi of the old communicator.
 *
 * Steps performed here:
 *   1. build an MPI communicator containing ranks lo..hi of ohg->comm;
 *   2. fill ncomm from it via Zoltan_PHG_Set_2D_Proc_Distrib;
 *   3. assign every old vertex to a new processor column (v2Col) and every
 *      old edge to a new processor row (n2Row) with a simple block split;
 *   4. hand everything to Zoltan_PHG_Redistribute_Hypergraph.
 *
 * Returns ZOLTAN_FATAL when the old communicator has a single processor,
 * ZOLTAN_MEMERR when a work array cannot be allocated, and otherwise the
 * bitwise OR of the codes returned by steps 2 and 4.
 */
int Zoltan_PHG_Redistribute(
  ZZ *zz, 
  PHGPartParams *hgp,     /* Input: parameters; used only for user's
                             request of nProc_x and nProc_y */
  HGraph  *ohg,           /* Input: Local part of distributed hypergraph */
  int     lo, int hi,     /* Input: range of proc ranks (inclusive)
                             to be included in new communicator: ncomm */
  PHGComm *ncomm,         /* Output: Communicators of new distribution */
  HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
  int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                             vertex map from nhg to ohg's local vertex number*/
  int     **vdest         /* Output: allocated with the size nhg->nVtx and
                             stores dest proc in ocomm */
    )   
{
    char * yo = "Zoltan_PHG_Redistribute";
    PHGComm *ocomm = ohg->comm;
    int     *v2Col = NULL, *n2Row = NULL, ierr=ZOLTAN_OK, i, *ranks = NULL;
    int     reqx=hgp->nProc_x_req, reqy=hgp->nProc_y_req;
    float   frac;
    MPI_Group allgrp, newgrp;
    MPI_Comm  nmpicomm;

    if (ocomm->nProc==1){
        errexit("%s: ocomm->nProc==1", yo);
        return ZOLTAN_FATAL;
    }

    /* create a new communicator for procs[lo..hi] */
    MPI_Comm_group(ocomm->Communicator, &allgrp);
    ranks = (int *) ZOLTAN_MALLOC(ocomm->nProc * sizeof(int));
    if (ranks == NULL) {
        /* Without this check the loop below would write through NULL. */
        MPI_Group_free(&allgrp);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
        return ZOLTAN_MEMERR;
    }
    for (i=lo; i<=hi; ++i)
        ranks[i-lo] = i;
    
    MPI_Group_incl(allgrp, hi-lo+1, ranks, &newgrp);
    MPI_Comm_create(ocomm->Communicator, newgrp, &nmpicomm);
    MPI_Group_free(&newgrp);
    MPI_Group_free(&allgrp);   
    ZOLTAN_FREE(&ranks);

    /* Honor the user's processor-grid request only when it asks for a
     * 1D layout (one dimension equal to 1); otherwise pass -1 for both. */
    if (reqx==1 || reqy==1)
        ;
    else
        reqx = reqy = -1;
    
    /* fill ncomm */
    ierr = Zoltan_PHG_Set_2D_Proc_Distrib(ocomm->zz, nmpicomm, 
                                          ocomm->myProc-lo, hi-lo+1, 
                                          reqx, reqy, ncomm);
    
    v2Col = (int *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(int));    
    n2Row = (int *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(int));
    /* A NULL result is only an error when a non-empty array was requested;
     * an empty array is never written by the loops below. */
    if ((ohg->nVtx > 0 && v2Col == NULL) ||
        (ohg->nEdge > 0 && n2Row == NULL)) {
        Zoltan_Multifree(__FILE__, __LINE__, 2,
                         &v2Col, &n2Row);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
        return ZOLTAN_MEMERR;
    }

    /* UVC: TODO very simple straight forward partitioning right now;
       later we can implement a more "load balanced", or smarter
       mechanisms */
    /* KDDKDD 5/11/07:  Round-off error in the computation of v2Col
     * and n2Row can lead to different answers on different platforms.
     * Vertices or edges get sent to different processors during the 
     * split, resulting in different matchings and, thus, different
     * answers.
     * Problem was observed on hg_cage10, zdrive.inp.phg.ipm.nproc_vertex1
     * and zdrive.inp.phg.ipm.nproc_edge1;
     * solaris machine seamus and linux machine patches give different
     * results due to differences in n2Row and v2Col, respectively.  
     * Neither answer is wrong,
     * but the linux results result in FAILED test in test_zoltan.
     */
    /* Block split: vertex i goes to column floor(i / (nVtx / nProc_x)),
     * edge i goes to row floor(i / (nEdge / nProc_y)). */
    frac = (float) ohg->nVtx / (float) ncomm->nProc_x;
    for (i=0; i<ohg->nVtx; ++i) 
        v2Col[i] = (int) ((float) i / frac);
    frac = (float) ohg->nEdge / (float) ncomm->nProc_y;
    for (i=0; i<ohg->nEdge; ++i) 
        n2Row[i] = (int) ((float) i / frac);

    ierr |= Zoltan_PHG_Redistribute_Hypergraph(zz, hgp, ohg, lo, 
                                               v2Col, n2Row, ncomm, 
                                               nhg, vmap, vdest);
    Zoltan_Multifree(__FILE__, __LINE__, 2,
                     &v2Col, &n2Row);
    
    return ierr;
}
示例#19
0
/*
 * serial_fm2: move-based refinement of a two-way partition of a hypergraph.
 * (The name and the hgp->fm_* parameters suggest a Fiduccia-Mattheyses style
 * scheme; the structure below -- gain heaps, vertex locking, rollback to the
 * best prefix of moves -- is what the code actually does.)
 *
 * Parameters:
 *   zz         Zoltan structure; used for zz->Proc in the error messages and
 *              passed to Zoltan_Heap_Init.
 *   hg         hypergraph; both the edge view (hindex/hvertex) and the vertex
 *              view (vindex/vedge) are read.
 *   p          number of parts; anything other than 2 is rejected.
 *   part_sizes target part sizes; entries [0] and [part_dim] are used for
 *              parts 0 and 1 respectively.
 *   part       in/out: part[i] is the part (0 or 1) of vertex i.
 *   hgp        parameters; reads UseFixedVtx, output_level, fm_max_neg_move
 *              and fm_loop_limit.
 *   bal_tol    factor applied to the target weights to get the maximum
 *              allowed weight of each part.
 *
 * Returns ZOLTAN_FATAL if p != 2, ZOLTAN_MEMERR if a work array cannot be
 * allocated, ZOLTAN_OK otherwise (including the early exit when the
 * hypergraph has no edges).
 */
static int serial_fm2 (ZZ *zz,
    HGraph *hg,
    int p,
    float *part_sizes,
    Partition part,
    PHGPartParams *hgp,
    float bal_tol)
{
int    i, j, vertex, edge, *pins[2], *locked = 0, *locked_list = 0, round = 0;
double total_weight, part_weight[2], max_weight[2];
double cutsize_beforepass, best_cutsize, *gain = 0;
HEAP   heap[2];
int    steplimit;
char   *yo="serial_fm2";
/* offset of part 1's entry in part_sizes (see max_weight[1] below) */
int    part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
#ifdef HANDLE_ISOLATED_VERTICES    
 int    isocnt=0;
#endif
#ifdef _DEBUG
 double tw0, imbal, cutsize;
#endif

double error, best_error;
int    best_imbalance, imbalance;

  if (p != 2) {
     ZOLTAN_PRINT_ERROR(zz->Proc, yo, "p!=2 not allowed for local_fm2.");
     return ZOLTAN_FATAL;
     }

  /* nothing to refine when there are no edges */
  if (hg->nEdge == 0)
     return ZOLTAN_OK;

  /* Calculate the weights in each partition and total, then maxima */
  /* Only the first weight component of each vertex is used; vertices have
     unit weight when hg->vwgt is not set. */
  part_weight[0] = 0.0;
  part_weight[1] = 0.0;
  if (hg->vwgt)  {
     for (i = 0; i < hg->nVtx; i++)
        part_weight[part[i]] += hg->vwgt[i*hg->VtxWeightDim];
     total_weight = part_weight[0] + part_weight[1];
  }
  else  {
     total_weight = (double)(hg->nVtx);
     for (i = 0; i < hg->nVtx; i++)
        part_weight[part[i]] += 1.0;
  }
  max_weight[0] = total_weight * bal_tol * part_sizes[0];
  max_weight[1] = total_weight * bal_tol * part_sizes[part_dim];

#ifdef _DEBUG
  tw0 = total_weight * part_sizes[0];
#endif
  
  /* Work arrays (all zero-initialized):
       pins[0]     2*nEdge counters; the second half is aliased as pins[1]
       locked      per vertex: 0 if unlocked, else (part before move)+1
       locked_list vertices in the order they were moved in this pass
       gain        per vertex gain value used as heap key */
  if (!(pins[0]     = (int*)   ZOLTAN_CALLOC(2*hg->nEdge, sizeof(int)))
   || !(locked      = (int*)   ZOLTAN_CALLOC(hg->nVtx,    sizeof(int)))
   || !(locked_list = (int*)   ZOLTAN_CALLOC(hg->nVtx,    sizeof(int)))
   || !(gain        = (double*)ZOLTAN_CALLOC(hg->nVtx,    sizeof(double))) ) {
     Zoltan_Multifree(__FILE__,__LINE__, 4, &pins[0], &locked, &locked_list,
          &gain);
     ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
     return ZOLTAN_MEMERR;
  }
  pins[1] = &(pins[0][hg->nEdge]);

  /* Initial calculation of the pins distribution and gain values */
  /* pins[k][e] = number of pins of edge e that lie in part k */
  for (i = 0; i < hg->nEdge; i++)
     for (j = hg->hindex[i]; j < hg->hindex[i+1]; j++)
        (pins[part[hg->hvertex[j]]][i])++;
  /* For every edge of vertex i: add the edge weight if i is the only pin of
     the edge in its own part, otherwise subtract it if the other part holds
     no pin of the edge.  Edges have unit weight when hg->ewgt is not set. */
  for (i = 0; i < hg->nVtx; i++)
     for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
        edge = hg->vedge[j];
        if (pins[part[i]][edge] == 1)
           gain[i] += (hg->ewgt ? hg->ewgt[edge] : 1.0);
        else if (pins[1-part[i]][edge] == 0)
           gain[i] -= (hg->ewgt ? hg->ewgt[edge] : 1.0);
     }

  /* Initialize the heaps and fill them with the gain values */
  /* One heap per part.  Fixed vertices (UseFixedVtx set and fixed_part >= 0)
     are never inserted and therefore never moved.
     NOTE(review): the return values of Zoltan_Heap_Init are not checked. */
  Zoltan_Heap_Init(zz, &heap[0], hg->nVtx);
  Zoltan_Heap_Init(zz, &heap[1], hg->nVtx);  
  for (i = 0; i < hg->nVtx; i++)
      if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
#ifdef HANDLE_ISOLATED_VERTICES          
          /* Vertices without edges are taken out of their part (weight
             subtracted, part encoded as a negative value) and are
             reassigned after refinement. */
          if (hg->vindex[i+1]==hg->vindex[i]) { /* isolated vertex */
              part_weight[part[i]] -= (hg->vwgt ? hg->vwgt[i*hg->VtxWeightDim] 
                                                : 1.0);
              part[i] = -(part[i]+1); /* remove those vertices from that part*/
              ++isocnt;
          } else
#endif
              Zoltan_Heap_Input(&heap[part[i]], i, gain[i]);
      }
#ifdef _DEBUG
      /* debug build only: a fixed vertex must already sit on the side of the
         bisection implied by fixed_part and bisec_split */
      else {
          int pp = (hg->fixed_part[i] < hg->bisec_split) ? 0 : 1;
          if (part[i]!=pp) 
              errexit("%s: beginning of pass for hg->info=%d vertex %d is fixed at %d bisec_split is %d but its part is %d\n", uMe(hg->comm), hg->info, i, hg->fixed_part[i], hg->bisec_split, part[i]);
          
              
      }
#endif
  Zoltan_Heap_Make(&heap[0]);
  Zoltan_Heap_Make(&heap[1]);

  /* Initialize given partition as best partition */
  /* best_error is the larger overshoot of a part above its maximum weight;
     best_imbalance is nonzero if either part exceeds its maximum. */
  best_cutsize = cutsize_beforepass = Zoltan_PHG_Compute_NetCut(hg->comm, hg, part);
  best_error = MAX (part_weight[0]-max_weight[0], part_weight[1]-max_weight[1]);
  best_imbalance = (part_weight[0]>max_weight[0])||(part_weight[1]>max_weight[1]);
  /* One iteration of this loop is one pass: move vertices one at a time,
     remember the best state seen, then undo the moves made after it. */
  do {
    int step = 0, no_better_steps = 0, number_locked = 0, best_locked = 0;
    int sour, dest;
    double cur_cutsize=best_cutsize;

    round++;
    cutsize_beforepass = best_cutsize;
    if (hgp->output_level > PHG_DEBUG_LIST)
      printf("ROUND %d:\nSTEP VERTEX  PARTS MAX_WGT CHANGE CUTSIZE\n",round);

    /* maximum number of consecutive moves without improvement; a negative
       fm_max_neg_move means "no limit other than nVtx" */
    steplimit = (hgp->fm_max_neg_move < 0) ? hg->nVtx : hgp->fm_max_neg_move;
    /* steplimit = hg->nVtx/4;  Robsys previous choice */

    while (step < hg->nVtx && no_better_steps < steplimit) {
        step++;
        no_better_steps++;

        /* Choose the source part: the non-empty heap if one is empty, else
           an overweight part (part 0 checked first), else the part whose
           best vertex has the strictly larger heap value (ties go to 1). */
        if (Zoltan_Heap_Empty(&heap[0]))
           sour = 1;
        else if (Zoltan_Heap_Empty(&heap[1]))
           sour = 0;
        else if (part_weight[0] > max_weight[0])
           sour = 0;
        else if (part_weight[1] > max_weight[1])
           sour = 1;
        else if (Zoltan_Heap_Max_Value(&heap[0])
              >  Zoltan_Heap_Max_Value(&heap[1]))
           sour = 0;
        else
           sour = 1;
        dest = 1-sour;
        vertex = Zoltan_Heap_Extract_Max(&heap[sour]);
        /* presumably a negative result means the heap was empty -- the pass
           ends here in that case */
        if (vertex<0)
            break;

        /* Lock the vertex, remembering the part it came from (+1 so that 0
           keeps meaning "not locked"), and record the move order. */
        locked[vertex] = part[vertex] + 1;
        locked_list[number_locked++] = vertex;
        cur_cutsize -= gain[vertex];        
        
        Zoltan_HG_move_vertex (hg, vertex, sour, dest, part, pins, gain, heap);

#ifdef _DEBUG
        imbal = (tw0==0.0) ? 0.0 : (part_weight[0]-tw0)/tw0;
        uprintf(hg->comm, "%4d: SEQ moving %4d from %d to %d cut=%6.0lf bal=%.3lf\n", step, vertex, sour, dest, cur_cutsize, imbal);
        /* Just for debugging */
        cutsize = Zoltan_PHG_Compute_NetCut(hg->comm, hg, part);
        if (cur_cutsize!=cutsize) {
            errexit("%s: SEQ after move cutsize=%.2lf Verify: total=%.2lf\n", uMe(hg->comm), cur_cutsize,
                    cutsize);
        }
#endif
        
        part_weight[sour] -= (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        part_weight[dest] += (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);

        error = MAX (part_weight[0]-max_weight[0],part_weight[1]-max_weight[1]);
        imbalance = (part_weight[0]>max_weight[0])||(part_weight[1]>max_weight[1]);

        /* Accept the current state as the new best when either the best
           state so far violated the balance and this one overshoots less,
           or this state is balanced and has a smaller cut. */
        if ( ( best_imbalance && (error < best_error))
          || (!imbalance && (cur_cutsize < best_cutsize)))  {
            best_error   = error;
            best_imbalance = imbalance;
            best_locked  = number_locked;
            best_cutsize = cur_cutsize;
            no_better_steps = 0;
        }
        if (hgp->output_level > PHG_DEBUG_LIST+1)
           printf ("%4d %6d %2d->%2d %7.2f %f %f\n", step, vertex, sour, dest,
            error, cur_cutsize - cutsize_beforepass, cur_cutsize);
    }

#ifdef _DEBUG
    uprintf(hg->comm, "SEQ Best CUT=%6.0lf at move %d\n", best_cutsize, best_locked);
#endif
    
    /* rollback */
    /* Undo, newest first, every move made after the best state: move the
       vertex back to the part stored in locked[], fix the part weights,
       put it back into that part's heap and unlock it. */
     while (number_locked != best_locked) {
        vertex = locked_list[--number_locked];
        sour = part[vertex];
        dest = locked[vertex] - 1;

        Zoltan_HG_move_vertex (hg, vertex, sour, dest, part, pins, gain, heap);

        part_weight[sour] -= (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        part_weight[dest] += (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        Zoltan_Heap_Input(&heap[dest], vertex, gain[vertex]);
        locked[vertex] = 0;
     }

     /* only update data structures if we're going to do another pass */
     /* The vertices whose moves were kept are unlocked and inserted into the
        heap of their new part, then both heaps are rebuilt.  The condition
        is the same as the loop condition below. */
     if ((best_cutsize < cutsize_beforepass) &&  (round < hgp->fm_loop_limit)) {         
         while (number_locked) {
             vertex = locked_list[--number_locked];
             locked[vertex] = 0;
             Zoltan_Heap_Input(&heap[part[vertex]], vertex, gain[vertex]);
         }
         
         Zoltan_Heap_Make(&(heap[0]));
         Zoltan_Heap_Make(&(heap[1]));
     }
  /* repeat while the last pass reduced the cut and the pass limit allows */
  } while ((best_cutsize < cutsize_beforepass) &&  (round < hgp->fm_loop_limit));

#ifdef HANDLE_ISOLATED_VERTICES
  /* Reassign the isolated vertices removed before refinement: each one goes
     to part 0 while part 0 is below its target weight, otherwise to part 1. */
  if (isocnt) {
#ifdef _DEBUG      
      double isoimbalbefore, isoimbal;
#endif
      double targetw0;
      
      targetw0 = total_weight * part_sizes[0];
#ifdef _DEBUG      
      isoimbalbefore = (targetw0==0) ? 0.0 : (part_weight[0] - targetw0)/ targetw0;
#endif
      for (i=0; i < hg->nVtx; ++i)
          if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
              if (hg->vindex[i+1]==hg->vindex[i])  { /* go over isolated vertices */
                  int npno = (part_weight[0] <  targetw0) ? 0 : 1;
                  part_weight[npno] += (hg->vwgt ? hg->vwgt[i*hg->VtxWeightDim] 
                                                 : 1.0);                
                  part[i] = npno;
              }
          }
#ifdef _DEBUG      
      isoimbal = (targetw0==0) ? 0.0 : (part_weight[0] - targetw0)/ targetw0;
      uprintf(hg->comm, "SEQ %d isolated vertices, balance before: %.3lf  after: %.3lf\n", isocnt, isoimbalbefore, isoimbal);
#endif
  }
#endif  
  
  /* gain_check (hg, gain, part, pins); */
  /* release the work arrays (pins[1] aliases pins[0]) and both heaps */
  Zoltan_Multifree(__FILE__,__LINE__, 4, &pins[0], &locked, &locked_list, &gain);
  Zoltan_Heap_Free(&heap[0]);
  Zoltan_Heap_Free(&heap[1]);

  return ZOLTAN_OK;
}
示例#20
0
/*
 * Zoltan_Reftree_Sum_Weights
 *
 * Function to sum the weights in the refinement tree.  On input the
 * refinement tree should be valid and have weight set.  On output the
 * values in summed_weight at each node is the sum of the weights in the
 * subtree with that node as root.
 * This function also sets assigned_to_me for interior nodes to be
 * 1 if the entire subtree is assigned to this processor
 * 0 if none of the subtree is assigned to this processor
 * -1 if some of the subtree is assigned to this processor
 *
 * Parameters:
 *   zz  Zoltan structure; the refinement tree root and its hash table are
 *       read from zz->LB.Data_Structure.
 *
 * Returns:
 *   ZOLTAN_OK      on success,
 *   ZOLTAN_FATAL   if the tree root is NULL, or if a leaf this processor
 *                  requested cannot be found again in the hash table,
 *   ZOLTAN_MEMERR  if any work array cannot be allocated.
 *
 * Communication: calls MPI_Allgather, MPI_Allgatherv and MPI_Reduce_scatter
 * on zz->Communicator (the latter two only when some processor has a
 * non-empty request list).
 */
static int Zoltan_Reftree_Sum_Weights(ZZ *zz)

{
char *yo = "Zoltan_Reftree_Sum_Weights";
ZOLTAN_REFTREE *root;         /* Root of the refinement tree */
int wdim;                 /* Dimension of the weight array */
int i,j;                  /* loop counters */
int count;                /* counter */
ZOLTAN_ID_PTR leaf_list = NULL;      
                          /* leaves for which some proc requests weight */
ZOLTAN_ID_PTR all_leaflist = NULL;   
                          /* leaf_list from all processors */
int reqsize;              /* length of leaf_list */
int *reqsize_all;         /* reqsize from all processors */
int sum_reqsize;          /* sum of all reqsize */
int *displs;              /* running sum of all reqsize */
int my_start;             /* position in leaf_list of this proc's list */
int nproc;                /* number of processors */
ZOLTAN_REFTREE *node;         /* a node in the refinement tree */
struct Zoltan_Reftree_hash_node **hashtab; /* hash table */
int hashsize;             /* dimension of hash table */
float *send_float;        /* sending message of floats */
float *req_weights;       /* the requested weights */
int num_gid_entries = zz->Num_GID; /* Number of array entries in a global ID */

   ZOLTAN_TRACE_ENTER(zz, yo);

  /*
   * set the root and hash table
   */

  root = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->reftree_root;
  if (root == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Refinement tree not defined.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_FATAL);
  }
  hashtab  = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->hash_table;
  hashsize = ((struct Zoltan_Reftree_data_struct *)zz->LB.Data_Structure)->hash_table_size;

  /*
   * Determine the dimension of the weight array.
   * A weight dimension of 0 is treated as a single weight per object.
   */

  if (zz->Obj_Weight_Dim == 0) {
    wdim = 1;
  } else {
    wdim = zz->Obj_Weight_Dim;
  }

  /*
   * In the first pass, sum the weights of the nodes that are assigned to
   * this processor, and count the leaves that are not.
   */

  count = 0;
  for (i=0; i<root->num_child; i++) {
    Zoltan_Reftree_Sum_My_Weights(zz,&(root->children[i]),&count,wdim);
  }
  root->assigned_to_me = -1;

  /*
   * Make a list of the leaves that are not assigned to this processor.
   * When there are none, a single GID is still allocated so that
   * leaf_list is a valid (non-NULL) buffer.
   */

  if (count == 0)
    leaf_list = ZOLTAN_MALLOC_GID(zz);
  else
    leaf_list = ZOLTAN_MALLOC_GID_ARRAY(zz, count);
  if (leaf_list == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_MEMERR);
  }

  /* count is reset and then advanced again while the list is filled */
  count = 0;
  Zoltan_Reftree_List_Other_Leaves(zz, root,leaf_list,&count);

  /*
   * Get the unknown leaf weights from other processors.
   */

  nproc = zz->Num_Proc;
  reqsize = count;

  /*
   * Build a list of all processors' request lists by concatenating them in
   * the order of the processor ranks
   */

  /*
   * Determine the request size of all processors
   */

  reqsize_all = (int *)ZOLTAN_MALLOC(nproc*sizeof(int));
  displs = (int *)ZOLTAN_MALLOC(nproc*sizeof(int));
  if (reqsize_all == NULL || displs == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    Zoltan_Multifree(__FILE__, __LINE__, 3, &displs,
                                            &reqsize_all,
                                            &leaf_list);
    ZOLTAN_TRACE_EXIT(zz, yo);
    return(ZOLTAN_MEMERR);
  }

  MPI_Allgather((void *)&reqsize,1,MPI_INT,(void *)reqsize_all,1,MPI_INT,
                zz->Communicator);
  /* displs[i] = number of requested leaves on processors 0..i-1 */
  displs[0] = 0;
  for (i=1; i<nproc; i++) displs[i] = displs[i-1]+reqsize_all[i-1];
  sum_reqsize = displs[nproc-1] + reqsize_all[nproc-1];
  my_start = displs[zz->Proc];

  /*
   * If sum_reqsize is 0, nothing needs to be communicated
   */

  if (sum_reqsize == 0) {
    Zoltan_Multifree(__FILE__, __LINE__, 3, &displs,
                                            &reqsize_all,
                                            &leaf_list);
  }
  else {

  /*
   * Gather the request list from all processors
   */

    all_leaflist = ZOLTAN_MALLOC_GID_ARRAY(zz, sum_reqsize);
    if (all_leaflist == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 4, &all_leaflist,
                                              &displs,
                                              &reqsize_all,
                                              &leaf_list);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    /* KDDKDD Changed MPI_BYTE to ZOLTAN_ID_MPI_TYPE  */

    /* Account for number of array entries in an ID:
     * counts and displacements are temporarily converted from
     * "number of IDs" to "number of ID array entries". */
    for (i=0; i<nproc; i++) {
      reqsize_all[i] = reqsize_all[i]*num_gid_entries;
      displs[i] = displs[i]*num_gid_entries;
    }

    MPI_Allgatherv((void *)leaf_list,reqsize*num_gid_entries,ZOLTAN_ID_MPI_TYPE,
                   (void *)all_leaflist,reqsize_all,displs,ZOLTAN_ID_MPI_TYPE,
                   zz->Communicator);

    ZOLTAN_FREE(&displs);
    ZOLTAN_FREE(&leaf_list);

    /* Convert the counts back to "number of IDs" */
    for (i=0; i<nproc; i++) reqsize_all[i] = reqsize_all[i]/num_gid_entries;

  /* 
   * Create a list with the partial sums this processor has
   */

    send_float = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim*sum_reqsize);
    if (send_float == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 3, &send_float,
                                              &all_leaflist,
                                              &reqsize_all);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    /* A leaf that is not in this processor's hash table contributes 0 */
    for (i=0; i<sum_reqsize; i++) {
      node = Zoltan_Reftree_hash_lookup(zz, hashtab,
                                    &(all_leaflist[i*num_gid_entries]),
                                    hashsize);
      if (node == NULL)
         for (j=0; j<wdim; j++) send_float[i*wdim+j] = 0.0;
      else
         for (j=0; j<wdim; j++) send_float[i*wdim+j] = node->my_sum_weight[j];
    }

  /*
   * Sum the weights over all the processors
   */

    /* Allocate at least wdim floats so req_weights is never a zero-size
     * request, even when this processor asked for nothing. */
    if (reqsize == 0)
      req_weights = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim);
    else
      req_weights = (float *) ZOLTAN_MALLOC(sizeof(float)*wdim*reqsize);
    if (req_weights == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_Multifree(__FILE__, __LINE__, 4, &req_weights,
                                              &send_float,
                                              &all_leaflist,
                                              &reqsize_all);
      ZOLTAN_TRACE_EXIT(zz, yo);
      return(ZOLTAN_MEMERR);
    }

    /* NOTE(review): reqsize_all holds leaf counts while send_float holds
     * wdim floats per leaf; confirm the receive counts are as intended
     * when wdim > 1. */
    MPI_Reduce_scatter((void *)send_float, (void *)req_weights, reqsize_all,
                       MPI_FLOAT, MPI_SUM, zz->Communicator);

    ZOLTAN_FREE(&send_float);
    ZOLTAN_FREE(&reqsize_all);

  /*
   * Set the weights this processor requested
   */

    for (i=0; i<count; i++) {
      node = Zoltan_Reftree_hash_lookup(zz, hashtab,
                                  &(all_leaflist[(i+my_start)*num_gid_entries]),
                                  hashsize);
      /* The lookup can return NULL (see the check in the loop above);
       * fail cleanly instead of dereferencing a NULL node.  All collective
       * communication is already complete at this point. */
      if (node == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Requested leaf not found in hash table.");
        Zoltan_Multifree(__FILE__, __LINE__, 2, &req_weights,
                                                &all_leaflist);
        ZOLTAN_TRACE_EXIT(zz, yo);
        return(ZOLTAN_FATAL);
      }
      for (j=0; j<wdim; j++) node->summed_weight[j] = req_weights[i*wdim+j];
    }

    ZOLTAN_FREE(&req_weights);
    ZOLTAN_FREE(&all_leaflist);
  }

  /*
   * All the leaves now have summed_weight set.
   * Sum the weights throughout the tree.
   */

  Zoltan_Reftree_Sum_All_Weights(zz,root,wdim);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return(ZOLTAN_OK);
}
示例#21
0
/*
 * Zoltan_Order
 *
 * Main user-call for ordering.
 * Input:
 *   zz                   a Zoltan structure with appropriate function
 *                        pointers set.
 *   num_gid_entries      number of array entries per global ID; must equal
 *                        zz->Num_GID (after it has been maximised over all
 *                        processors), otherwise ZOLTAN_FATAL is returned.
 *   num_obj              number of objects in gids.
 *   gids                 a list of global ids.
 * Output:
 *   permuted_global_ids  filled by Zoltan_DD_Find with the value the
 *                        ordering routine stored for each entry of gids;
 *                        the debug output below reads one entry per object.
 * Return values:
 *   Zoltan error code:
 *     ZOLTAN_OK     immediately, if this processor is not in the Zoltan
 *                   structure's communicator;
 *     ZOLTAN_WARN   if ORDER_METHOD is "NONE" (no ordering performed);
 *     ZOLTAN_FATAL  on a num_gid_entries mismatch, NULL gids or
 *                   permuted_global_ids with num_obj != 0, an unknown
 *                   ordering method, or a missing ZOLTAN_NUM_OBJ_FN;
 *     the error code of Get_Num_Obj or of the ordering routine if they fail;
 *     otherwise the code returned by Zoltan_DD_Create (or by Zoltan_HUND
 *     when that method is selected).
 */
int Zoltan_Order (
    struct Zoltan_Struct *zz,
    int num_gid_entries,
    int num_obj,
    ZOLTAN_ID_PTR gids,
    ZOLTAN_ID_PTR permuted_global_ids
)
{
    char *yo = "Zoltan_Order";
    int ierr;
    double start_time, end_time;
    double order_time[2] = {0.0,0.0};
    char msg[256];
    int comm[2],gcomm[2];
    ZOLTAN_ORDER_FN *Order_fn;
    struct Zoltan_Order_Options opt;
    ZOLTAN_ID_PTR local_gids=NULL, lids=NULL;
    int local_num_obj;
    int *local_rank = NULL;
    struct Zoltan_DD_Struct *dd = NULL;


    ZOLTAN_TRACE_ENTER(zz, yo);

    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
        Zoltan_Print_Key_Params(zz);

    start_time = Zoltan_Time(zz->Timer);

    /*
     * Compute Max number of array entries per ID over all processors.
     * This is a sanity-maintaining step; we don't want different
     * processors to have different values for these numbers.
     *
     * A processor that is not in the Zoltan structure's communicator must
     * not take part in a collective on it, so the reduction is skipped for
     * such a processor; it returns ZOLTAN_OK a little further down.
     */
    if (!ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        comm[0] = zz->Num_GID;
        comm[1] = zz->Num_LID;
        MPI_Allreduce(comm, gcomm, 2, MPI_INT, MPI_MAX, zz->Communicator);
        zz->Num_GID = gcomm[0];
    }

    if (num_gid_entries != zz->Num_GID) {
        sprintf(msg, "num_gid_entries=%d is not equal to parameter setting "
                "NUM_GID_ENTRIES=%d\n", num_gid_entries, zz->Num_GID);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }


    /* Reset the ordering information stored in the Zoltan structure */
    zz->Order.nbr_objects = num_obj;
    zz->Order.start = NULL;
    zz->Order.ancestor = NULL;
    zz->Order.leaves = NULL;
    zz->Order.nbr_leaves = 0;
    zz->Order.nbr_blocks = 0;

    /*
     *  Return if this processor is not in the Zoltan structure's
     *  communicator.
     */

    if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_OK);
    }

    /*
     *  Get ordering options from parameter list.
     */

    /* Set default parameter values */
#ifdef HAVE_MPI
    strncpy(opt.method, "PARMETIS", MAX_PARAM_STRING_LEN);
    strcpy(zz->Order.order_type, "GLOBAL");
#else
    strncpy(opt.method, "METIS", MAX_PARAM_STRING_LEN);
    strcpy(zz->Order.order_type, "LOCAL");
#endif /* HAVE_MPI */

    opt.use_order_info = 0;
    opt.start_index = 0;

    Zoltan_Bind_Param(Order_params, "ORDER_METHOD", (void *) opt.method);
    Zoltan_Bind_Param(Order_params, "USE_ORDER_INFO", (void *) &opt.use_order_info);

    Zoltan_Assign_Param_Vals(zz->Params, Order_params, zz->Debug_Level,
                             zz->Proc, zz->Debug_Proc);

    /*
     *  Check that the user has allocated space for the return args.
     */
    if (num_obj && !(gids && permuted_global_ids)) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input argument is NULL. Please allocate all required arrays before calling this routine.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    /*
     *  Find the selected method.
     *  Every branch below either sets Order_fn or leaves this part of the
     *  function (return / goto End).
     */

    if (!strcmp(opt.method, "NONE")) {
        if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Ordering method selected == NONE; no ordering performed\n");

        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_WARN);
    }
    else if (!strcmp(opt.method, "LOCAL_HSFC"))
    {
        Order_fn = Zoltan_LocalHSFC_Order;
        strcpy(zz->Order.order_type, "LOCAL"); /*MMW, not sure about this*/
    }
#ifdef ZOLTAN_PARMETIS
    else if (!strcmp(opt.method, "METIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        strcpy(zz->Order.order_type, "LOCAL");
    }
    else if (!strcmp(opt.method, "PARMETIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        strcpy(zz->Order.order_type, "GLOBAL");
    }
#endif /* ZOLTAN_PARMETIS */
#ifdef ZOLTAN_SCOTCH
    else if (!strcmp(opt.method, "SCOTCH")) {
        Order_fn = Zoltan_Scotch_Order;
        strcpy(zz->Order.order_type, "LOCAL");
    }
    else if (!strcmp(opt.method, "PTSCOTCH")) {
        Order_fn = Zoltan_Scotch_Order;
        strcpy(zz->Order.order_type, "GLOBAL");
    }
#endif /* ZOLTAN_SCOTCH */
#ifdef ZOLTAN_HUND
    else if (!strcasecmp(opt.method, "HUND")) {
        /* HUND writes permuted_global_ids itself; skip the generic path */
        ierr = Zoltan_HUND(zz, num_gid_entries, num_obj, gids, permuted_global_ids, NULL);
        goto End;
    }
#endif /* ZOLTAN_HUND */
    else {
        fprintf(stderr, "%s\n", opt.method);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unknown ordering method");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    /* TODO : Ask why useful ! */
    /*
     *  Construct the heterogenous machine description.
     */
    ierr = Zoltan_Build_Machine_Desc(zz);
    if (ierr == ZOLTAN_FATAL) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ierr);
    }
    ZOLTAN_TRACE_DETAIL(zz, yo, "Done machine description");


    /************************************
     *  Check for required query function
     ************************************/
    if (zz->Get_Num_Obj != NULL) {
        local_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Get_Num_Obj.");
            ZOLTAN_TRACE_EXIT(zz, yo);
            return (ierr);
        }
    }
    else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register ZOLTAN_NUM_OBJ_FN.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }


    /* TODO allocate all this stuff with the graph */
    /* NOTE(review): these allocations are not checked for failure */
    local_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, local_num_obj);
    local_rank = (int*) ZOLTAN_MALLOC(local_num_obj*sizeof(int));

    lids = ZOLTAN_MALLOC_LID_ARRAY(zz, local_num_obj);

    /*
     * Call the actual ordering function.
     * Compute gid according to the local graph.
     */

    ierr = (*Order_fn)(zz, local_num_obj, local_gids, lids, local_rank, NULL, &opt);
    ZOLTAN_FREE(&lids);

    /* A warning is reported and processing continues; anything else aborts */
    if (ierr) {
        sprintf(msg, "Ordering routine returned error code %d.", ierr);
        if (ierr == ZOLTAN_WARN) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
        } else {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
            Zoltan_Multifree(__FILE__, __LINE__, 2,
                             &local_gids, &local_rank);
            ZOLTAN_TRACE_EXIT(zz, yo);
            return (ierr);
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done ordering");

    /* TODO: Use directly the "graph" structure to avoid to duplicate things. */

    /* TODO: At this time, I consider rank == permuted_global_ids */

    /* I store : GNO, rank, permuted GID */
    /* MMW: perhaps don't ever use graph here since we need to support geometric orderings, otherwise need if/else */
    /* NOTE(review): ierr is overwritten here, so a ZOLTAN_WARN from the
     * ordering routine is not propagated, and the results of
     * Zoltan_DD_Update / Zoltan_DD_Find below are not inspected. */
    ierr = Zoltan_DD_Create (&dd, zz->Communicator, zz->Num_GID, (local_rank==NULL)?0:1, 0, local_num_obj, 0);
    /* Hope a linear assignment will help a little */
    if (local_num_obj)
        Zoltan_DD_Set_Neighbor_Hash_Fn1(dd, local_num_obj);
    /* Associate all the data with our xGNO */

    Zoltan_DD_Update (dd, local_gids, (ZOLTAN_ID_PTR)local_rank, NULL, NULL, local_num_obj);


    ZOLTAN_FREE(&local_gids);
    ZOLTAN_FREE(&local_rank);

    /* Look up the stored rank of every caller-supplied GID */
    Zoltan_DD_Find (dd, gids, (ZOLTAN_ID_PTR)permuted_global_ids, NULL, NULL,
                    num_obj, NULL);
    Zoltan_DD_Destroy(&dd);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done Registering results");


    end_time = Zoltan_Time(zz->Timer);
    order_time[0] = end_time - start_time;

    if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
        int i;
        Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
        printf("ZOLTAN: rank for ordering on Proc %d\n", zz->Proc);
        for (i = 0; i < num_obj; i++) {
            printf("GID = ");
            ZOLTAN_PRINT_GID(zz, &(gids[i*(num_gid_entries)]));
            printf(", rank = %3d\n", permuted_global_ids[i]);
        }
        printf("\n");
        Zoltan_Print_Sync_End(zz->Communicator, TRUE);
    }

    /* Print timing info */
    if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
        if (zz->Proc == zz->Debug_Proc) {
            printf("ZOLTAN Times:  \n");
        }
        Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, order_time[0],
                            "ZOLTAN     Balance:     ");
    }

#ifdef ZOLTAN_HUND
End:
#endif /*ZOLTAN_HUND*/
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
}