Пример #1
0
int Zoltan_Input_HG_Free(ZHG *zhg)
{
  Zoltan_HG_HGraph_Free(&zhg->HG);

  ZOLTAN_FREE(&(zhg->objWeight));
  ZOLTAN_FREE(&(zhg->objGNO));
  ZOLTAN_FREE(&(zhg->objGID));
  ZOLTAN_FREE(&(zhg->objLID));
  ZOLTAN_FREE(&(zhg->numHEdges));
  ZOLTAN_FREE(&(zhg->coor));
  ZOLTAN_FREE(&(zhg->fixed));
  ZOLTAN_FREE(&(zhg->Input_Parts));
  ZOLTAN_FREE(&(zhg->Output_Parts));
  ZOLTAN_FREE(&(zhg->AppObjSizes));
  ZOLTAN_FREE(&(zhg->edgeGNO));
  ZOLTAN_FREE(&(zhg->Esize));
  ZOLTAN_FREE(&(zhg->Ewgt));
  ZOLTAN_FREE(&(zhg->pinGNO));
  ZOLTAN_FREE(&(zhg->Pin_Procs));
  ZOLTAN_FREE(&(zhg->Recv_GNOs));

  Zoltan_Comm_Destroy(&(zhg->VtxPlan));

  return ZOLTAN_OK;
}
Пример #2
0
void Zoltan_PHG_Free_Hypergraph_Data(ZHG *zoltan_hg)
{
  if (zoltan_hg != NULL) {
    Zoltan_Multifree(__FILE__, __LINE__, 12, &zoltan_hg->GIDs,
                                            &zoltan_hg->LIDs,
                                            &zoltan_hg->Input_Parts,
                                            &zoltan_hg->Output_Parts,
                                            &zoltan_hg->AppObjSizes,
                                            &zoltan_hg->Remove_EGIDs,
                                            &zoltan_hg->Remove_ELIDs,
                                            &zoltan_hg->Remove_Esize,
                                            &zoltan_hg->Remove_GEsize,
                                            &zoltan_hg->Remove_Ewgt,
                                            &zoltan_hg->Remove_Pin_GIDs,
                                            &zoltan_hg->Remove_Pin_Procs);

    Zoltan_HG_HGraph_Free (&zoltan_hg->HG);
  }
}
Пример #3
0
/*  Main partitioning function for hypergraph partitioning. */
int Zoltan_PHG_Partition (
  ZZ *zz,               /* Zoltan data structure */
  HGraph *hg,           /* Input hypergraph to be partitioned */
  int p,                /* Input:  number partitions to be generated */
  float *part_sizes,    /* Input:  array of length p containing percentages
                           of work to be assigned to each partition */
  Partition parts,      /* Input:  initial partition #s; aligned with vtx 
                           arrays. 
                           Output:  computed partition #s */
  PHGPartParams *hgp,   /* Input:  parameters for hgraph partitioning. */
  int level)
{

  PHGComm *hgc = hg->comm;
  VCycle  *vcycle=NULL, *del=NULL;
  int  i, err = ZOLTAN_OK;
  int  prevVcnt     = 2*hg->dist_x[hgc->nProc_x];
  int  prevVedgecnt = 2*hg->dist_y[hgc->nProc_y];
  char *yo = "Zoltan_PHG_Partition";
  static int timer_match = -1,    /* Timers for various stages */
             timer_coarse = -1,   /* Declared static so we can accumulate */
             timer_refine = -1,   /* times over calls to Zoltan_PHG_Partition */
             timer_coarsepart = -1,
             timer_project = -1,
             timer_vcycle = -1;   /* times everything in Vcycle not included
                                     in above timers */
  int do_timing = (hgp->use_timers > 1);
  int vcycle_timing = (hgp->use_timers > 4);

  ZOLTAN_TRACE_ENTER(zz, yo);
    
  if (do_timing) {
    if (timer_vcycle < 0) 
      timer_vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle");
    if (timer_match < 0) 
      timer_match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching");
    if (timer_coarse < 0) 
      timer_coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening");
    if (timer_coarsepart < 0)
      timer_coarsepart = Zoltan_Timer_Init(zz->ZTime, 1,
                                           "Coarse_Partition");
    if (timer_refine < 0) 
      timer_refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement");
    if (timer_project < 0) 
      timer_project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up");

    ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
  }

  if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) {
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL.");
    return ZOLTAN_MEMERR;
  }

  /****** Coarsening ******/    
#define COARSEN_FRACTION_LIMIT 0.9  /* Stop if we don't make much progress */
  while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > hg->redl)
    && ((hg->dist_x[hgc->nProc_x] < (int) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5))
     || (hg->dist_y[hgc->nProc_y] < (int) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5)))
    && hg->dist_y[hgc->nProc_y] && hgp->matching) {
      int *match = NULL;
      VCycle *coarser=NULL;
        
      prevVcnt     = hg->dist_x[hgc->nProc_x];
      prevVedgecnt = hg->dist_y[hgc->nProc_y];

#ifdef _DEBUG      
      /* UVC: load balance stats */
      Zoltan_PHG_LoadBalStat(zz, hg);
#endif
      
      if (hgp->output_level >= PHG_DEBUG_LIST) {
          uprintf(hgc,
                  "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
                  hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
                  hgp->redm_str,
                  hgp->coarsepartition_str, hgp->refinement_str, p);
          if (hgp->output_level > PHG_DEBUG_LIST) {
              err = Zoltan_HG_Info(zz, hg);
              if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
                  goto End;
          }
      }
      if (hgp->output_level >= PHG_DEBUG_PLOT)
        Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
         "coarsening plot");

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_match, hgc->Communicator);
      }
      if (vcycle_timing) {
        if (vcycle->timer_match < 0) {
          char str[80];
          sprintf(str, "VC Matching %d", hg->info);
          vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match,
                           hgc->Communicator);
      }

      /* Allocate and initialize Matching Array */
      if (hg->nVtx && !(match = (int*) ZOLTAN_MALLOC (hg->nVtx*sizeof(int)))) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array");
        return ZOLTAN_MEMERR;
      }
      for (i = 0; i < hg->nVtx; i++)
        match[i] = i;
        
      /* Calculate matching (packing or grouping) */
      err = Zoltan_PHG_Matching (zz, hg, match, hgp);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
        ZOLTAN_FREE ((void**) &match);
        goto End;
      }
      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match,
                          hgc->Communicator);

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_match, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_coarse, hgc->Communicator);
      }

      if (vcycle_timing) {
        if (vcycle->timer_coarse < 0) {
          char str[80];
          sprintf(str, "VC Coarsening %d", hg->info);
          vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse,
                           hgc->Communicator);
      }
            
      if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) {
        ZOLTAN_FREE ((void**) &match);
        ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL.");
        goto End;
      }

      /* Construct coarse hypergraph and LevelMap */
      err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap,
       &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, 
       &vcycle->comm_plan, hgp);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) 
        goto End;

      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse,
                          hgc->Communicator);
        
      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarse, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
      }

      ZOLTAN_FREE ((void**) &match);

      if ((err=allocVCycle(coarser))!= ZOLTAN_OK)
        goto End;
      vcycle = coarser;
      hg = vcycle->hg;
  }

  if (hgp->output_level >= PHG_DEBUG_LIST) {
    uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
     hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
     hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p);
    if (hgp->output_level > PHG_DEBUG_LIST) {
      err = Zoltan_HG_Info(zz, hg);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
        goto End;
    }
  }
  if (hgp->output_level >= PHG_DEBUG_PLOT)
    Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
     "coarsening plot");

  /* free array that may have been allocated in matching */
  if (hgp->vtx_scal) ZOLTAN_FREE(&(hgp->vtx_scal));

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer_coarsepart, hgc->Communicator);
  }

  /****** Coarse Partitioning ******/
  err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp);
  if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
    goto End;

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarsepart, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
  }

  del = vcycle;
  /****** Uncoarsening/Refinement ******/
  while (vcycle) {
    VCycle *finer = vcycle->finer;
    hg = vcycle->hg;

    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_refine, hgc->Communicator);
    }
    if (vcycle_timing) {
      if (vcycle->timer_refine < 0) {
        char str[80];
        sprintf(str, "VC Refinement %d", hg->info);
        vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str);
      }
      ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine,
                         hgc->Communicator);
    }

    err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp);
        
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_refine, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
    }
    if (vcycle_timing)
      ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine,
                        hgc->Communicator);

                          
    if (hgp->output_level >= PHG_DEBUG_LIST)     
      uprintf(hgc, 
              "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n",
              hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
              hgp->redm_str,
              hgp->coarsepartition_str, hgp->refinement_str, p,
              Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, p, vcycle->Part),
              Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err));

    if (hgp->output_level >= PHG_DEBUG_PLOT)
      Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part,
       "partitioned plot");
        
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_project, hgc->Communicator);
    }
    if (vcycle_timing) {
      if (vcycle->timer_project < 0) {
        char str[80];
        sprintf(str, "VC Project Up %d", hg->info);
        vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str);
      }
      ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project,
                         hgc->Communicator);
    }

    /* Project coarse partition to fine partition */
    if (finer)  { 
      int *rbuffer;
            
      /* easy to undo internal matches */
      for (i = 0; i < finer->hg->nVtx; i++)
        if (finer->LevelMap[i] >= 0)
          finer->Part[i] = vcycle->Part[finer->LevelMap[i]];
          
      /* fill sendbuffer with part data for external matches I owned */    
      for (i = 0; i < finer->LevelCnt; i++)  {
        ++i;          /* skip return lno */
        finer->LevelData[i] = finer->Part[finer->LevelData[i]]; 
      }
            
      /* allocate rec buffer */
      rbuffer = NULL;
      if (finer->LevelSndCnt > 0)  {
        rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int));
        if (!rbuffer)    {
          ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
          return ZOLTAN_MEMERR;
        }
      }       
      
      /* get partition assignments from owners of externally matchted vtxs */  
      Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i);
      Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, 
       (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer);

      /* process data to undo external matches */
      for (i = 0; i < 2 * finer->LevelSndCnt;)  {
        int lno, partition;
        lno       = rbuffer[i++];
        partition = rbuffer[i++];      
        finer->Part[lno] = partition;         
      }

      ZOLTAN_FREE (&rbuffer);                  
      Zoltan_Comm_Destroy (&finer->comm_plan);                   
    }
    if (do_timing) {
      ZOLTAN_TIMER_STOP(zz->ZTime, timer_project, hgc->Communicator);
      ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator);
    }
    if (vcycle_timing)
      ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project,
                        hgc->Communicator);

    vcycle = finer;
  }       /* while (vcycle) */
    
End:
  vcycle = del;
  while (vcycle) {
    if (vcycle_timing) {
      Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout);
      Zoltan_Timer_Destroy(&vcycle->timer);
    }
    if (vcycle->finer) {   /* cleanup by level */
      Zoltan_HG_HGraph_Free (vcycle->hg);
      Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part, &vcycle->LevelMap,
                        &vcycle->LevelData, &vcycle->hg);
    }
    else                   /* cleanup top level */
      Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap,
                        &vcycle->LevelData);
    del = vcycle;
    vcycle = vcycle->finer;
    ZOLTAN_FREE(&del);
  }

  if (do_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator);
  ZOLTAN_TRACE_EXIT(zz, yo) ;
  return err;
}
Пример #4
0
static int gather_and_build_remap(
  ZZ *zz, 
  int *new_map,               /* Upon return, flag indicating whether parts
                                 assignments were changed due to remap. */
  int HEcnt,                  /* # of HEs allocated. */
  int *HEinfo                 /* Array of HE info; for each HE, two pins and 
                                 one edge weight. Stored as a single vector
                                 to minimize communication calls.  */
)
{
char *yo = "gather_and_remap";
int ierr = ZOLTAN_OK;
int i, uidx, tmp;
int *each_size = NULL;        /* sizes (# HEs * HEINFO_ENTRIES) for each proc */
int *recvbuf = NULL;          /* Receive buffer for gatherv */
int *displs = NULL;           /* Displacement buffer for gatherv */
int send_size;                /* Local # HEs * HEINFO_ENTRIES */
int total_size;               /* Total # ints in gatherv */
int total_HEcnt;              /* Total (across all procs) number of HEs. */
int max0, max1;               /* Max values of pin 0 and pin 1 for each HE. */
int *match = NULL;            /* Vector describing the matching. 
                                 match[i] = j ==> match[j] = i ==> 
                                 vertices i and j are matched. */
int *used = NULL;             /* Vector indicating which partitions are used
                                 in the matching. */
int limit;                    /* Maximum number of matches that are allowed */
HGraph hg;                    /* Hypergraph for matching */
float before_remap = 0,       /* Amount of data that overlaps between old and */
      after_remap = 0;        /* new decomposition before and after remapping, 
                                 respectively. */
float with_oldremap = 0;      /* Amount of data that overlaps between old and
                                 new decomposition using the OldRemap vector
                                 (remapping from the previous decomposition). */


  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  displs = recvbuf + total_size;
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT, 
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;
  
  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty partitions, allowing
   * max1 to be less than LB.Num_Global_Parts. 
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  if (max1 > zz->LB.Num_Global_Parts) 
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG */

  Zoltan_HG_HGraph_Init(&hg);
  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;  
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i; 
      hg.hvertex[i+i] = recvbuf[tmp];
      hg.hvertex[i+i+1] = recvbuf[tmp+1]+max0;
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts, sizeof(int));
  used = match + hg.nVtx;
  if (hg.nVtx && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts); 
  do_match(zz, &hg, match, limit);

      
  /* Build remapping vector, if non-trivial matching was returned. */

  *new_map = 0;
  for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
    if (match[i+max0] != i+max0) {
      *new_map = 1;
      break;
    }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }


    /* First, process all parts that were matched. Mark matched parts as used.*/

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1; 
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }
  
    /* Third, process remaining unmatched parts; assign them to 
       unused partitions.*/
  
    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused partition */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map) 
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap) 
    for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
Пример #5
0
/*  Main partitioning function for hypergraph partitioning. */
int Zoltan_PHG_Partition (
  ZZ *zz,               /* Zoltan data structure */
  HGraph *hg,           /* Input hypergraph to be partitioned */
  int p,                /* Input:  number partitions to be generated */
  float *part_sizes,    /* Input:  array of length p containing percentages
                           of work to be assigned to each partition */
  Partition parts,      /* Input:  initial partition #s; aligned with vtx 
                           arrays. 
                           Output:  computed partition #s */
  PHGPartParams *hgp)   /* Input:  parameters for hgraph partitioning. */
{

  PHGComm *hgc = hg->comm;
  VCycle  *vcycle=NULL, *del=NULL;
  int  i, err = ZOLTAN_OK, middle;
  ZOLTAN_GNO_TYPE origVpincnt; /* for processor reduction test */
  ZOLTAN_GNO_TYPE prevVcnt     = 2*hg->dist_x[hgc->nProc_x]; /* initialized so that the */
  ZOLTAN_GNO_TYPE prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; /* while loop will be entered
				 		               before any coarsening */
  ZOLTAN_GNO_TYPE tot_nPins, local_nPins;
  MPI_Datatype zoltan_gno_mpi_type;
  char *yo = "Zoltan_PHG_Partition";
  int do_timing = (hgp->use_timers > 1);
  int fine_timing = (hgp->use_timers > 2);
  int vcycle_timing = (hgp->use_timers > 4 && hgp->ProRedL == 0);
  short refine = 0;
  struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
  int reset_geometric_matching = 0;
  char reset_geometric_string[4];

  ZOLTAN_TRACE_ENTER(zz, yo);

  zoltan_gno_mpi_type = Zoltan_mpi_gno_type();
    
  if (do_timing) {
    if (timer->vcycle < 0) 
      timer->vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle");
    if (timer->procred < 0) 
      timer->procred = Zoltan_Timer_Init(zz->ZTime, 0, "Processor Reduction");
    if (timer->match < 0) 
      timer->match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching");
    if (timer->coarse < 0) 
      timer->coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening");
    if (timer->coarsepart < 0)
      timer->coarsepart = Zoltan_Timer_Init(zz->ZTime, 1,
                                           "Coarse_Partition");
    if (timer->refine < 0) 
      timer->refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement");
    if (timer->project < 0) 
      timer->project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up");

    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

  local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;

  MPI_Allreduce(&local_nPins,&tot_nPins,1,zoltan_gno_mpi_type,MPI_SUM,hgc->Communicator);

  origVpincnt = tot_nPins;

  if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) {
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  /* For geometric coarsening, hgp->matching pointer and string are reset
   * after geometric_levels of coarsening.  Will need to reset them after
   * this vcycle is completed.  Capture that fact now!  */
  if (!strcasecmp(hgp->redm_str, "rcb") || !strcasecmp(hgp->redm_str, "rib")) {
    reset_geometric_matching = 1;
    strcpy(reset_geometric_string, hgp->redm_str);
  }

  /****** Coarsening ******/    
#define COARSEN_FRACTION_LIMIT 0.9  /* Stop if we don't make much progress */
  while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > (ZOLTAN_GNO_TYPE)hg->redl)
	 && ((hg->dist_x[hgc->nProc_x] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) /* prevVcnt initialized to 2*hg->dist_x[hgc->nProc_x] */
	     || (hg->dist_y[hgc->nProc_y] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) /* prevVedgecnt initialized to 2*hg->dist_y[hgc->nProc_y] */
    && hg->dist_y[hgc->nProc_y] && hgp->matching) {
      ZOLTAN_GNO_TYPE *match = NULL;
      VCycle *coarser=NULL, *redistributed=NULL;
        
      prevVcnt     = hg->dist_x[hgc->nProc_x];
      prevVedgecnt = hg->dist_y[hgc->nProc_y];

#ifdef _DEBUG      
      /* UVC: load balance stats */
      Zoltan_PHG_LoadBalStat(zz, hg);
#endif
      
      if (hgp->output_level >= PHG_DEBUG_LIST) {
          uprintf(hgc,
                  "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
                  hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
                  hgp->redm_str,
                  hgp->coarsepartition_str, hgp->refinement_str, p);
          if (hgp->output_level > PHG_DEBUG_LIST) {
              err = Zoltan_HG_Info(zz, hg);
              if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
                  goto End;
          }
      }
      if (hgp->output_level >= PHG_DEBUG_PLOT)
        Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
         "coarsening plot");

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->match, hgc->Communicator);
      }
      if (vcycle_timing) {
        if (vcycle->timer_match < 0) {
          char str[80];
          sprintf(str, "VC Matching %d", hg->info);
          vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match,
                           hgc->Communicator);
      }

      /* Allocate and initialize Matching Array */
      if (hg->nVtx && !(match = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC (hg->nVtx*sizeof(ZOLTAN_GNO_TYPE)))) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_MEMERR;
      }
      for (i = 0; i < hg->nVtx; i++)
        match[i] = i;
        
      /* Calculate matching (packing or grouping) */

      err = Zoltan_PHG_Matching (zz, hg, match, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
        ZOLTAN_FREE (&match);
        goto End;
      }
      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match,
                          hgc->Communicator);

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->match, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->coarse, hgc->Communicator);
      }

      if (vcycle_timing) {
        if (vcycle->timer_coarse < 0) {
          char str[80];
          sprintf(str, "VC Coarsening %d", hg->info);
          vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse,
                           hgc->Communicator);
      }
            
      if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) {
        ZOLTAN_FREE (&match);
        ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL.");
        goto End;
      }

      /* Construct coarse hypergraph and LevelMap */
      err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap,
       &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, 
       &vcycle->comm_plan, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) 
        goto End;

      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse,
                          hgc->Communicator);
        
      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarse, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }

      ZOLTAN_FREE (&match);

      if ((err=allocVCycle(coarser))!= ZOLTAN_OK)
        goto End;
      vcycle = coarser;
      hg = vcycle->hg;

      if (hgc->nProc > 1 && hgp->ProRedL > 0) {
        local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;
	MPI_Allreduce(&local_nPins, &tot_nPins, 1, zoltan_gno_mpi_type, MPI_SUM,
		      hgc->Communicator);

	if (tot_nPins < (ZOLTAN_GNO_TYPE)(hgp->ProRedL * origVpincnt + 0.5)) {
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->procred, hgc->Communicator);
	  }
	  /* redistribute to half the processors */
	  origVpincnt = tot_nPins; /* update for processor reduction test */

	  if(hg->nVtx&&!(hg->vmap=(int*)ZOLTAN_MALLOC(hg->nVtx*sizeof(int)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: hg->vmap");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; i++)
	    hg->vmap[i] = i;

	  middle = (int)((float) (hgc->nProc-1) * hgp->ProRedL);

	  if (hgp->nProc_x_req!=1&&hgp->nProc_y_req!=1) { /* Want 2D decomp */
	    if ((middle+1) > SMALL_PRIME && Zoltan_PHG_isPrime(middle+1))
	      --middle; /* if it was prime just use one less #procs (since
			   it should be bigger than SMALL_PRIME it is safe to
			   decrement) */
	  }

	  if (!(hgc = (PHGComm*) ZOLTAN_MALLOC (sizeof(PHGComm)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: PHGComm");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  if (!(redistributed=newVCycle(zz,NULL,NULL,vcycle,vcycle_timing))) {
	    ZOLTAN_FREE (&hgc);
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "redistributed is NULL.");
	    goto End;
	  }

	  Zoltan_PHG_Redistribute(zz,hgp,hg,0,middle,hgc, redistributed->hg,
				  &vcycle->vlno,&vcycle->vdest);
	  if (hgp->UseFixedVtx || hgp->UsePrefPart)
            redistributed->hg->bisec_split = hg->bisec_split;

	  if ((err=allocVCycle(redistributed))!= ZOLTAN_OK)
	    goto End;
	  vcycle = redistributed;

	  if (hgc->myProc < 0)
	    /* I'm not in the redistributed part so I should go to uncoarsening
	       refinement and wait */ {
	    if (fine_timing) {
	      if (timer->cpgather < 0)
		timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
	      if (timer->cprefine < 0)
		timer->cprefine =Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
	      if (timer->cpart < 0)
		timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");
	    }
	    if (do_timing) {
	      ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	      ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	    }
	    goto Refine;
	  }

	  hg = vcycle->hg;
	  hg->redl = hgp->redl; /* not set with hg creation */
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	  }
	}
      }
  }

  if (hgp->output_level >= PHG_DEBUG_LIST) {
    uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
     hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
     hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p);
    if (hgp->output_level > PHG_DEBUG_LIST) {
      err = Zoltan_HG_Info(zz, hg);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
        goto End;
    }
  }
  if (hgp->output_level >= PHG_DEBUG_PLOT)
    Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
     "coarsening plot");

  /* free array that may have been allocated in matching */
  if (hgp->vtx_scal) {
    hgp->vtx_scal_size = 0;
    ZOLTAN_FREE(&(hgp->vtx_scal));
  }

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->coarsepart, hgc->Communicator);
  }

  /****** Coarse Partitioning ******/

  err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp);

  if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
    goto End;

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarsepart, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

Refine:
  del = vcycle;
  refine = 1;

  /****** Uncoarsening/Refinement ******/
  while (vcycle) {
    VCycle *finer = vcycle->finer;
    hg = vcycle->hg;

    if (refine && hgc->myProc >= 0) {
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->refine, hgc->Communicator);
      }
      if (vcycle_timing) {
	if (vcycle->timer_refine < 0) {
	  char str[80];
	  sprintf(str, "VC Refinement %d", hg->info);
	  vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str);
	}
	ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine,
			   hgc->Communicator);
      }

      err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp);
        
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->refine, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }
      if (vcycle_timing)
	ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine,
			  hgc->Communicator);

                          
      if (hgp->output_level >= PHG_DEBUG_LIST)     
	uprintf(hgc, 
		"FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n",
		hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
		hgp->redm_str,
		hgp->coarsepartition_str, hgp->refinement_str, p,
		Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, 0, p, 
                                           vcycle->Part),
		Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err));

      if (hgp->output_level >= PHG_DEBUG_PLOT)
	Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part,
			"partitioned plot");
    }

    if (finer)  {
      int *rbuffer;
            
      /* Project coarse partition to fine partition */
      if (finer->comm_plan) {
	refine = 1;
	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->project, hgc->Communicator);
	}
	if (vcycle_timing) {
	  if (vcycle->timer_project < 0) {
	    char str[80];
	    sprintf(str, "VC Project Up %d", hg->info);
	    vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str);
	  }
	  ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project,
			     hgc->Communicator);
	}
        
	/* easy to assign partitions to internal matches */
	for (i = 0; i < finer->hg->nVtx; i++)
	  if (finer->LevelMap[i] >= 0)   /* if considers only the local vertices */
	    finer->Part[i] = vcycle->Part[finer->LevelMap[i]];
          
	/* now that the course partition assignments have been propagated */
	/* upward to the finer level for the local vertices, we need to  */    
	/* fill the LevelData (matched pairs of a local vertex with a    */
	/* off processor vertex) with the partition assignment of the    */
	/* local vertex - can be done totally in the finer level!        */    
	for (i = 0; i < finer->LevelCnt; i++)  {
	  ++i;          /* skip over off processor lno */
	  finer->LevelData[i] = finer->Part[finer->LevelData[i]]; 
	}
            
	/* allocate rec buffer to exchange LevelData information */
	rbuffer = NULL;
	if (finer->LevelSndCnt > 0)  {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int));
	  if (!rbuffer)    {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}       
      
	/* get partition assignments from owners of externally matched vtxs */  
	Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i);
	Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, 
         (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer);

	/* process data to assign partitions to expernal matches */
	for (i = 0; i < 2 * finer->LevelSndCnt;)  {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];      
	  finer->Part[lno] = partition;         
	}

	ZOLTAN_FREE (&rbuffer);                  
	Zoltan_Comm_Destroy (&finer->comm_plan);                   

	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->project, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	}
	if (vcycle_timing)
	  ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project,
			    hgc->Communicator);
      } else {
	int *sendbuf = NULL, size;
	refine = 0;
	/* ints local and partition numbers */
	if (finer->vlno) {
	  sendbuf = (int*) ZOLTAN_MALLOC (2 * hg->nVtx * sizeof(int));
	  if (!sendbuf) {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; ++i) {
	    sendbuf[2 * i] = finer->vlno[i];     /* assign local numbers */
	    sendbuf[2 * i + 1] = vcycle->Part[i];/* assign partition numbers */
	  }
	}

	ZOLTAN_FREE (&hgc);
	hgc = finer->hg->comm; /* updating hgc is required when the processors
				   change */
	/* Create comm plan to unredistributed processors */
	err = Zoltan_Comm_Create(&finer->comm_plan, finer->vlno ? hg->nVtx : 0,
				 finer->vdest, hgc->Communicator, COMM_TAG+2,
				 &size);

	if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
	  ZOLTAN_PRINT_ERROR(hgc->myProc, yo, "Zoltan_Comm_Create failed.");
	  goto End;
	}

	/* allocate rec buffer to exchange sendbuf information */
	rbuffer = NULL;
	if (finer->hg->nVtx) {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->hg->nVtx * sizeof(int));

	  if (!rbuffer) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}

	/* Use plan to send partitions to the unredistributed processors */

	Zoltan_Comm_Do(finer->comm_plan, COMM_TAG+3, (char *) sendbuf,
		       2*sizeof(int), (char *) rbuffer);

	MPI_Bcast(rbuffer, 2*finer->hg->nVtx, MPI_INT, 0, hgc->col_comm);
	
	/* process data to assign partitions to unredistributed processors */
	for (i = 0; i < 2 * finer->hg->nVtx;) {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];
	  finer->Part[lno] = partition;
	}

	if (finer->vlno)
	  ZOLTAN_FREE (&sendbuf);

	ZOLTAN_FREE (&rbuffer);
	Zoltan_Comm_Destroy (&finer->comm_plan);
      }
    }

    vcycle = finer;
  }       /* while (vcycle) */
    
End:
  vcycle = del;
  while (vcycle) {
    if (vcycle_timing) {
      Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout);
      Zoltan_Timer_Destroy(&vcycle->timer);
    }
    if (vcycle->finer) {   /* cleanup by level */
      Zoltan_HG_HGraph_Free (vcycle->hg);

      if (vcycle->LevelData)
	Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg);
      else if (vcycle->vlno)
	Zoltan_Multifree (__FILE__, __LINE__, 5, &vcycle->Part, &vcycle->vdest,
			  &vcycle->vlno, &vcycle->LevelMap, &vcycle->hg);
      else
	Zoltan_Multifree (__FILE__, __LINE__, 3, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->hg);
    }
    else                   /* cleanup top level */
      Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap,
                        &vcycle->LevelData);
    del = vcycle;
    vcycle = vcycle->finer;
    ZOLTAN_FREE(&del);
  }

  if (reset_geometric_matching) {
    strcpy(hgp->redm_str, reset_geometric_string);
    Zoltan_PHG_Set_Matching_Fn(hgp);
  }

  if (do_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
  ZOLTAN_TRACE_EXIT(zz, yo) ;
  return err;
}
Пример #6
0
int Zoltan_PHG_Gather_To_All_Procs(
  ZZ *zz, 
  HGraph *phg,           /* Input:   Local part of distributed hypergraph */
  PHGPartParams *hgp,        /* Input:   Hypergraph parameters */
  PHGComm *scomm,        /* Input:   Serial PHGComm for use by shg. */
  HGraph **gathered_hg   /* Output:  combined hypergraph combined to proc */
)
{
/* 
 * Function to gather distributed hypergraph onto each processor for
 * coarsest partitioning.
 * First hypergraph arrays for the hypergraph on a column of processors
 * are built using MPI_Allgathers down the processor columns.
 * These hypergraph arrays contain complete info about a subset of vertices.
 * Second the column hypergraphs are gathered along processor rows.
 * Each processor then has a complete description of the hypergraph.
 */
char *yo = "Zoltan_PHG_Gather_To_All_Procs";
int ierr = ZOLTAN_OK;
int i, tmp, sum;
int *each = NULL,
    *disp = NULL;      /* Size and displacement arrays for MPI_Allgatherv */
int *send_buf = NULL;    /* Buffer of values to be sent */
int send_size;           /* Size of buffer send_buf */
int *col_vedge = NULL;   /* vedge array for the proc-column hypergraph */
int *col_vindex = NULL;  /* vindex array for the proc-column hypergraph */
int *col_hvertex = NULL; /* hvertex array for the proc-column hypergraph */
int *col_hindex = NULL;  /* hindex array for the proc-column hypergraph */
int col_nVtx;            /* Number of vertices in processor column */
int col_nEdge;           /* Number of edges in processor column */
int col_nPin;            /* Number of pins in processor column */

int *recv_size = NULL;   /* nPins for each proc in col or row */

HGraph *shg;             /* Pointer to the serial hypergraph to be
                            returned by this function. */

int myProc_x = phg->comm->myProc_x;
int nProc_x = phg->comm->nProc_x;
int nProc_y = phg->comm->nProc_y;
int max_nProc_xy = MAX(nProc_x, nProc_y);

  if (phg->comm->nProc == 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Do not call this routine on one proc.");
    return ZOLTAN_FATAL;
  }

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, phg, NULL, stdout, "GatherBefore");/* NULL parts for now;
                                                           add non-NULL later */
#endif

  /******************************************************************
   *  0. Allocate the hypergraph to be returned. 
   *  Set values that we already know. 
   ******************************************************************/

  shg = *gathered_hg = (HGraph *) ZOLTAN_MALLOC(sizeof(HGraph));
  if (!shg) MEMORY_ERROR;

  Zoltan_HG_HGraph_Init(shg);
  shg->nVtx = phg->dist_x[nProc_x];    /* TODO64 - can this exceed 2B? */
  shg->nEdge = phg->dist_y[nProc_y];

  shg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  shg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  if (!shg->dist_x || !shg->dist_y) MEMORY_ERROR;

  shg->dist_x[0] = shg->dist_y[0] = 0;
  shg->dist_x[1] = shg->nVtx;
  shg->dist_y[1] = shg->nEdge;

  shg->comm = scomm;

  shg->EdgeWeightDim = phg->EdgeWeightDim;
  shg->VtxWeightDim = phg->VtxWeightDim;
  if (shg->VtxWeightDim && shg->nVtx)
    shg->vwgt = (float *) ZOLTAN_MALLOC(shg->nVtx * shg->VtxWeightDim 
                                                  * sizeof(float));
  if (shg->EdgeWeightDim && shg->nEdge)
    shg->ewgt = (float *) ZOLTAN_MALLOC(shg->nEdge * shg->EdgeWeightDim 
                                                  * sizeof(float));
  /* Fixed vertices */
  shg->bisec_split = phg->bisec_split;
  if (hgp->UseFixedVtx)
    shg->fixed_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  if (hgp->UsePrefPart)
    shg->pref_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  
  /* Allocate arrays for use in gather operations */
  recv_size = (int *) ZOLTAN_MALLOC(3 * max_nProc_xy * sizeof(int));
  each = recv_size + max_nProc_xy;
  disp = each + max_nProc_xy;
 
  /* TODO64 - phg->dist_y[nProc_y] could exceed 2 Billion, NO? */
  send_size = MAX(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x], 
                  phg->dist_y[nProc_y]);
  send_buf = (int *) ZOLTAN_MALLOC(send_size * sizeof(int));
  

  if ((shg->VtxWeightDim && shg->nVtx && !shg->vwgt) ||
      (shg->EdgeWeightDim && shg->nEdge && !shg->ewgt) || !recv_size ||
      (send_size && !send_buf)) 
    MEMORY_ERROR;
  

  /*************************************************************
   *  1. Gather all non-zeros for vertices in processor column *
   *************************************************************/
  
  if (nProc_y == 1) {
    /* 
     * Don't need a gather; just set pointers appropriately for row-gather
     * in Step 2 below.
     */

    col_nVtx = phg->nVtx;
    col_nEdge = phg->nEdge;
    col_nPin = phg->nPins;
    col_vindex = phg->vindex;
    col_vedge = phg->vedge;
    col_hindex = phg->hindex;
    col_hvertex = phg->hvertex;

    for (i = 0; i < shg->EdgeWeightDim * shg->nEdge; i++)
      shg->ewgt[i] = phg->ewgt[i];
  }

  else {

    /* Gather local size info for each proc in column */

    MPI_Allgather(&(phg->nPins), 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->col_comm);
  
    /* Compute number of vtx, edge, and nnz in column */
    col_nVtx = (int)(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x]);
    col_nEdge = phg->dist_y[nProc_y];   /* SCHEMEA */
    col_nPin = 0;
    for (i = 0; i < nProc_y; i++) {
      col_nPin += recv_size[i];
    }
    
    /* Allocate arrays for column hypergraph */
    col_hindex = (int *) ZOLTAN_CALLOC((col_nEdge+1), sizeof(int));
    col_hvertex = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    col_vindex = (int *) ZOLTAN_CALLOC((col_nVtx+1), sizeof(int));
    col_vedge = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    if (!col_vindex || !col_hindex || 
        (col_nPin && (!col_vedge || !col_hvertex)))
      MEMORY_ERROR;
    
    /* Gather hvertex data for all procs in column */
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex LNO to GNO here. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_y; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(phg->hvertex, phg->nPins, MPI_INT,
                   col_hvertex, recv_size, disp, MPI_INT, phg->comm->col_comm);
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex GNO to LNO here */
  
    /* Gather hindex data for all procs in column */
  
    for (i = 0; i < phg->nEdge; i++)
      send_buf[i] = phg->hindex[i+1] - phg->hindex[i];
  
    /* SCHEMEA can assume a recv for each edge;
     * SCHEMEB needs to gather the number of edges recv'd from each proc. */
  
    for (i = 0; i < nProc_y; i++) 
      each[i] = phg->dist_y[i+1] - phg->dist_y[i];

    disp[0] = 0;  /* Can't use dist_y because it may not be sizeof(int) */
    for (i=1; i < nProc_y; i++){
      disp[i] = disp[i-1] + each[i-1];
    }
  
    /* SCHEMEA can use phg->dist_y for displacement array.
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, phg->nEdge, MPI_INT, 
                   col_hindex, each, disp, MPI_INT, phg->comm->col_comm);
  
    /* Perform prefix sum on col_hindex */
    sum = 0;
    for (i = 0; i < col_nEdge; i++) {
      tmp = col_hindex[i];
      col_hindex[i] = sum;
      sum += tmp;
    }
    col_hindex[col_nEdge] = sum;

    /* Sanity check */
    if (col_hindex[col_nEdge] != col_nPin) {
      printf("%d Sanity check failed:  "
             "col_hindex[col_nEdge] %d != col_nPin %d\n", 
              zz->Proc, col_hindex[col_nEdge], col_nPin);
      exit(-1);
    }
  
    /* Gather edge weights, if any. */
    if (shg->EdgeWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->EdgeWeightDim;
      for (i = 1; i < nProc_y; i++) {
        each[i] *= phg->EdgeWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->ewgt, phg->nEdge*phg->EdgeWeightDim, MPI_FLOAT, 
                     shg->ewgt, each, disp, MPI_FLOAT, phg->comm->col_comm);
    }
   
  
    Zoltan_HG_Mirror(col_nEdge, col_hindex, col_hvertex, 
                     col_nVtx, col_vindex, col_vedge);
  
  }  /* End column-gather */
  
  /*************************************************************
   *  2. Gather all non-zeros for edges in processor rows      *
   *  All processors in a processor column now have the same   *
   *  hypergraph; we now gather it across rows.                *
   *************************************************************/

  if (nProc_x == 1) {
    /* 
     * Don't need a gather across the row; just set pointers appropriately
     * in shg.
     */
    shg->vindex = col_vindex;
    shg->vedge = col_vedge;
    shg->hindex = col_hindex;
    shg->hvertex = col_hvertex;

    /* Copy vwgt and fixed arrays so shg owns this memory */
    for (i = 0; i < shg->VtxWeightDim*shg->nVtx; i++)
      shg->vwgt[i] = phg->vwgt[i];
    if (hgp->UseFixedVtx)
      for (i = 0; i < shg->nVtx; i++)
        shg->fixed_part[i] = phg->fixed_part[i];
    if (hgp->UsePrefPart)
      for (i = 0; i < shg->nVtx; i++)
        shg->pref_part[i] = phg->pref_part[i];
  }

  else {

    /* Gather info about size within the row */

    MPI_Allgather(&col_nPin, 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->row_comm);
  
    tmp = 0;
    for (i = 0; i < nProc_x; i++) 
      tmp += recv_size[i];

    shg->nPins = tmp;
  
    shg->vindex = (int *) ZOLTAN_CALLOC((shg->nVtx+1), sizeof(int));
    shg->vedge = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
    shg->hindex = (int *) ZOLTAN_CALLOC((shg->nEdge+1), sizeof(int));
    shg->hvertex = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
   
    if (!shg->vindex || !shg->hindex ||
        (shg->nPins && (!shg->vedge || !shg->hvertex)))
      MEMORY_ERROR;
    
    /* Gather vedge data for all procs in row */
  
    /* SCHEMEA can send local edge numbers; 
       SCHEMEB requires edge LNO to GNO conversion. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(col_vedge, col_nPin, MPI_INT,
                   shg->vedge, recv_size, disp, MPI_INT, phg->comm->row_comm);
  
    /* Gather vindex data for all procs in row */
  
    for (i = 0; i < col_nVtx; i++)
      send_buf[i] = col_vindex[i+1] - col_vindex[i];
  
    /* SCHEMEA can assume a recv for each vertex;
     * SCHEMEB would need to gather the number of vtxs recv'd from each proc. */
  
    for (i = 0; i < nProc_x; i++) 
      each[i] = (int)(phg->dist_x[i+1] - phg->dist_x[i]);

    disp[0] = 0;  /* Can't use dist_x, may not be sizeof(int) */
    for (i = 1; i < nProc_x; i++) 
      disp[i] = disp[i-1] + each[i-1];

    /* SCHEMEA can use phg->dist_x as displacement array;
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, col_nVtx, MPI_INT, 
                   shg->vindex, each, disp,
                   MPI_INT, phg->comm->row_comm);

    /* Perform prefix sum on shg->vindex */
    sum = 0;
    for (i = 0; i < shg->nVtx; i++) {
      tmp = shg->vindex[i];
      shg->vindex[i] = sum;
      sum += tmp;
    }
    shg->vindex[shg->nVtx] = sum;
  
    /* Sanity check */
    if (shg->vindex[shg->nVtx] != shg->nPins) {
      printf("%d Sanity check failed:  "
             "shg->vindex %d != nPins %d\n", 
              zz->Proc, shg->vindex[shg->nVtx], shg->nPins);
      exit(-1);
    }
  
    /* Gather fixed array, if any  */
    if (hgp->UseFixedVtx){
  
#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather before gather. phg->fixed =");
      for (i=0; i<phg->nVtx; i++){
        printf(" %d ", phg->fixed_part[i]);
      }
      printf("\n");
#endif

      /* Can use the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->fixed_part, phg->nVtx, MPI_FLOAT, 
                     shg->fixed_part, each, disp, MPI_FLOAT, phg->comm->row_comm);

#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather after gather. shg->fixed =");
      for (i=0; i<shg->nVtx; i++){
        printf(" %d ", shg->fixed_part[i]);
      }
      printf("\n");
#endif
    }
    /* Gather pref part array, if any  */
    if (hgp->UsePrefPart){
      /* Can use the same each array. */
      /* Need to compute new disp array. */
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->pref_part, phg->nVtx, MPI_FLOAT, 
                     shg->pref_part, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
    
    /* Gather vertex weights, if any. */
    if (shg->VtxWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->VtxWeightDim;
      for (i = 1; i < nProc_x; i++) {
        each[i] *= phg->VtxWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->vwgt, phg->nVtx*phg->VtxWeightDim, MPI_FLOAT, 
                     shg->vwgt, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
  
    Zoltan_HG_Mirror(shg->nVtx, shg->vindex, shg->vedge, 
                     shg->nEdge, shg->hindex, shg->hvertex);

  }  /* End row gather */
  
#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, shg, NULL, stdout, "GatherAfter");/* NULL parts for now;
                                                           add non-NULL later */
  Zoltan_PHG_Plot_2D_Distrib(zz, phg);
  Zoltan_PHG_Plot_2D_Distrib(zz, shg);
#endif

End:

  if (ierr < 0) {
    Zoltan_HG_HGraph_Free(*gathered_hg);
    ZOLTAN_FREE(gathered_hg);
  }

  Zoltan_Multifree(__FILE__, __LINE__, 2, &send_buf, 
                                          &recv_size);

  if (nProc_x > 1 && nProc_y > 1) 
    Zoltan_Multifree(__FILE__, __LINE__, 4, &col_vedge,
                                            &col_vindex,
                                            &col_hvertex,
                                            &col_hindex);
  return ierr;
}
Пример #7
0
int Zoltan_PHG_CoarsePartition(
  ZZ *zz, 
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments.   */
  PHGPartParams *hgp   /* Input:  parameters to use.  */
)
{
/* 
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.  
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
char *yo = "Zoltan_PHG_CoarsePartition";
int ierr = ZOLTAN_OK;
int i, si, j;
static PHGComm scomm;          /* Serial communicator info */
static int first_time = 1;
HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
int *spart = NULL;             /* Partition vectors for shg. */
int *new_part = NULL;          /* Ptr to new partition vector. */
float *bestvals = NULL;        /* Best cut values found so far */
int worst, new_cand;
float bal, cut, worst_cut;
int fine_timing = (hgp->use_timers > 2);
struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
int local_coarse_part = hgp->LocalCoarsePartition;

/* Number of iterations to try coarse partitioning on each proc. */
/* 10 when p=1, and 1 when p is large. */
const int num_coarse_iter = 1 + 9/zz->Num_Proc; 


  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }


  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and  mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc=phg->comm;    

    Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
        for (i = 0; i < phg->nVtx; i++) {
            /* Impose fixed vertex/preferred part constraints. */
            if (phg->pref_part[i] < 0) { /* Free vertex in fixedvertex partitioning or repart */
                /* randomly assigned to a part */
                part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
            } else {
                if (phg->bisec_split < 0)
                    /* direct k-way, use part numbers directly */
                    part[i] = phg->pref_part[i];
                else
                    /* recursive bisection, map to 0-1 part numbers */
                    part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
            }            
        }
    } else {
        for (i = 0; i < phg->nVtx; i++) {
            if (part[i] >= numPart || part[i]<0) {
                if (first) {
                    ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
                    first = 0;
                    ierr = ZOLTAN_WARN;
                }
                part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart;
            }        
        }
    }
  }
  else if (numPart == 1) {            
    /* everything goes in the one partition */
    for (i =  0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) { 
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc % 
                                           NUM_COARSEPARTITION_FNS];
    }


    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /* 
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

    }

    /* 
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                    sizeof(int));
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1)*sizeof(int)); 
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
    
    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i=0; i< num_coarse_iter; i++){
      int savefmlooplimit=hgp->fm_loop_limit;
        
      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, 
               new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;
      
      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, 
                                       numPart, new_part); 
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);
      
      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }
      if (i<NUM_PART_KEEP)
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j=1; j<NUM_PART_KEEP+1; j++){
          if (worst_cut < bestvals[j]){
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart+new_cand*(shg->nVtx);
    }
    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart.                     */
    for (i=0; i<shg->nVtx; i++){
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart, 
              MIN(NUM_PART_KEEP, num_coarse_iter), spart,
              bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                        "Error returned from pick_best.");
      goto End;
    }
  
    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD  Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }

      Zoltan_HG_HGraph_Free(shg);
      ZOLTAN_FREE(&shg);
    } 
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
    ZOLTAN_FREE(&spart);
    ZOLTAN_FREE(&bestvals);
  }
  
End:
  if (fine_timing) 
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}