예제 #1
0
void Zoltan_HG_Print(
  ZZ *zz,
  HGraph *hg,
  Partition parts,
  FILE *fp,
  char *str
)
{
/* Routine to print hypergraph weights and edges. Assumes serial execution;
 * put inside Zoltan_Print_Sync_Start/Zoltan_Print_Sync_End for parallel
 * programs. 
 *
 * zz    -- Zoltan structure; used for tracing and error reporting.
 * hg    -- hypergraph to print; nothing is printed when hg is NULL.
 * parts -- optional part assignment per vertex; -1 is printed for every
 *          vertex when parts is NULL.
 * fp    -- output stream.
 * str   -- label printed at the start of each section header.
 *
 * For each weight array that is present (hg->vwgt, hg->ewgt), the weights
 * are listed followed by their per-component totals.
 */
int i, j;
int num_vwgt;
int num_ewgt;
int sum_len;
float *sum = NULL;      /* per-component weight totals (scratch) */
char *yo = "Zoltan_HG_Print";

  if (hg == NULL)
    return;

  ZOLTAN_TRACE_ENTER(zz, yo);

  num_vwgt = hg->VtxWeightDim;
  num_ewgt = hg->EdgeWeightDim;

  /* One scratch array serves both the vertex and the edge totals, so it
   * must hold the larger of the two dimensions.  Allocate only when there
   * is something to sum, and never write through a failed allocation. */
  sum_len = MAX(num_vwgt, num_ewgt);
  if (sum_len > 0) {
    sum = (float *) ZOLTAN_MALLOC(sum_len * sizeof(float));
    if (sum == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ZOLTAN_TRACE_EXIT(zz, yo);
      return;
    }
  }

  fprintf(fp, "%s nVtx=%d nEdge=%d nPins=%d vWgt=%d eWgt=%d\n", 
          str, hg->nVtx, hg->nEdge, hg->nPins, 
          hg->VtxWeightDim, hg->EdgeWeightDim);

  /* Print Vertex Info */
  fprintf(fp, "%s Vertices:  (edges)\n", str);
  for (i = 0; i < hg->nVtx; i++) {
    fprintf(fp, "%d (%d) in part %d:  ", 
            i, VTX_LNO_TO_GNO(hg, i), (parts ? parts[i] : -1));
    fprintf(fp, "(");
    for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++)
      fprintf(fp, "%d ", hg->vedge[j]);
    fprintf(fp, ")\n");
  }

  if (hg->vwgt != NULL) {
    /* Vertex weights are stored vertex-major: num_vwgt values per vertex. */
    for (j = 0; j < num_vwgt; j++) sum[j] = 0;
    fprintf(fp, "%s Vertices: [weights])\n", str);
    for (i = 0; i < hg->nVtx; i++) {
      fprintf(fp, "%d (%d):  [", i, VTX_LNO_TO_GNO(hg, i));
      for (j = 0; j < num_vwgt; j++) {
        fprintf(fp, "%f ", hg->vwgt[i*num_vwgt + j]);
        sum[j] += hg->vwgt[i*num_vwgt + j];
      }
      fprintf(fp, "])\n");
    }
    fprintf(fp, "Total vertex weight = [");
    for (j = 0; j < num_vwgt; j++) fprintf(fp, "%f  ", sum[j]);
    fprintf(fp, "]\n");
  }

  /* Print Hyperedge Info */
  fprintf(fp, "%s Hyperedges:  (vertices)\n", str);
  for (i = 0; i < hg->nEdge; i++) {
    fprintf(fp, "%d (%d):  ", i, EDGE_LNO_TO_GNO(hg, i));
    fprintf(fp, "(");
    for (j = hg->hindex[i]; j < hg->hindex[i+1]; j++)
      fprintf(fp, "%d ", hg->hvertex[j]);
    fprintf(fp, ")\n");
  }

  if (hg->ewgt != NULL) {
    /* Edge weights are stored edge-major: num_ewgt values per hyperedge. */
    for (j = 0; j < num_ewgt; j++) sum[j] = 0;
    fprintf(fp, "%s Hyperedge Weights:  [weights]\n", str);
    for (i = 0; i < hg->nEdge; i++) {
      fprintf(fp, "%d (%d):  ", i, EDGE_LNO_TO_GNO(hg, i));
      fprintf(fp, "[");
      for (j = 0; j < num_ewgt; j++) {
        fprintf(fp, "%f ", hg->ewgt[i*num_ewgt + j]);
        sum[j] += hg->ewgt[i*num_ewgt + j];
      }
      fprintf(fp, "])\n");
    }
    fprintf(fp, "Total hyperedge weight = [");
    for (j = 0; j < num_ewgt; j++) fprintf(fp, "%f  ", sum[j]);
    fprintf(fp, "]\n");
  }

  if (sum != NULL) {
    ZOLTAN_FREE(&sum);
  }
  ZOLTAN_TRACE_EXIT(zz, yo);
}
예제 #2
0
int Zoltan_PHG_CoarsePartition(
  ZZ *zz, 
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments.   */
  PHGPartParams *hgp   /* Input:  parameters to use.  */
)
{
/* 
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.  
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 *
 * Special cases handled before the general gather-and-partition path:
 *   - coarse partitioning disabled ("no"/"none"): only sanitize/assign parts;
 *   - numPart == 1: everything goes to part 0;
 *   - at least as many parts as global vertices (and no preferred parts):
 *     each vertex gets its own part;
 *   - local coarse partitioning requested, or forced for large graphs.
 *
 * Returns ZOLTAN_OK on success, ZOLTAN_WARN if out-of-range initial part
 * numbers had to be remapped, ZOLTAN_MEMERR on allocation failure, or the
 * error code reported by a called routine.
 * All work arrays allocated here are released on every exit path.
 */
char *yo = "Zoltan_PHG_CoarsePartition";
int ierr = ZOLTAN_OK;
int i, si, j;
static PHGComm scomm;          /* Serial communicator info */
static int first_time = 1;
HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
int shg_gathered = 0;          /* Nonzero once shg was successfully gathered
                                  (i.e., shg is owned here and != phg). */
int *spart = NULL;             /* Partition vectors for shg. */
int *new_part = NULL;          /* Ptr to new partition vector. */
float *bestvals = NULL;        /* Best cut values found so far */
int worst, new_cand;
float bal, cut, worst_cut;
int fine_timing = (hgp->use_timers > 2);
struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
int local_coarse_part = hgp->LocalCoarsePartition;

/* Number of iterations to try coarse partitioning on each proc. */
/* 10 when p=1, and 1 when p is large. */
const int num_coarse_iter = 1 + 9/zz->Num_Proc; 


  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }


  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and  mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc=phg->comm;    

    Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
        for (i = 0; i < phg->nVtx; i++) {
            /* Impose fixed vertex/preferred part constraints. */
            if (phg->pref_part[i] < 0) { /* Free vertex in fixedvertex partitioning or repart */
                /* randomly assigned to a part */
                part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
            } else {
                if (phg->bisec_split < 0)
                    /* direct k-way, use part numbers directly */
                    part[i] = phg->pref_part[i];
                else
                    /* recursive bisection, map to 0-1 part numbers */
                    part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
            }            
        }
    } else {
        for (i = 0; i < phg->nVtx; i++) {
            if (part[i] >= numPart || part[i]<0) {
                if (first) {
                    /* Warn only once per call. */
                    ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
                    first = 0;
                    ierr = ZOLTAN_WARN;
                }
                /* Fold the out-of-range value into [0, numPart-1]. */
                part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart;
            }        
        }
    }
  }
  else if (numPart == 1) {            
    /* everything goes in the one partition */
    for (i =  0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) { 
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc % 
                                           NUM_COARSEPARTITION_FNS];
    }


    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /* 
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        /* NOTE(review): the state of shg after a failed gather is not
         * visible here, so it is deliberately not freed on this path.
         * Also, the cpart timer was already stopped above, yet End stops
         * it again -- confirm the timer code tolerates that. */
        goto End;
      }
      shg_gathered = 1;

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

    }

    /* 
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     * spart holds NUM_PART_KEEP+1 partition vectors of shg->nVtx entries
     * each; bestvals holds one objective value per vector.  bestvals is
     * zero-initialized so that slots never filled (when fewer than
     * NUM_PART_KEEP+1 iterations run) are not read uninitialized below.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                    sizeof(int));
    bestvals = (float *) ZOLTAN_CALLOC(NUM_PART_KEEP+1, sizeof(float)); 
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
    
    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i=0; i< num_coarse_iter; i++){
      int savefmlooplimit=hgp->fm_loop_limit;
        
      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, 
               new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;
      
      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, 
                                       numPart, new_part); 
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);
      
      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }
      if (i<NUM_PART_KEEP)
        /* Still filling the first NUM_PART_KEEP+1 slots in order. */
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j=1; j<NUM_PART_KEEP+1; j++){
          if (worst_cut < bestvals[j]){
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart+new_cand*(shg->nVtx);
    }
    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart.                     */
    for (i=0; i<shg->nVtx; i++){
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart, 
              MIN(NUM_PART_KEEP, num_coarse_iter), spart,
              bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                        "Error returned from pick_best.");
      goto End;
    }
  
    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD  Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }
    } 
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
  }
  
End:
  /* Release work storage on success and on every error path alike.
   * shg is freed only when it was gathered here; in the serial case it
   * aliases the caller's phg and must not be freed. */
  if (shg_gathered) {
    Zoltan_HG_HGraph_Free(shg);
    ZOLTAN_FREE(&shg);
  }
  if (spart != NULL) {
    ZOLTAN_FREE(&spart);
  }
  if (bestvals != NULL) {
    ZOLTAN_FREE(&bestvals);
  }

  if (fine_timing) 
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
예제 #3
0
/* Debugging aid: dump a ZHG structure and its embedded PHG hypergraph to
 * stdout, prefixing each section header with the calling processor's rank.
 *
 * zz        -- Zoltan structure; supplies the processor rank and the edge
 *              weight dimension used when printing edge weights.
 * zhg       -- structure to print; zhg->HG is printed as the PHG hypergraph.
 * sumWeight -- zero-based index of the vertex weight component to total
 *              over all PHG vertices; the total is reported (labelled
 *              sumWeight+1) only when it is positive.
 *
 * Optional arrays that are NULL are printed as "-" (or, for weight arrays,
 * simply omitted) instead of being dereferenced.
 */
void print_hypergraph(ZZ *zz, ZHG *zhg, int sumWeight)
{
  int i, j, npins;
  int ewdim = zz->Edge_Weight_Dim;
  int vwdim = zhg->objWeightDim;
  float sum;
  float *wgt, *vwgt;
  int *pin, *owner, *lno;
  HGraph *hg = &zhg->HG;
  int p = zz->Proc;

  /* The ZHG structure contains the hypergraph returned by the query functions,
   * including modifications based on ADD_OBJ_WEIGHT and PHG_EDGE_WEIGHT_OPERATION.
   * If the PHG hypergraph build has completed, the edge list only contains the removed
   * edges.  If LB_Eval build the ZHG structure, it contains all edges.
   *
   * the HGraph structure contains that hypergraph with modifications made
   * for the PHG algorithm.  This may include addition of repartition
   * vertices and edges, and removal of dense edges.
   */

  /* ---- Section 1: input vertices (objects) ---- */

  wgt = zhg->objWeight;

  printf("(%d) %d INPUT VERTICES (out of %d) : gno (gid/lid) (weights) nhedges fixed inpart outpart objSize)\n",p, zhg->nObj, zhg->globalObj);

  for (i=0; i<zhg->nObj; i++){

    printf("  %d (",zhg->objGNO[i]);

    if (zhg->objGID)
      printf("%d/",zhg->objGID[i]);
    else
      printf("-/");

    if (zhg->objLID)
      printf("%d) (",zhg->objLID[i]);
    else
      printf("/-) (");

    /* wgt walks the object weight array, vwdim values per object;
     * skip it entirely when no weights were provided. */
    if (wgt){
      for (j=0; j < vwdim; j++){
        printf("%f",*wgt++);
        if (j < vwdim-1) printf(", ");
      }
    }

    if (zhg->numHEdges)
      printf(") %d ",zhg->numHEdges[i]);
    else
      printf(") - ");

    if (zhg->fixed)
      printf(" %d ",zhg->fixed[i]);
    else
      printf(" - ");

    if (zhg->Input_Parts)
      printf(" %d ",zhg->Input_Parts[i]);
    else
      printf(" - ");

    if (zhg->Output_Parts)
      printf(" %d ",zhg->Output_Parts[i]);
    else
      printf(" - ");

    if (zhg->AppObjSizes)
      printf(" %d ",zhg->AppObjSizes[i]);
    else
      printf(" - ");

    printf("\n");
  }
  printf("\n");
 
  /* ---- Section 2: input (or removed) hyperedges ---- */

  wgt = zhg->Ewgt;
  pin = zhg->pinGNO;
  owner = zhg->Pin_Procs;
   
  printf("(%d) %d INPUT or REMOVED EDGES (out of %d), %d pins: gno size (weights) (pinGNO/pinProc)\n",
                  p, zhg->nHedges, zhg->globalHedges, zhg->nPins);

  for (i=0; i < zhg->nHedges; i++){

    printf("  %d %d (", zhg->edgeGNO[i], zhg->Esize[i]);

    if (wgt){
      for (j=0; j < ewdim; j++){
        printf("%f",*wgt++);
        if (j < ewdim - 1) printf(", ");
      }
    }
    printf(") (");

    /* pin and owner advance together through the flat pin lists. */
    for (j=0; j < zhg->Esize[i]; j++){
      printf("%d/%d", *pin++, *owner++);
      if (j < zhg->Esize[i] - 1) printf(" ");
    }

    printf(")\n");
  }
  printf("\n");

  /* ---- Section 3: PHG hyperedges ---- */

  printf("(%d) %d PHG EDGES (%d weights), %d total PHG PINS:\n",
          p, hg->nEdge, ewdim, hg->nPins);

  wgt = hg->ewgt;
  lno = hg->hvertex;
  vwgt = hg->vwgt;

  for (i=0; i<hg->nEdge; i++){
    npins = hg->hindex[i+1] - hg->hindex[i];

    printf(" edge %d: ",EDGE_LNO_TO_GNO(hg, i));
    /* The PHG edge weight array may be absent; print nothing then. */
    if (wgt){
      for (j=0; j<ewdim; j++){
        printf(" %f",*wgt++);
      }
    }
    printf("\n %d pins: ", npins);
    for (j=0; j<npins; j++){
      printf("%d ", *lno++);
    }
    printf("\n");
  }
  printf("\n");

  /* ---- Section 4: PHG vertices and their weights ---- */

  printf("(%d) %d PHG PIN global numbers and %d weights:\n", p, hg->nVtx, vwdim);

  sum = 0;

  for (i=0; i<hg->nVtx; i++){
    printf("  %d  %d: ", i, VTX_LNO_TO_GNO(hg, i));
    /* The PHG vertex weight array may be absent; print nothing then. */
    if (vwgt){
      for (j=0; j<vwdim; j++){
        if (j==sumWeight) sum += *vwgt;
        printf("%f ", *vwgt++);
      }
    }
    printf("\n");
  }
  printf("\n");
  if (sum > 0.0) printf("(%d) Weight %d sums to %f\n\n",p, sumWeight+1,sum);
}
예제 #4
0
static void phg_plot_write_gnuload(
  ZZ *zz,
  const char *prefix,
  int cnt
)
{
/* Helper for Zoltan_PHG_Plot_2D_Distrib:  writes the gnuplot load script
 * "<prefix><cnt>.gnuload", whose plot command lists the per-processor data
 * files "<prefix><cnt>.<proc>" for proc = 0, ..., zz->Num_Proc-1.
 * Reports an error and writes nothing if the script cannot be opened.
 */
char *yo = "Zoltan_PHG_Plot_2D_Distrib";
char filename[64];
FILE *fp = NULL;
int i;

  snprintf(filename, sizeof(filename), "%s%02d.gnuload", prefix, cnt);
  fp = fopen(filename, "w");
  if (fp == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Could not open gnuplot load file.");
    return;
  }

  fprintf(fp, "set data style points\n");
  fprintf(fp, "set pointsize 5\n");
  fprintf(fp, "set nokey\n");
  fprintf(fp, "set xlabel \"vertices\"\n");
  fprintf(fp, "set ylabel \"-hyperedges\"\n");
  fprintf(fp, "plot ");
  for (i = 0; i < zz->Num_Proc; i++) {
    fprintf(fp, "\"%s%02d.%02d\"", prefix, cnt, i);
    if (i != zz->Num_Proc-1)
      fprintf(fp, ", ");
    else
      fprintf(fp, "\n");
  }
  fclose(fp);
}

void Zoltan_PHG_Plot_2D_Distrib(
  ZZ *zz,
  HGraph *phg
)
{
/* Routine that produces gnuplot output of 2D data distribution in form of 
 * a matrix.
 * One column for each vertex.
 * One row for each hyperedge.
 * Separate files are produced for each processor.
 * Vertex and edge global node numbers are used for "coordinates" in plotting.
 * No partitioning information is displayed; only the 2D data distribution 
 * is shown.
 *
 * Files written in the current directory (NN = call counter, PP = rank):
 *   phgNN.PP, phgmirrorNN.PP          -- one "vgno -egno" point per pin,
 *                                        from the edge-based and the
 *                                        vertex-based storage, respectively;
 *   phgNN.gnuload, phgmirrorNN.gnuload -- gnuplot scripts (rank 0 only).
 * A file that cannot be opened is reported and skipped; the call counter
 * still advances.
 */
static int cnt = 0;           /* Call counter; makes file names unique. */
char *yo = "Zoltan_PHG_Plot_2D_Distrib";
char filename[64];
FILE *fp = NULL;
int i, j;
int egno, vgno;

  /* Pins as seen from the hyperedge (row) storage. */
  snprintf(filename, sizeof(filename), "phg%02d.%02d", cnt, zz->Proc);
  fp = fopen(filename, "w");
  if (fp == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Could not open plot data file.");
  }
  else {
    for (i = 0; i < phg->nEdge; i++) {
      egno = EDGE_LNO_TO_GNO(phg, i);
      for (j = phg->hindex[i]; j < phg->hindex[i+1]; j++) {
        vgno = VTX_LNO_TO_GNO(phg, phg->hvertex[j]);
        /* Edge number is negated so that edge 0 is at the top of the plot. */
        fprintf(fp, "%d  %d\n", vgno, -egno);
      }
    }
    fclose(fp);
  }
  if (zz->Proc == 0)
    phg_plot_write_gnuload(zz, "phg", cnt);

  /* Sanity check to ensure Mirror is working correctly */
  /* Don't need to generate both sets of files, but they should differ only 
   * in the order of the points */
  snprintf(filename, sizeof(filename), "phgmirror%02d.%02d", cnt, zz->Proc);
  fp = fopen(filename, "w");
  if (fp == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Could not open mirror plot data file.");
  }
  else {
    for (i = 0; i < phg->nVtx; i++) {
      vgno = VTX_LNO_TO_GNO(phg, i);
      for (j = phg->vindex[i]; j < phg->vindex[i+1]; j++) {
        egno = EDGE_LNO_TO_GNO(phg, phg->vedge[j]);
        fprintf(fp, "%d  %d\n", vgno, -egno);
      }
    }
    fclose(fp);
  }
  if (zz->Proc == 0)
    phg_plot_write_gnuload(zz, "phgmirror", cnt);

  cnt++;
}