Ejemplo n.º 1
0
int
Zoltan_Distribute_layout (ZZ *zz, const PHGComm * const inlayout,
			  int loRank, int hiRank,
			  int reqx, int reqy,
			  PHGComm *outlayout)
{
  MPI_Group allgrp, newgrp;
  int *ranks;
  MPI_Comm  nmpicomm;
  MPI_Comm  ompicomm;
  int myProc;
  int i;
  int nProc;

  ompicomm = (inlayout != NULL)?inlayout->Communicator:zz->Communicator;
  myProc = (inlayout != NULL)?inlayout->myProc:zz->Proc;
  nProc= (inlayout != NULL)?inlayout->nProc:zz->Num_Proc;
  if (((reqx != 1) && (reqy != 1) && (nProc > 3)) && Zoltan_PHG_isPrime(nProc)) nProc--;

  Zoltan_PHGComm_Init(outlayout);

  /* create a new communicator for procs[lo..hi] */

  MPI_Comm_group(ompicomm, &allgrp);
  ranks = (int *) ZOLTAN_MALLOC(nProc * sizeof(int));
  if (!ranks) return ZOLTAN_MEMERR;

  for (i=loRank; i<=hiRank; ++i)
    ranks[i-loRank] = i;

  MPI_Group_incl(allgrp, nProc, ranks, &newgrp);
  MPI_Comm_create(ompicomm, newgrp, &nmpicomm);
  MPI_Group_free(&newgrp);
  MPI_Group_free(&allgrp);
  ZOLTAN_FREE(&ranks);

  return (Zoltan_PHG_Set_2D_Proc_Distrib(zz, nmpicomm,
					myProc-loRank, nProc,
					reqx, reqy, outlayout));
}
Ejemplo n.º 2
0
int Zoltan_PHG_Initialize_Params(
  ZZ *zz,   /* the Zoltan data structure */
  float *part_sizes,
  PHGPartParams *hgp
)
{
  int err = ZOLTAN_OK;
  char *yo = "Zoltan_PHG_Initialize_Params";
  int nProc;
  int usePrimeComm;
  MPI_Comm communicator;
  char add_obj_weight[MAX_PARAM_STRING_LEN];
  char edge_weight_op[MAX_PARAM_STRING_LEN];
  char cut_objective[MAX_PARAM_STRING_LEN];
  char *package = hgp->hgraph_pkg; 
  char *method = hgp->hgraph_method;
  char buf[1024];

  memset(hgp, 0, sizeof(*hgp)); /* in the future if we forget to initialize
                                   another param at least it will be 0 */
  
  Zoltan_Bind_Param(PHG_params, "HYPERGRAPH_PACKAGE", &hgp->hgraph_pkg);
  Zoltan_Bind_Param(PHG_params, "PHG_MULTILEVEL", &hgp->useMultilevel);
  Zoltan_Bind_Param(PHG_params, "PHG_FROM_GRAPH_METHOD", hgp->convert_str);  
  Zoltan_Bind_Param(PHG_params, "PHG_OUTPUT_LEVEL", &hgp->output_level);
  Zoltan_Bind_Param(PHG_params, "FINAL_OUTPUT", &hgp->final_output); 
  Zoltan_Bind_Param(PHG_params, "CHECK_GRAPH", &hgp->check_graph);   
  Zoltan_Bind_Param(PHG_params, "CHECK_HYPERGRAPH", &hgp->check_graph);   
  Zoltan_Bind_Param(PHG_params, "PHG_NPROC_VERTEX", &hgp->nProc_x_req);
  Zoltan_Bind_Param(PHG_params, "PHG_NPROC_EDGE", &hgp->nProc_y_req);
  Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_LIMIT", &hgp->redl);
  Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_NCANDIDATE", &hgp->nCand);
  Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_METHOD", hgp->redm_str);
  Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_METHOD_FAST", hgp->redm_fast);
  Zoltan_Bind_Param(PHG_params, "PHG_VERTEX_VISIT_ORDER", &hgp->visit_order);
  Zoltan_Bind_Param(PHG_params, "PHG_EDGE_SCALING", &hgp->edge_scaling);
  Zoltan_Bind_Param(PHG_params, "PHG_VERTEX_SCALING", &hgp->vtx_scaling);
  Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_METHOD", hgp->refinement_str);
  Zoltan_Bind_Param(PHG_params, "PHG_DIRECT_KWAY", &hgp->kway);
  Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_LOOP_LIMIT", 
                                &hgp->fm_loop_limit);
  Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_MAX_NEG_MOVE", 
                                &hgp->fm_max_neg_move);  
  Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_QUALITY", 
                                &hgp->refinement_quality);  
  Zoltan_Bind_Param(PHG_params, "PHG_COARSEPARTITION_METHOD", 
                                 hgp->coarsepartition_str);
  Zoltan_Bind_Param(PHG_params, "PHG_USE_TIMERS",
                                 (void*) &hgp->use_timers);  
  Zoltan_Bind_Param(PHG_params, "USE_TIMERS",
                                 (void*) &hgp->use_timers);  
  Zoltan_Bind_Param(PHG_params, "PHG_EDGE_SIZE_THRESHOLD",
                                 (void*) &hgp->EdgeSizeThreshold);  
  Zoltan_Bind_Param(PHG_params, "PHG_MATCH_EDGE_SIZE_THRESHOLD",
                                 (void*) &hgp->MatchEdgeSizeThreshold);  
  Zoltan_Bind_Param(PHG_params, "PHG_BAL_TOL_ADJUSTMENT",
                                 (void*) &hgp->bal_tol_adjustment);  
  Zoltan_Bind_Param(PHG_params, "PARKWAY_SERPART",
                                 (void *) hgp->parkway_serpart);
  Zoltan_Bind_Param(PHG_params, "PHG_CUT_OBJECTIVE",
                                 (void *) &cut_objective);
  Zoltan_Bind_Param(PHG_params, "ADD_OBJ_WEIGHT",
                                 (void *) add_obj_weight);
  Zoltan_Bind_Param(PHG_params, "PHG_EDGE_WEIGHT_OPERATION",
                                 (void *) edge_weight_op);
  Zoltan_Bind_Param(PHG_params, "PHG_RANDOMIZE_INPUT",
                                 (void*) &hgp->RandomizeInitDist);  
  Zoltan_Bind_Param(PHG_params, "PHG_PROCESSOR_REDUCTION_LIMIT",
		                 (void*) &hgp->ProRedL);
  Zoltan_Bind_Param(PHG_params, "PHG_REPART_MULTIPLIER",
		                 (void*) &hgp->RepartMultiplier);
  Zoltan_Bind_Param(PHG_params, "PATOH_ALLOC_POOL0",
                                 (void*) &hgp->patoh_alloc_pool0);
  Zoltan_Bind_Param(PHG_params, "PATOH_ALLOC_POOL1",
                                 (void*) &hgp->patoh_alloc_pool1);
  
  
  /* Set default values */
  strncpy(hgp->hgraph_pkg,           "phg", MAX_PARAM_STRING_LEN);
  strncpy(hgp->convert_str,    "neighbors", MAX_PARAM_STRING_LEN);
  strncpy(hgp->redm_str,             "agg", MAX_PARAM_STRING_LEN);
  hgp->match_array_type = 0;
  strncpy(hgp->redm_fast,          "l-ipm", MAX_PARAM_STRING_LEN);
  strncpy(hgp->coarsepartition_str, "auto", MAX_PARAM_STRING_LEN);
  strncpy(hgp->refinement_str,       "fm2", MAX_PARAM_STRING_LEN);
  strncpy(hgp->parkway_serpart,    "patoh", MAX_PARAM_STRING_LEN);
  strncpy(cut_objective,    "connectivity", MAX_PARAM_STRING_LEN);
  strncpy(add_obj_weight,           "none", MAX_PARAM_STRING_LEN);
  strncpy(edge_weight_op,            "max", MAX_PARAM_STRING_LEN);
  /* LB.Approach is initialized to "REPARTITION", and set in Set_Key_Params  */
  strncpy(hgp->hgraph_method,  zz->LB.Approach, MAX_PARAM_STRING_LEN);
  if (!strcasecmp(zz->LB.Approach,"REFINE")) 
    hgp->useMultilevel = 0;
  else
    hgp->useMultilevel = 1;

  hgp->use_timers = 0;
  hgp->LocalCoarsePartition = 0;
  hgp->edge_scaling = 0;
  hgp->vtx_scaling = 0;
  hgp->vtx_scal_size = 0;
  hgp->vtx_scal = NULL;  /* Array for storing vertex degree scale vector. 
                            Should perhaps go in hg structure, not the
                            param struct? */
  hgp->connectivity_cut = 1; 
  hgp->visit_order = 0;  /* Random */
  hgp->check_graph = 0;
  hgp->bal_tol = zz->LB.Imbalance_Tol[0]; /* Make vector for multiconstraint */
  hgp->bal_tol_adjustment = 0.7;
  hgp->nCand = 100;
  hgp->redl = MAX(2*zz->LB.Num_Global_Parts, 100);
  hgp->output_level = PHG_DEBUG_NONE;
  hgp->final_output = 0;
  hgp->nProc_x_req = -1;
  hgp->nProc_y_req = -1;
  hgp->kway = 0;
  hgp->fm_loop_limit = 10;
  hgp->fm_max_neg_move = 250;  
  hgp->refinement_quality = 1;
  hgp->RandomizeInitDist = 0;
  hgp->EdgeSizeThreshold = 0.25;
  hgp->MatchEdgeSizeThreshold = 500;  
  hgp->hybrid_keep_factor = 0.;
  hgp->ProRedL = 0.0; /* UVCUVC: CHECK default set to 0 until we run more experiments */
  hgp->RepartMultiplier = 100.;
  hgp->patoh_alloc_pool0 = 0;
  hgp->patoh_alloc_pool1 = 0;
  hgp->UseFixedVtx = 0;
  hgp->UsePrefPart = 0;
  
  /* Get application values of parameters. */
  err = Zoltan_Assign_Param_Vals(zz->Params, PHG_params, zz->Debug_Level, 
          zz->Proc, zz->Debug_Proc);
  
  nProc = zz->Num_Proc;
  usePrimeComm = 0;

  /* Parse add_obj_weight parameter */

  if (!strcasecmp(add_obj_weight, "none")) {
    hgp->add_obj_weight = PHG_ADD_NO_WEIGHT;
    hgp->part_sizes = part_sizes;
  }
  else if (zz->Obj_Weight_Dim > 0) {
    /* Do not add_obj_weight until multiconstraint PHG is implemented */
    ZOLTAN_PRINT_WARN(zz->Proc, yo,
     "Both application supplied *and* ADD_OBJ_WEIGHT "
     "calculated vertex weights were provided.");
    ZOLTAN_PRINT_WARN(zz->Proc, yo,
      "Only the first application supplied weight per vertex will be used.");
    hgp->add_obj_weight = PHG_ADD_NO_WEIGHT;
    hgp->part_sizes = part_sizes;
  } 
  else {
    if (!strcasecmp(add_obj_weight, "vertices")){
      hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT;
    } else if (!strcasecmp(add_obj_weight, "unit")){
      hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT;
    } else if (!strcasecmp(add_obj_weight, "vertex degree")){
      hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT;
    } else if (!strcasecmp(add_obj_weight, "nonzeros")){
      hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT;
    } else if (!strcasecmp(add_obj_weight, "pins")){
      hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT;
    } else{
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid ADD_OBJ_WEIGHT parameter.\n");
      err = ZOLTAN_WARN;
    }
    /* Set hgp->part_sizes to new array of part_sizes with added obj weight. */
    if (part_sizes)
      err = Zoltan_LB_Add_Part_Sizes_Weight(zz, 
                          (zz->Obj_Weight_Dim ? zz->Obj_Weight_Dim : 1), 
                          zz->Obj_Weight_Dim+1, 
                          part_sizes, &(hgp->part_sizes));
  }

  if ((zz->Obj_Weight_Dim==0) &&      /* no application supplied weights */
      (hgp->add_obj_weight==PHG_ADD_NO_WEIGHT)){ /* no calculated weight */

    hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT; /* default object weight */
  }

  if (!strcasecmp(cut_objective, "default")
      || !strcasecmp(cut_objective, "connectivity"))
      hgp->connectivity_cut = 1;
  else if (!strcasecmp(cut_objective, "hyperedges"))
      hgp->connectivity_cut = 0;
  else {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid PHG_CUT_OBJECTIVE parameter.\n");
      goto End;
  }

  if (!strcasecmp(edge_weight_op, "max")){
    hgp->edge_weight_op = PHG_MAX_EDGE_WEIGHTS;
  } else if (!strcasecmp(edge_weight_op, "add")){
    hgp->edge_weight_op = PHG_ADD_EDGE_WEIGHTS;
  } else if (!strcasecmp(edge_weight_op, "error")){
    hgp->edge_weight_op = PHG_FLAG_ERROR_EDGE_WEIGHTS;
  } else{
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
      "Invalid PHG_EDGE_WEIGHT_OPERATION parameter.\n");
    err = ZOLTAN_WARN;
  }

  if ((strcasecmp(method, "PARTITION")) &&
      (strcasecmp(method, "REPARTITION")) &&
      (strcasecmp(method, "REFINE"))) {
    sprintf(buf,"%s is not a valid hypergraph method\n",method);
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, buf);
    err = ZOLTAN_FATAL;
    goto End;
  }

  /* Adjust refinement parameters using hgp->refinement_quality */
  if (hgp->refinement_quality < 0.5/hgp->fm_loop_limit) 
    /* No refinement */
    strncpy(hgp->refinement_str,      "no",   MAX_PARAM_STRING_LEN);
  else {
    /* Scale FM parameters */
    hgp->fm_loop_limit   *= hgp->refinement_quality;
    hgp->fm_max_neg_move *= hgp->refinement_quality;
  }

  if (!strcasecmp(package, "PHG")){
    /* Test to determine whether we should change the number of processors
       used for partitioning to make more efficient 2D decomposition */

    if (hgp->nProc_x_req != 1 && hgp->nProc_y_req != 1)  /* Want 2D decomp */
      if (zz->Num_Proc > SMALL_PRIME && Zoltan_PHG_isPrime(zz->Num_Proc)) 
        /* 2D data decomposition is requested but we have a prime 
         * number of processors. */
        usePrimeComm = 1;

    if ((!strcasecmp(method, "REPARTITION"))){
        zz->LB.Remap_Flag = 0;
    }

    if ((!strcasecmp(method, "REPARTITION")) ||
        (!strcasecmp(method, "REFINE"))) {
        hgp->fm_loop_limit = 4; /* experimental evaluation showed that for
                                repartitioning/refinement small number of passes
                                is "good enough". These are all heuristics hence
                                it is possible to create a pathological cases; 
                                but in general this seems to be sufficient */
    }
    
    if (!hgp->useMultilevel) {
        /* don't do coarsening */
        strncpy(hgp->redm_str, "no", MAX_PARAM_STRING_LEN);

        /* we have modified all coarse partitioners to handle preferred part
           if user wants to choose one she can choose; otherwise default 
           partitioner
           (greedy growing) does work better than previous default partitioning
           for phg_refine ("no"). */        
        hgp->UsePrefPart = 1;

    }
    if (!strcasecmp(method, "REFINE") && hgp->useMultilevel){
        /* UVCUVC: as a heuristic we prefer local matching;
           in our experiments for IPDPS'07 and WileyChapter multilevel_refine
           didn't prove itself useful; it is too costly even with local matching
           hence it will not be be released yet (i.e. not in v3). */
        strncpy(hgp->redm_str, "l-ipm", MAX_PARAM_STRING_LEN);                
        hgp->UsePrefPart = 1;
    }    
  }
  else if (!strcasecmp(package, "PARKWAY")){
    if (hgp->nProc_x_req>1) {
      err = ZOLTAN_FATAL;
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "ParKway requires nProc_x=1 or -1.");
      goto End;
    }
    hgp->nProc_x_req = 1;
  } 
  else if (!strcasecmp(package, "PATOH")){
    if (zz->Num_Proc>1) {
      err = ZOLTAN_FATAL;
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "PaToH only works with Num_Proc=1.");
      goto End;
    }
  }

  if (!usePrimeComm)
    MPI_Comm_dup(zz->Communicator, &communicator);
  else {
    MPI_Group newgrp, zzgrp;
    nProc--;
    MPI_Comm_group(zz->Communicator, &zzgrp);
    MPI_Group_excl(zzgrp, 1, &nProc, &newgrp);
    MPI_Comm_create(zz->Communicator, newgrp, &communicator);
    MPI_Group_free(&newgrp);
    MPI_Group_free(&zzgrp);
  }

  err = Zoltan_PHG_Set_2D_Proc_Distrib(zz, communicator, zz->Proc, 
                                       nProc, hgp->nProc_x_req, 
                                       hgp->nProc_y_req, 
                                       &hgp->globalcomm);
  if (err != ZOLTAN_OK) 
    goto End;

  /* Convert strings to function pointers. */
  err = Zoltan_PHG_Set_Part_Options (zz, hgp);
  
End:
  return err;
}
Ejemplo n.º 3
0
/*  Main partitioning function for hypergraph partitioning. */
int Zoltan_PHG_Partition (
  ZZ *zz,               /* Zoltan data structure */
  HGraph *hg,           /* Input hypergraph to be partitioned */
  int p,                /* Input:  number partitions to be generated */
  float *part_sizes,    /* Input:  array of length p containing percentages
                           of work to be assigned to each partition */
  Partition parts,      /* Input:  initial partition #s; aligned with vtx 
                           arrays. 
                           Output:  computed partition #s */
  PHGPartParams *hgp)   /* Input:  parameters for hgraph partitioning. */
{

  PHGComm *hgc = hg->comm;
  VCycle  *vcycle=NULL, *del=NULL;
  int  i, err = ZOLTAN_OK, middle;
  ZOLTAN_GNO_TYPE origVpincnt; /* for processor reduction test */
  ZOLTAN_GNO_TYPE prevVcnt     = 2*hg->dist_x[hgc->nProc_x]; /* initialized so that the */
  ZOLTAN_GNO_TYPE prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; /* while loop will be entered
				 		               before any coarsening */
  ZOLTAN_GNO_TYPE tot_nPins, local_nPins;
  MPI_Datatype zoltan_gno_mpi_type;
  char *yo = "Zoltan_PHG_Partition";
  int do_timing = (hgp->use_timers > 1);
  int fine_timing = (hgp->use_timers > 2);
  int vcycle_timing = (hgp->use_timers > 4 && hgp->ProRedL == 0);
  short refine = 0;
  struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
  int reset_geometric_matching = 0;
  char reset_geometric_string[4];

  ZOLTAN_TRACE_ENTER(zz, yo);

  zoltan_gno_mpi_type = Zoltan_mpi_gno_type();
    
  if (do_timing) {
    if (timer->vcycle < 0) 
      timer->vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle");
    if (timer->procred < 0) 
      timer->procred = Zoltan_Timer_Init(zz->ZTime, 0, "Processor Reduction");
    if (timer->match < 0) 
      timer->match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching");
    if (timer->coarse < 0) 
      timer->coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening");
    if (timer->coarsepart < 0)
      timer->coarsepart = Zoltan_Timer_Init(zz->ZTime, 1,
                                           "Coarse_Partition");
    if (timer->refine < 0) 
      timer->refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement");
    if (timer->project < 0) 
      timer->project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up");

    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

  local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;

  MPI_Allreduce(&local_nPins,&tot_nPins,1,zoltan_gno_mpi_type,MPI_SUM,hgc->Communicator);

  origVpincnt = tot_nPins;

  if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) {
    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  /* For geometric coarsening, hgp->matching pointer and string are reset
   * after geometric_levels of coarsening.  Will need to reset them after
   * this vcycle is completed.  Capture that fact now!  */
  if (!strcasecmp(hgp->redm_str, "rcb") || !strcasecmp(hgp->redm_str, "rib")) {
    reset_geometric_matching = 1;
    strcpy(reset_geometric_string, hgp->redm_str);
  }

  /****** Coarsening ******/    
#define COARSEN_FRACTION_LIMIT 0.9  /* Stop if we don't make much progress */
  while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > (ZOLTAN_GNO_TYPE)hg->redl)
	 && ((hg->dist_x[hgc->nProc_x] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) /* prevVcnt initialized to 2*hg->dist_x[hgc->nProc_x] */
	     || (hg->dist_y[hgc->nProc_y] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) /* prevVedgecnt initialized to 2*hg->dist_y[hgc->nProc_y] */
    && hg->dist_y[hgc->nProc_y] && hgp->matching) {
      ZOLTAN_GNO_TYPE *match = NULL;
      VCycle *coarser=NULL, *redistributed=NULL;
        
      prevVcnt     = hg->dist_x[hgc->nProc_x];
      prevVedgecnt = hg->dist_y[hgc->nProc_y];

#ifdef _DEBUG      
      /* UVC: load balance stats */
      Zoltan_PHG_LoadBalStat(zz, hg);
#endif
      
      if (hgp->output_level >= PHG_DEBUG_LIST) {
          uprintf(hgc,
                  "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
                  hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
                  hgp->redm_str,
                  hgp->coarsepartition_str, hgp->refinement_str, p);
          if (hgp->output_level > PHG_DEBUG_LIST) {
              err = Zoltan_HG_Info(zz, hg);
              if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
                  goto End;
          }
      }
      if (hgp->output_level >= PHG_DEBUG_PLOT)
        Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
         "coarsening plot");

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->match, hgc->Communicator);
      }
      if (vcycle_timing) {
        if (vcycle->timer_match < 0) {
          char str[80];
          sprintf(str, "VC Matching %d", hg->info);
          vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match,
                           hgc->Communicator);
      }

      /* Allocate and initialize Matching Array */
      if (hg->nVtx && !(match = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC (hg->nVtx*sizeof(ZOLTAN_GNO_TYPE)))) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_MEMERR;
      }
      for (i = 0; i < hg->nVtx; i++)
        match[i] = i;
        
      /* Calculate matching (packing or grouping) */

      err = Zoltan_PHG_Matching (zz, hg, match, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
        ZOLTAN_FREE (&match);
        goto End;
      }
      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match,
                          hgc->Communicator);

      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->match, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->coarse, hgc->Communicator);
      }

      if (vcycle_timing) {
        if (vcycle->timer_coarse < 0) {
          char str[80];
          sprintf(str, "VC Coarsening %d", hg->info);
          vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str);
        }
        ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse,
                           hgc->Communicator);
      }
            
      if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) {
        ZOLTAN_FREE (&match);
        ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL.");
        goto End;
      }

      /* Construct coarse hypergraph and LevelMap */
      err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap,
       &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, 
       &vcycle->comm_plan, hgp);

      if (err != ZOLTAN_OK && err != ZOLTAN_WARN) 
        goto End;

      if (vcycle_timing)
        ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse,
                          hgc->Communicator);
        
      if (do_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarse, hgc->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }

      ZOLTAN_FREE (&match);

      if ((err=allocVCycle(coarser))!= ZOLTAN_OK)
        goto End;
      vcycle = coarser;
      hg = vcycle->hg;

      if (hgc->nProc > 1 && hgp->ProRedL > 0) {
        local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins;
	MPI_Allreduce(&local_nPins, &tot_nPins, 1, zoltan_gno_mpi_type, MPI_SUM,
		      hgc->Communicator);

	if (tot_nPins < (ZOLTAN_GNO_TYPE)(hgp->ProRedL * origVpincnt + 0.5)) {
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->procred, hgc->Communicator);
	  }
	  /* redistribute to half the processors */
	  origVpincnt = tot_nPins; /* update for processor reduction test */

	  if(hg->nVtx&&!(hg->vmap=(int*)ZOLTAN_MALLOC(hg->nVtx*sizeof(int)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: hg->vmap");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; i++)
	    hg->vmap[i] = i;

	  middle = (int)((float) (hgc->nProc-1) * hgp->ProRedL);

	  if (hgp->nProc_x_req!=1&&hgp->nProc_y_req!=1) { /* Want 2D decomp */
	    if ((middle+1) > SMALL_PRIME && Zoltan_PHG_isPrime(middle+1))
	      --middle; /* if it was prime just use one less #procs (since
			   it should be bigger than SMALL_PRIME it is safe to
			   decrement) */
	  }

	  if (!(hgc = (PHGComm*) ZOLTAN_MALLOC (sizeof(PHGComm)))) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: PHGComm");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  if (!(redistributed=newVCycle(zz,NULL,NULL,vcycle,vcycle_timing))) {
	    ZOLTAN_FREE (&hgc);
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "redistributed is NULL.");
	    goto End;
	  }

	  Zoltan_PHG_Redistribute(zz,hgp,hg,0,middle,hgc, redistributed->hg,
				  &vcycle->vlno,&vcycle->vdest);
	  if (hgp->UseFixedVtx || hgp->UsePrefPart)
            redistributed->hg->bisec_split = hg->bisec_split;

	  if ((err=allocVCycle(redistributed))!= ZOLTAN_OK)
	    goto End;
	  vcycle = redistributed;

	  if (hgc->myProc < 0)
	    /* I'm not in the redistributed part so I should go to uncoarsening
	       refinement and wait */ {
	    if (fine_timing) {
	      if (timer->cpgather < 0)
		timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
	      if (timer->cprefine < 0)
		timer->cprefine =Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
	      if (timer->cpart < 0)
		timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");
	    }
	    if (do_timing) {
	      ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	      ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	    }
	    goto Refine;
	  }

	  hg = vcycle->hg;
	  hg->redl = hgp->redl; /* not set with hg creation */
	  if (do_timing) {
	    ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator);
	    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	  }
	}
      }
  }

  if (hgp->output_level >= PHG_DEBUG_LIST) {
    uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n",
     hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
     hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p);
    if (hgp->output_level > PHG_DEBUG_LIST) {
      err = Zoltan_HG_Info(zz, hg);
      if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
        goto End;
    }
  }
  if (hgp->output_level >= PHG_DEBUG_PLOT)
    Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL,
     "coarsening plot");

  /* free array that may have been allocated in matching */
  if (hgp->vtx_scal) {
    hgp->vtx_scal_size = 0;
    ZOLTAN_FREE(&(hgp->vtx_scal));
  }

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->coarsepart, hgc->Communicator);
  }

  /****** Coarse Partitioning ******/

  err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp);

  if (err != ZOLTAN_OK && err != ZOLTAN_WARN)
    goto End;

  if (do_timing) {
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarsepart, hgc->Communicator);
    ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
  }

Refine:
  del = vcycle;
  refine = 1;

  /****** Uncoarsening/Refinement ******/
  while (vcycle) {
    VCycle *finer = vcycle->finer;
    hg = vcycle->hg;

    if (refine && hgc->myProc >= 0) {
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->refine, hgc->Communicator);
      }
      if (vcycle_timing) {
	if (vcycle->timer_refine < 0) {
	  char str[80];
	  sprintf(str, "VC Refinement %d", hg->info);
	  vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str);
	}
	ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine,
			   hgc->Communicator);
      }

      err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp);
        
      if (do_timing) {
	ZOLTAN_TIMER_STOP(zz->ZTime, timer->refine, hgc->Communicator);
	ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
      }
      if (vcycle_timing)
	ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine,
			  hgc->Communicator);

                          
      if (hgp->output_level >= PHG_DEBUG_LIST)     
	uprintf(hgc, 
		"FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n",
		hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, 
		hgp->redm_str,
		hgp->coarsepartition_str, hgp->refinement_str, p,
		Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, 0, p, 
                                           vcycle->Part),
		Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err));

      if (hgp->output_level >= PHG_DEBUG_PLOT)
	Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part,
			"partitioned plot");
    }

    if (finer)  {
      int *rbuffer;
            
      /* Project coarse partition to fine partition */
      if (finer->comm_plan) {
	refine = 1;
	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->project, hgc->Communicator);
	}
	if (vcycle_timing) {
	  if (vcycle->timer_project < 0) {
	    char str[80];
	    sprintf(str, "VC Project Up %d", hg->info);
	    vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str);
	  }
	  ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project,
			     hgc->Communicator);
	}
        
	/* easy to assign partitions to internal matches */
	for (i = 0; i < finer->hg->nVtx; i++)
	  if (finer->LevelMap[i] >= 0)   /* if considers only the local vertices */
	    finer->Part[i] = vcycle->Part[finer->LevelMap[i]];
          
	/* now that the course partition assignments have been propagated */
	/* upward to the finer level for the local vertices, we need to  */    
	/* fill the LevelData (matched pairs of a local vertex with a    */
	/* off processor vertex) with the partition assignment of the    */
	/* local vertex - can be done totally in the finer level!        */    
	for (i = 0; i < finer->LevelCnt; i++)  {
	  ++i;          /* skip over off processor lno */
	  finer->LevelData[i] = finer->Part[finer->LevelData[i]]; 
	}
            
	/* allocate rec buffer to exchange LevelData information */
	rbuffer = NULL;
	if (finer->LevelSndCnt > 0)  {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int));
	  if (!rbuffer)    {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}       
      
	/* get partition assignments from owners of externally matched vtxs */  
	Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i);
	Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, 
         (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer);

	/* process data to assign partitions to expernal matches */
	for (i = 0; i < 2 * finer->LevelSndCnt;)  {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];      
	  finer->Part[lno] = partition;         
	}

	ZOLTAN_FREE (&rbuffer);                  
	Zoltan_Comm_Destroy (&finer->comm_plan);                   

	if (do_timing) {
	  ZOLTAN_TIMER_STOP(zz->ZTime, timer->project, hgc->Communicator);
	  ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator);
	}
	if (vcycle_timing)
	  ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project,
			    hgc->Communicator);
      } else {
	int *sendbuf = NULL, size;
	refine = 0;
	/* ints local and partition numbers */
	if (finer->vlno) {
	  sendbuf = (int*) ZOLTAN_MALLOC (2 * hg->nVtx * sizeof(int));
	  if (!sendbuf) {
	    ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }

	  for (i = 0; i < hg->nVtx; ++i) {
	    sendbuf[2 * i] = finer->vlno[i];     /* assign local numbers */
	    sendbuf[2 * i + 1] = vcycle->Part[i];/* assign partition numbers */
	  }
	}

	ZOLTAN_FREE (&hgc);
	hgc = finer->hg->comm; /* updating hgc is required when the processors
				   change */
	/* Create comm plan to unredistributed processors */
	err = Zoltan_Comm_Create(&finer->comm_plan, finer->vlno ? hg->nVtx : 0,
				 finer->vdest, hgc->Communicator, COMM_TAG+2,
				 &size);

	if (err != ZOLTAN_OK && err != ZOLTAN_WARN) {
	  ZOLTAN_PRINT_ERROR(hgc->myProc, yo, "Zoltan_Comm_Create failed.");
	  goto End;
	}

	/* allocate rec buffer to exchange sendbuf information */
	rbuffer = NULL;
	if (finer->hg->nVtx) {
	  rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->hg->nVtx * sizeof(int));

	  if (!rbuffer) {
	    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
            ZOLTAN_TRACE_EXIT(zz, yo);
	    return ZOLTAN_MEMERR;
	  }
	}

	/* Use plan to send partitions to the unredistributed processors */

	Zoltan_Comm_Do(finer->comm_plan, COMM_TAG+3, (char *) sendbuf,
		       2*sizeof(int), (char *) rbuffer);

	MPI_Bcast(rbuffer, 2*finer->hg->nVtx, MPI_INT, 0, hgc->col_comm);
	
	/* process data to assign partitions to unredistributed processors */
	for (i = 0; i < 2 * finer->hg->nVtx;) {
	  int lno, partition;
	  lno       = rbuffer[i++];
	  partition = rbuffer[i++];
	  finer->Part[lno] = partition;
	}

	if (finer->vlno)
	  ZOLTAN_FREE (&sendbuf);

	ZOLTAN_FREE (&rbuffer);
	Zoltan_Comm_Destroy (&finer->comm_plan);
      }
    }

    vcycle = finer;
  }       /* while (vcycle) */
    
End:
  vcycle = del;
  while (vcycle) {
    if (vcycle_timing) {
      Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout);
      Zoltan_Timer_Destroy(&vcycle->timer);
    }
    if (vcycle->finer) {   /* cleanup by level */
      Zoltan_HG_HGraph_Free (vcycle->hg);

      if (vcycle->LevelData)
	Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg);
      else if (vcycle->vlno)
	Zoltan_Multifree (__FILE__, __LINE__, 5, &vcycle->Part, &vcycle->vdest,
			  &vcycle->vlno, &vcycle->LevelMap, &vcycle->hg);
      else
	Zoltan_Multifree (__FILE__, __LINE__, 3, &vcycle->Part,
			  &vcycle->LevelMap, &vcycle->hg);
    }
    else                   /* cleanup top level */
      Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap,
                        &vcycle->LevelData);
    del = vcycle;
    vcycle = vcycle->finer;
    ZOLTAN_FREE(&del);
  }

  if (reset_geometric_matching) {
    strcpy(hgp->redm_str, reset_geometric_string);
    Zoltan_PHG_Set_Matching_Fn(hgp);
  }

  if (do_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator);
  ZOLTAN_TRACE_EXIT(zz, yo) ;
  return err;
}