/* Main partitioning function for hypergraph partitioning. */ int Zoltan_PHG_Partition ( ZZ *zz, /* Zoltan data structure */ HGraph *hg, /* Input hypergraph to be partitioned */ int p, /* Input: number partitions to be generated */ float *part_sizes, /* Input: array of length p containing percentages of work to be assigned to each partition */ Partition parts, /* Input: initial partition #s; aligned with vtx arrays. Output: computed partition #s */ PHGPartParams *hgp, /* Input: parameters for hgraph partitioning. */ int level) { PHGComm *hgc = hg->comm; VCycle *vcycle=NULL, *del=NULL; int i, err = ZOLTAN_OK; int prevVcnt = 2*hg->dist_x[hgc->nProc_x]; int prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; char *yo = "Zoltan_PHG_Partition"; static int timer_match = -1, /* Timers for various stages */ timer_coarse = -1, /* Declared static so we can accumulate */ timer_refine = -1, /* times over calls to Zoltan_PHG_Partition */ timer_coarsepart = -1, timer_project = -1, timer_vcycle = -1; /* times everything in Vcycle not included in above timers */ int do_timing = (hgp->use_timers > 1); int vcycle_timing = (hgp->use_timers > 4); ZOLTAN_TRACE_ENTER(zz, yo); if (do_timing) { if (timer_vcycle < 0) timer_vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle"); if (timer_match < 0) timer_match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching"); if (timer_coarse < 0) timer_coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening"); if (timer_coarsepart < 0) timer_coarsepart = Zoltan_Timer_Init(zz->ZTime, 1, "Coarse_Partition"); if (timer_refine < 0) timer_refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement"); if (timer_project < 0) timer_project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up"); ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator); } if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL."); return ZOLTAN_MEMERR; } /****** Coarsening ******/ #define COARSEN_FRACTION_LIMIT 0.9 /* Stop if we don't make much progress */ while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > hg->redl) && ((hg->dist_x[hgc->nProc_x] < (int) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) || (hg->dist_y[hgc->nProc_y] < (int) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) && hg->dist_y[hgc->nProc_y] && hgp->matching) { int *match = NULL; VCycle *coarser=NULL; prevVcnt = hg->dist_x[hgc->nProc_x]; prevVedgecnt = hg->dist_y[hgc->nProc_y]; #ifdef _DEBUG /* UVC: load balance stats */ Zoltan_PHG_LoadBalStat(zz, hg); #endif if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_match, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_match < 0) { char str[80]; sprintf(str, "VC Matching %d", hg->info); vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match, hgc->Communicator); } /* Allocate and initialize Matching Array */ if (hg->nVtx && !(match = (int*) ZOLTAN_MALLOC (hg->nVtx*sizeof(int)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array"); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; i++) match[i] = i; /* Calculate matching (packing or grouping) */ err = Zoltan_PHG_Matching (zz, hg, match, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_FREE ((void**) &match); goto End; } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_match, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_coarse, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_coarse < 0) { char str[80]; sprintf(str, "VC Coarsening %d", hg->info); vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); } if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) { ZOLTAN_FREE ((void**) &match); ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL."); goto End; } /* Construct coarse hypergraph and LevelMap */ err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap, &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, &vcycle->comm_plan, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarse, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator); } ZOLTAN_FREE ((void**) &match); if ((err=allocVCycle(coarser))!= ZOLTAN_OK) goto End; vcycle = coarser; hg = vcycle->hg; } if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); /* free array that may have been allocated in matching */ if (hgp->vtx_scal) ZOLTAN_FREE(&(hgp->vtx_scal)); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_coarsepart, hgc->Communicator); } /****** Coarse Partitioning ******/ err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_coarsepart, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator); } del = vcycle; /****** Uncoarsening/Refinement ******/ while (vcycle) { VCycle *finer = vcycle->finer; hg = vcycle->hg; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_refine, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_refine < 0) { char str[80]; sprintf(str, "VC Refinement %d", hg->info); vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine, hgc->Communicator); } err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_refine, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine, hgc->Communicator); if (hgp->output_level >= PHG_DEBUG_LIST) uprintf(hgc, "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p, Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, p, vcycle->Part), Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err)); if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part, "partitioned plot"); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_project, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_project < 0) { char str[80]; sprintf(str, "VC Project Up %d", hg->info); vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project, hgc->Communicator); } /* Project coarse partition to fine partition */ if (finer) { int *rbuffer; /* easy to undo internal matches */ for (i = 0; i < finer->hg->nVtx; i++) if (finer->LevelMap[i] >= 0) finer->Part[i] = vcycle->Part[finer->LevelMap[i]]; /* fill sendbuffer with part data for external matches I owned */ for (i = 0; i < finer->LevelCnt; i++) { ++i; /* skip return lno */ finer->LevelData[i] = finer->Part[finer->LevelData[i]]; } /* allocate rec buffer */ rbuffer = NULL; if (finer->LevelSndCnt > 0) { rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int)); if (!rbuffer) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory."); return ZOLTAN_MEMERR; } } /* get partition assignments from owners of externally matchted vtxs */ Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i); Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer); /* process data to undo external matches */ for (i = 0; i < 2 * finer->LevelSndCnt;) { int lno, partition; lno = rbuffer[i++]; partition = rbuffer[i++]; finer->Part[lno] = partition; } ZOLTAN_FREE (&rbuffer); Zoltan_Comm_Destroy (&finer->comm_plan); } if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer_project, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer_vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project, hgc->Communicator); vcycle = finer; } /* while (vcycle) */ End: vcycle = del; while (vcycle) { if (vcycle_timing) { Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout); Zoltan_Timer_Destroy(&vcycle->timer); } if (vcycle->finer) { /* cleanup by level */ Zoltan_HG_HGraph_Free (vcycle->hg); Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part, &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg); } else /* cleanup top level */ Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap, &vcycle->LevelData); del = vcycle; vcycle = vcycle->finer; ZOLTAN_FREE(&del); } if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer_vcycle, hgc->Communicator); ZOLTAN_TRACE_EXIT(zz, yo) ; return err; }
int Zoltan_PHG( ZZ *zz, /* The Zoltan structure */ float *part_sizes, /* Input: Array of size zz->Num_Global_Parts containing the percentage of work assigned to each partition. */ int *num_imp, /* not computed */ ZOLTAN_ID_PTR *imp_gids, /* not computed */ ZOLTAN_ID_PTR *imp_lids, /* not computed */ int **imp_procs, /* not computed */ int **imp_to_part, /* not computed */ int *num_exp, /* number of objects to be exported */ ZOLTAN_ID_PTR *exp_gids, /* global ids of objects to be exported */ ZOLTAN_ID_PTR *exp_lids, /* local ids of objects to be exported */ int **exp_procs, /* list of processors to export to */ int **exp_to_part ) /* list of partitions to which exported objs are assigned. */ { char *yo = "Zoltan_PHG"; ZHG *zoltan_hg = NULL; PHGPartParams hgp; /* Hypergraph parameters. */ HGraph *hg = NULL; /* Hypergraph itself */ Partition parts = NULL; /* Partition assignments in 2D distribution. */ int err = ZOLTAN_OK, p=0; struct phg_timer_indices *timer = NULL; int do_timing = 0; ZOLTAN_TRACE_ENTER(zz, yo); /* Initialization of return arguments. */ *num_imp = *num_exp = -1; *imp_gids = *exp_gids = NULL; *imp_lids = *exp_lids = NULL; *imp_procs = *exp_procs = NULL; /* Initialize HG parameters. */ err = Zoltan_PHG_Initialize_Params(zz, part_sizes, &hgp); if (err != ZOLTAN_OK) goto End; if (hgp.use_timers) { if (!zz->LB.Data_Structure) { zz->LB.Data_Structure = (struct phg_timer_indices *) ZOLTAN_MALLOC(sizeof(struct phg_timer_indices)); initialize_timer_indices((struct phg_timer_indices *)zz->LB.Data_Structure); } timer = zz->LB.Data_Structure; if (timer->all < 0) timer->all = Zoltan_Timer_Init(zz->ZTime, 1, "Zoltan_PHG"); } if (hgp.use_timers > 1) { do_timing = 1; if (timer->build < 0) timer->build = Zoltan_Timer_Init(zz->ZTime, 1, "Build"); if (timer->setupvmap < 0) timer->setupvmap = Zoltan_Timer_Init(zz->ZTime, 0, "Vmaps"); } if (hgp.use_timers) ZOLTAN_TIMER_START(zz->ZTime, timer->all, zz->Communicator); if (do_timing) ZOLTAN_TIMER_START(zz->ZTime, timer->build, zz->Communicator); /* build initial Zoltan hypergraph from callback functions. */ err = Zoltan_PHG_Build_Hypergraph (zz, &zoltan_hg, &parts, &hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error building hypergraph."); goto End; } if (zoltan_hg->GnObj == 0){ /* degenerate case - no objects to partition */ hgp.final_output = 0; goto End; } hg = &zoltan_hg->HG; p = zz->LB.Num_Global_Parts; zoltan_hg->HG.redl = MAX(hgp.redl, p); /* redl needs to be dynamic */ /* RTHRTH -- redl may need to be scaled by number of procs */ /* EBEB -- at least make sure redl > #procs */ if (hgp.UseFixedVtx) hg->bisec_split = 1; /* this will be used only #parts=2 otherwise rdivide will set to appropriate value */ if (hgp.UsePrefPart || hgp.UseFixedVtx) { /* allocate memory for pref_part */ if (hg->nVtx && !(hg->pref_part = (int*) ZOLTAN_MALLOC (sizeof(int) * hg->nVtx))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error building hypergraph."); goto End; } } if (hgp.UsePrefPart) /* copy input parts as pref_part; UVCUVC: TODO: this code (and alloc of pref_part) should go to Build_Hypergraph later */ memcpy(hg->pref_part, parts, sizeof(int) * hg->nVtx); if (hgp.UseFixedVtx) { int i; for (i=0; i<hg->nVtx; ++i) if (hg->fixed_part[i]>=0) hg->pref_part[i] = hg->fixed_part[i]; else if (!hgp.UsePrefPart) hg->pref_part[i] = -1; } hgp.UsePrefPart |= hgp.UseFixedVtx; if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->build, zz->Communicator); /* UVCUVC DEBUG PRINT uprintf(hg->comm, "Zoltan_PHG kway=%d #parts=%d\n", hgp.kway, zz->LB.Num_Global_Parts); */ if (!strcasecmp(hgp.hgraph_pkg, "PARKWAY")){ if (do_timing) { if (timer->parkway < 0) timer->parkway = Zoltan_Timer_Init(zz->ZTime, 0, "PHG_ParKway"); ZOLTAN_TIMER_START(zz->ZTime, timer->parkway, zz->Communicator); } err = Zoltan_PHG_ParKway(zz, hg, p, parts, &hgp); if (err != ZOLTAN_OK) goto End; if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->parkway, zz->Communicator); } else if (!strcasecmp(hgp.hgraph_pkg, "PATOH")){ if (hgp.use_timers > 1) { if (timer->patoh < 0) timer->patoh = Zoltan_Timer_Init(zz->ZTime, 0, "HG_PaToH"); ZOLTAN_TIMER_START(zz->ZTime, timer->patoh, zz->Communicator); } err = Zoltan_PHG_PaToH(zz, hg, p, parts, &hgp); if (err != ZOLTAN_OK) goto End; if (hgp.use_timers > 1) ZOLTAN_TIMER_STOP(zz->ZTime, timer->patoh, zz->Communicator); } else { /* it must be PHG */ /* UVC: if it is bisection anyways; no need to create vmap etc; rdivide is going to call Zoltan_PHG_Partition anyways... */ if (hgp.globalcomm.Communicator != MPI_COMM_NULL) { /* This processor is part of the 2D data distribution; it should participate in partitioning. */ if (hgp.kway || zz->LB.Num_Global_Parts == 2) { /* call main V cycle routine */ err = Zoltan_PHG_Partition(zz, hg, p, hgp.part_sizes, parts, &hgp); if (err != ZOLTAN_OK) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error partitioning hypergraph."); goto End; } } else { int i; if (do_timing) ZOLTAN_TIMER_START(zz->ZTime, timer->setupvmap, zz->Communicator); /* vmap associates original vertices to sub hypergraphs */ if (hg->nVtx && !(hg->vmap = (int*) ZOLTAN_MALLOC(hg->nVtx*sizeof (int)))) { err = ZOLTAN_MEMERR; ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error."); goto End; } for (i = 0; i < hg->nVtx; ++i) hg->vmap[i] = i; if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->setupvmap, zz->Communicator); /* partition hypergraph */ err = Zoltan_PHG_rdivide (0, p-1, parts, zz, hg, &hgp, 0); if (hgp.output_level >= PHG_DEBUG_LIST) uprintf(hg->comm, "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %s/%s/%s/%s p=%d " "bal=%.2f cutl=%.2f\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hgp.convert_str, hgp.redm_str, hgp.coarsepartition_str, hgp.refinement_str, p, Zoltan_PHG_Compute_Balance(zz, hg, hgp.part_sizes, 0, p, parts), Zoltan_PHG_Compute_ConCut(hg->comm, hg, parts, p, &err)); if (err != ZOLTAN_OK) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error partitioning hypergraph."); goto End; } ZOLTAN_FREE (&hg->vmap); } #ifdef CHECK_LEFTALONE_VERTICES findAndSaveLeftAloneVertices(zz, hg, p, parts, &hgp); #endif } } if (!strcasecmp(hgp.hgraph_method, "REPARTITION")) { Zoltan_PHG_Remove_Repart_Data(zz, zoltan_hg, hg, &hgp); } /* UVC DEBUG PRINT if (!strcasecmp(hgp->hgraph_method, "REFINE")){ uprintf(hg->comm, "UVC ATTHEEND |V|=%6d |E|=%6d #pins=%6d p=%d bal=%.2f cutl=%.2f\n", hg->nVtx, hg->nEdge, hg->nPins, p, Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, p, parts), Zoltan_PHG_Compute_ConCut(hg->comm, hg, parts, p, &err)); detailed_balance_info(zz, hg, part_sizes, p, parts); } */ /* Initialize these timers here so their output is near end of printout */ if (do_timing) if (timer->retlist < 0) timer->retlist = Zoltan_Timer_Init(zz->ZTime, 1, "Return_Lists"); if (hgp.use_timers) if (timer->finaloutput < 0) timer->finaloutput = Zoltan_Timer_Init(zz->ZTime, 1, "Final_Output"); if (do_timing) ZOLTAN_TIMER_START(zz->ZTime, timer->retlist, zz->Communicator); /* Build Zoltan's Output_Parts, mapped from 2D distribution to input distribution. */ Zoltan_PHG_Output_Parts(zz, zoltan_hg, parts); /* Build Zoltan's return arguments. */ Zoltan_PHG_Return_Lists(zz, zoltan_hg, num_exp, exp_gids, exp_lids, exp_procs, exp_to_part); if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->retlist, zz->Communicator); End: if (err == ZOLTAN_MEMERR) ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Memory error.") else if (err != ZOLTAN_OK) ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Error partitioning hypergraph.") /* KDDKDD The following code prints a final quality result even when * KDDKDD phg_output_level is zero. It is useful for our tests and * KDDKDD data collection. */ if ((err == ZOLTAN_OK) && hgp.final_output) { static int nRuns=0; static double balsum = 0.0, cutlsum = 0.0, cutnsum = 0.0, movesum = 0.0, repartsum = 0.0; static double balmax = 0.0, cutlmax = 0.0, cutnmax = 0.0, movemax = 0.0, repartmax = 0.0; static double balmin = 1e100, cutlmin = 1e100, cutnmin = 1e100, movemin = 1e100, repartmin = 1e100; double bal = 0.; double cutl = 0.; /* Connnectivity cuts: sum_over_edges((npart-1)*ewgt) */ double cutn = 0.; /* Net cuts: sum_over_edges((nparts>1)*ewgt) */ double rlocal[2]; /* local cut stats for removed edges */ double rglobal[2]; /* global cut stats for removed edges */ int gnremove, i; double move=0.0, gmove; /* local and global migration costs */ double repart=0.0; /* total repartitioning cost: comcost x multiplier + migration_cost */ if (hgp.use_timers) { /* Do not include final output time in partitioning time */ ZOLTAN_TIMER_STOP(zz->ZTime, timer->all, zz->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->finaloutput, zz->Communicator); } if (hgp.globalcomm.Communicator != MPI_COMM_NULL) { /* Processor participated in partitioning */ bal = Zoltan_PHG_Compute_Balance(zz, hg, hgp.part_sizes, 0, zz->LB.Num_Global_Parts, parts); cutl= Zoltan_PHG_Compute_ConCut(hg->comm, hg, parts, zz->LB.Num_Global_Parts, &err); cutn = Zoltan_PHG_Compute_NetCut(hg->comm, hg, parts, zz->LB.Num_Global_Parts); for (i = 0; i < zoltan_hg->nObj; ++i) { /* uprintf(hg->comm, " obj[%d] = %d in=%d out=%d\n", i, zoltan_hg->AppObjSizes[i], zoltan_hg->Input_Parts[i], zoltan_hg->Output_Parts[i]); */ if (zoltan_hg->Input_Parts[i] != zoltan_hg->Output_Parts[i]) move += (double) ((zoltan_hg->AppObjSizes) ? zoltan_hg->AppObjSizes[i] : 1.0); } } if (!err) { /* Add in cut contributions from removed edges */ MPI_Allreduce(&(zoltan_hg->nRemove), &gnremove, 1, MPI_INT, MPI_SUM, zz->Communicator); if (gnremove) { err = Zoltan_PHG_Removed_Cuts(zz, zoltan_hg, rlocal); MPI_Allreduce(rlocal, rglobal, 2, MPI_DOUBLE,MPI_SUM,zz->Communicator); cutl += rglobal[0]; cutn += rglobal[1]; } MPI_Allreduce(&move, &gmove, 1, MPI_DOUBLE, MPI_SUM, zz->Communicator); repart = cutl*hgp.RepartMultiplier + gmove; repartsum += repart; if (repart > repartmax) repartmax = repart; if (repart < repartmin) repartmin = repart; movesum += gmove; if (gmove > movemax) movemax = gmove; if (gmove < movemin) movemin = gmove; cutlsum += cutl; if (cutl > cutlmax) cutlmax = cutl; if (cutl < cutlmin) cutlmin = cutl; cutnsum += cutn; if (cutn > cutnmax) cutnmax = cutn; if (cutn < cutnmin) cutnmin = cutn; balsum += bal; if (bal > balmax) balmax = bal; if (bal < balmin) balmin = bal; nRuns++; if (zz->Proc == 0) { uprintf(hg->comm, "STATS Runs %d bal CURRENT %f MAX %f MIN %f AVG %f\n", nRuns, bal, balmax, balmin, balsum/nRuns); uprintf(hg->comm, "STATS Runs %d cutl CURRENT %f MAX %f MIN %f AVG %f\n", nRuns, cutl, cutlmax, cutlmin, cutlsum/nRuns); uprintf(hg->comm, "STATS Runs %d cutn CURRENT %f MAX %f MIN %f AVG %f\n", nRuns, cutn, cutnmax, cutnmin, cutnsum/nRuns); uprintf(hg->comm, "STATS Runs %d %s CURRENT %f MAX %f MIN %f AVG %f\n", nRuns, (zoltan_hg->showMoveVol) ? "moveVol" : "moveCnt", gmove, movemax, movemin, movesum/nRuns); if (zoltan_hg->showMoveVol) uprintf(hg->comm, "STATS Runs %d repart CURRENT %f MAX %f MIN %f AVG %f\n", nRuns, repart, repartmax, repartmin, repartsum/nRuns); } } if (hgp.use_timers) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->finaloutput, zz->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->all, zz->Communicator); } } /* KDDKDD End of printing section. */ ZOLTAN_FREE(&parts); if (zoltan_hg != NULL) { Zoltan_PHG_Free_Hypergraph_Data(zoltan_hg); ZOLTAN_FREE (&zoltan_hg); } if (hgp.use_timers) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->all, zz->Communicator); if (hgp.globalcomm.Communicator != MPI_COMM_NULL) Zoltan_Timer_PrintAll(zz->ZTime, 0, hgp.globalcomm.Communicator, stdout); } if (hgp.globalcomm.row_comm != MPI_COMM_NULL) MPI_Comm_free(&(hgp.globalcomm.row_comm)); if (hgp.globalcomm.col_comm != MPI_COMM_NULL) MPI_Comm_free(&(hgp.globalcomm.col_comm)); if (hgp.globalcomm.Communicator != MPI_COMM_NULL) MPI_Comm_free(&(hgp.globalcomm.Communicator)); /* Free part_sizes if created new due to ADD_OBJ_WEIGHT */ if (hgp.part_sizes != part_sizes) ZOLTAN_FREE(&hgp.part_sizes); ZOLTAN_TRACE_EXIT(zz, yo); return err; }
int Zoltan_PHG_CoarsePartition( ZZ *zz, HGraph *phg, /* Input: coarse hypergraph -- distributed! */ int numPart, /* Input: number of partitions to generate. */ float *part_sizes, /* Input: array of size numPart listing target sizes (% of work) for the partitions */ Partition part, /* Input: array of initial partition assignments. Output: array of computed partition assignments. */ PHGPartParams *hgp /* Input: parameters to use. */ ) { /* * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph. * Typically, this routine is called at the bottom level in a * multilevel scheme (V-cycle). * It gathers the distributed hypergraph to each processor and computes * a decomposition of the serial hypergraph. * It computes a different partition on each processor * using different random numbers (and possibly also * different algorithms) and selects the best. */ char *yo = "Zoltan_PHG_CoarsePartition"; int ierr = ZOLTAN_OK; int i, si, j; static PHGComm scomm; /* Serial communicator info */ static int first_time = 1; HGraph *shg = NULL; /* Serial hypergraph gathered from phg */ int *spart = NULL; /* Partition vectors for shg. */ int *new_part = NULL; /* Ptr to new partition vector. */ float *bestvals = NULL; /* Best cut values found so far */ int worst, new_cand; float bal, cut, worst_cut; int fine_timing = (hgp->use_timers > 2); struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz); int local_coarse_part = hgp->LocalCoarsePartition; /* Number of iterations to try coarse partitioning on each proc. */ /* 10 when p=1, and 1 when p is large. */ const int num_coarse_iter = 1 + 9/zz->Num_Proc; ZOLTAN_TRACE_ENTER(zz, yo); if (fine_timing) { if (timer->cpgather < 0) timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather"); if (timer->cprefine < 0) timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine"); if (timer->cpart < 0) timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part"); ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator); } /* Force LocalCoarsePartition if large global graph */ #define LARGE_GRAPH_VTX 64000 #define LARGE_GRAPH_PINS 256000 if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){ /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */ local_coarse_part = 1; } /* take care of all special cases first */ if (!strcasecmp(hgp->coarsepartition_str, "no") || !strcasecmp(hgp->coarsepartition_str, "none")) { /* Do no coarse partitioning. */ /* Do a sanity test and mapping to parts [0,...,numPart-1] */ int first = 1; PHGComm *hgc=phg->comm; Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm); if (hgp->UsePrefPart) { for (i = 0; i < phg->nVtx; i++) { /* Impose fixed vertex/preferred part constraints. */ if (phg->pref_part[i] < 0) { /* Free vertex in fixedvertex partitioning or repart */ /* randomly assigned to a part */ part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart); } else { if (phg->bisec_split < 0) /* direct k-way, use part numbers directly */ part[i] = phg->pref_part[i]; else /* recursive bisection, map to 0-1 part numbers */ part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1); } } } else { for (i = 0; i < phg->nVtx; i++) { if (part[i] >= numPart || part[i]<0) { if (first) { ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts."); first = 0; ierr = ZOLTAN_WARN; } part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart; } } } } else if (numPart == 1) { /* everything goes in the one partition */ for (i = 0; i < phg->nVtx; i++) part[i] = 0; } else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) { /* more partitions than vertices, trivial answer */ for (i = 0; i < phg->nVtx; i++) part[i] = phg->dist_x[phg->comm->myProc_x]+i; } else if (local_coarse_part) { /* Apply local partitioner to each column */ ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp, hgp->CoarsePartition); } else { /* Normal case: * Gather distributed HG to each processor; * compute different partitioning on each processor; * select the "best" result. */ ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition; /* Select different coarse partitioners for processors here. */ CoarsePartition = hgp->CoarsePartition; if (CoarsePartition == NULL) { /* auto */ /* Select a coarse partitioner from the array of coarse partitioners */ CoarsePartition = CoarsePartitionFns[phg->comm->myProc % NUM_COARSEPARTITION_FNS]; } if (phg->comm->nProc == 1) { /* Serial and parallel hgraph are the same. */ shg = phg; } else { /* Set up a serial communication struct for gathered HG */ if (first_time) { scomm.nProc_x = scomm.nProc_y = 1; scomm.myProc_x = scomm.myProc_y = 0; scomm.Communicator = MPI_COMM_SELF; scomm.row_comm = MPI_COMM_SELF; scomm.col_comm = MPI_COMM_SELF; scomm.myProc = 0; scomm.nProc = 1; first_time = 0; } scomm.RNGState = Zoltan_Rand(NULL); scomm.RNGState_row = Zoltan_Rand(NULL); scomm.RNGState_col = Zoltan_Rand(NULL); scomm.zz = zz; /* * Gather parallel hypergraph phg to each processor, creating * serial hypergraph shg. */ if (fine_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator); } ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg); if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather."); goto End; } if (fine_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator); } } /* * Allocate partition array spart for the serial hypergraph shg * and partition shg. */ spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1), sizeof(int)); bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1)*sizeof(int)); if ((!spart) || (!bestvals)) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory."); ierr = ZOLTAN_MEMERR; goto End; } /* Compute several coarse partitionings. */ /* Keep the NUM_PART_KEEP best ones around. */ /* Currently, only the best one is used. */ /* Set RNG so different procs compute different parts. */ Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL); new_cand = 0; new_part = spart; for (i=0; i< num_coarse_iter; i++){ int savefmlooplimit=hgp->fm_loop_limit; /* Overwrite worst partition with new candidate. */ ierr = CoarsePartition(zz, shg, numPart, part_sizes, new_part, hgp); if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from CoarsePartition."); goto End; } /* time refinement step in coarse partitioner */ if (fine_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator); } /* UVCUVC: Refine new candidate: only one pass is enough. */ hgp->fm_loop_limit = 1; Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp); hgp->fm_loop_limit = savefmlooplimit; /* stop refinement timer */ if (fine_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator); } /* Decide if candidate is in the top tier or not. */ /* Our objective is a combination of cuts and balance */ bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, numPart, new_part); cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr); /* Use ratio-cut as our objective. There are many other options! */ bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */ if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_PHG_Compute_ConCut."); goto End; } if (i<NUM_PART_KEEP) new_cand = i+1; else { /* find worst partition vector, to overwrite it */ /* future optimization: keep bestvals sorted */ worst = 0; worst_cut = bestvals[0]; for (j=1; j<NUM_PART_KEEP+1; j++){ if (worst_cut < bestvals[j]){ worst_cut = bestvals[j]; worst = j; } } new_cand = worst; } new_part = spart+new_cand*(shg->nVtx); } /* Copy last partition vector such that all the best ones are contiguous starting at spart. */ for (i=0; i<shg->nVtx; i++){ new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i]; } /* Also update bestvals */ bestvals[new_cand] = bestvals[NUM_PART_KEEP]; /* Evaluate and select the best. */ /* For now, only pick the best one, in the future we pick the k best. */ ierr = pick_best(zz, hgp, phg->comm, shg, numPart, MIN(NUM_PART_KEEP, num_coarse_iter), spart, bestvals); if (ierr < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from pick_best."); goto End; } if (phg->comm->nProc > 1) { /* Map gathered partition back to 2D distribution */ for (i = 0; i < phg->nVtx; i++) { /* KDDKDD Assume vertices in serial HG are ordered by GNO of phg */ si = VTX_LNO_TO_GNO(phg, i); part[i] = spart[si]; } Zoltan_HG_HGraph_Free(shg); ZOLTAN_FREE(&shg); } else { /* single processor */ for (i = 0; i < phg->nVtx; i++) part[i] = spart[i]; } ZOLTAN_FREE(&spart); ZOLTAN_FREE(&bestvals); } End: if (fine_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator); ZOLTAN_TRACE_EXIT(zz, yo); return ierr; }
/* Main partitioning function for hypergraph partitioning. */ int Zoltan_PHG_Partition ( ZZ *zz, /* Zoltan data structure */ HGraph *hg, /* Input hypergraph to be partitioned */ int p, /* Input: number partitions to be generated */ float *part_sizes, /* Input: array of length p containing percentages of work to be assigned to each partition */ Partition parts, /* Input: initial partition #s; aligned with vtx arrays. Output: computed partition #s */ PHGPartParams *hgp) /* Input: parameters for hgraph partitioning. */ { PHGComm *hgc = hg->comm; VCycle *vcycle=NULL, *del=NULL; int i, err = ZOLTAN_OK, middle; ZOLTAN_GNO_TYPE origVpincnt; /* for processor reduction test */ ZOLTAN_GNO_TYPE prevVcnt = 2*hg->dist_x[hgc->nProc_x]; /* initialized so that the */ ZOLTAN_GNO_TYPE prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; /* while loop will be entered before any coarsening */ ZOLTAN_GNO_TYPE tot_nPins, local_nPins; MPI_Datatype zoltan_gno_mpi_type; char *yo = "Zoltan_PHG_Partition"; int do_timing = (hgp->use_timers > 1); int fine_timing = (hgp->use_timers > 2); int vcycle_timing = (hgp->use_timers > 4 && hgp->ProRedL == 0); short refine = 0; struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz); int reset_geometric_matching = 0; char reset_geometric_string[4]; ZOLTAN_TRACE_ENTER(zz, yo); zoltan_gno_mpi_type = Zoltan_mpi_gno_type(); if (do_timing) { if (timer->vcycle < 0) timer->vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle"); if (timer->procred < 0) timer->procred = Zoltan_Timer_Init(zz->ZTime, 0, "Processor Reduction"); if (timer->match < 0) timer->match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching"); if (timer->coarse < 0) timer->coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening"); if (timer->coarsepart < 0) timer->coarsepart = Zoltan_Timer_Init(zz->ZTime, 1, "Coarse_Partition"); if (timer->refine < 0) timer->refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement"); if (timer->project < 0) timer->project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up"); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins; MPI_Allreduce(&local_nPins,&tot_nPins,1,zoltan_gno_mpi_type,MPI_SUM,hgc->Communicator); origVpincnt = tot_nPins; if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } /* For geometric coarsening, hgp->matching pointer and string are reset * after geometric_levels of coarsening. Will need to reset them after * this vcycle is completed. Capture that fact now! */ if (!strcasecmp(hgp->redm_str, "rcb") || !strcasecmp(hgp->redm_str, "rib")) { reset_geometric_matching = 1; strcpy(reset_geometric_string, hgp->redm_str); } /****** Coarsening ******/ #define COARSEN_FRACTION_LIMIT 0.9 /* Stop if we don't make much progress */ while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > (ZOLTAN_GNO_TYPE)hg->redl) && ((hg->dist_x[hgc->nProc_x] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) /* prevVcnt initialized to 2*hg->dist_x[hgc->nProc_x] */ || (hg->dist_y[hgc->nProc_y] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) /* prevVedgecnt initialized to 2*hg->dist_y[hgc->nProc_y] */ && hg->dist_y[hgc->nProc_y] && hgp->matching) { ZOLTAN_GNO_TYPE *match = NULL; VCycle *coarser=NULL, *redistributed=NULL; prevVcnt = hg->dist_x[hgc->nProc_x]; prevVedgecnt = hg->dist_y[hgc->nProc_y]; #ifdef _DEBUG /* UVC: load balance stats */ Zoltan_PHG_LoadBalStat(zz, hg); #endif if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->match, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_match < 0) { char str[80]; sprintf(str, "VC Matching %d", hg->info); vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match, hgc->Communicator); } /* Allocate and initialize Matching Array */ if (hg->nVtx && !(match = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC (hg->nVtx*sizeof(ZOLTAN_GNO_TYPE)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; i++) match[i] = i; /* Calculate matching (packing or grouping) */ err = Zoltan_PHG_Matching (zz, hg, match, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_FREE (&match); goto End; } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->match, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->coarse, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_coarse < 0) { char str[80]; sprintf(str, "VC Coarsening %d", hg->info); vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); } if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) { ZOLTAN_FREE (&match); ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL."); goto End; } /* Construct coarse hypergraph and LevelMap */ err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap, &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, &vcycle->comm_plan, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarse, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } ZOLTAN_FREE (&match); if ((err=allocVCycle(coarser))!= ZOLTAN_OK) goto End; vcycle = coarser; hg = vcycle->hg; if (hgc->nProc > 1 && hgp->ProRedL > 0) { local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins; MPI_Allreduce(&local_nPins, &tot_nPins, 1, zoltan_gno_mpi_type, MPI_SUM, hgc->Communicator); if (tot_nPins < (ZOLTAN_GNO_TYPE)(hgp->ProRedL * origVpincnt + 0.5)) { if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->procred, hgc->Communicator); } /* redistribute to half the processors */ origVpincnt = tot_nPins; /* update for processor reduction test */ if(hg->nVtx&&!(hg->vmap=(int*)ZOLTAN_MALLOC(hg->nVtx*sizeof(int)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: hg->vmap"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; i++) hg->vmap[i] = i; middle = (int)((float) (hgc->nProc-1) * hgp->ProRedL); if (hgp->nProc_x_req!=1&&hgp->nProc_y_req!=1) { /* Want 2D decomp */ if ((middle+1) > SMALL_PRIME && Zoltan_PHG_isPrime(middle+1)) --middle; /* if it was prime just use one less #procs (since it should be bigger than SMALL_PRIME it is safe to decrement) */ } if (!(hgc = (PHGComm*) ZOLTAN_MALLOC (sizeof(PHGComm)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: PHGComm"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } if (!(redistributed=newVCycle(zz,NULL,NULL,vcycle,vcycle_timing))) { ZOLTAN_FREE (&hgc); ZOLTAN_PRINT_ERROR (zz->Proc, yo, "redistributed is NULL."); goto End; } Zoltan_PHG_Redistribute(zz,hgp,hg,0,middle,hgc, redistributed->hg, &vcycle->vlno,&vcycle->vdest); if (hgp->UseFixedVtx || hgp->UsePrefPart) redistributed->hg->bisec_split = hg->bisec_split; if ((err=allocVCycle(redistributed))!= ZOLTAN_OK) goto End; vcycle = redistributed; if (hgc->myProc < 0) /* I'm not in the redistributed part so I should go to uncoarsening refinement and wait */ { if (fine_timing) { if (timer->cpgather < 0) timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather"); if (timer->cprefine < 0) timer->cprefine =Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine"); if (timer->cpart < 0) timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part"); } if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } goto Refine; } hg = vcycle->hg; hg->redl = hgp->redl; /* not set with hg creation */ if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } } } } if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); /* free array that may have been allocated in matching */ if (hgp->vtx_scal) { hgp->vtx_scal_size = 0; ZOLTAN_FREE(&(hgp->vtx_scal)); } if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->coarsepart, hgc->Communicator); } /****** Coarse Partitioning ******/ err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarsepart, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } Refine: del = vcycle; refine = 1; /****** Uncoarsening/Refinement ******/ while (vcycle) { VCycle *finer = vcycle->finer; hg = vcycle->hg; if (refine && hgc->myProc >= 0) { if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->refine, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_refine < 0) { char str[80]; sprintf(str, "VC Refinement %d", hg->info); vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine, hgc->Communicator); } err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->refine, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine, hgc->Communicator); if (hgp->output_level >= PHG_DEBUG_LIST) uprintf(hgc, "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p, Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, 0, p, vcycle->Part), Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err)); if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part, "partitioned plot"); } if (finer) { int *rbuffer; /* Project coarse partition to fine partition */ if (finer->comm_plan) { refine = 1; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->project, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_project < 0) { char str[80]; sprintf(str, "VC Project Up %d", hg->info); vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project, hgc->Communicator); } /* easy to assign partitions to internal matches */ for (i = 0; i < finer->hg->nVtx; i++) if (finer->LevelMap[i] >= 0) /* if considers only the local vertices */ finer->Part[i] = vcycle->Part[finer->LevelMap[i]]; /* now that the course partition assignments have been propagated */ /* upward to the finer level for the local vertices, we need to */ /* fill the LevelData (matched pairs of a local vertex with a */ /* off processor vertex) with the partition assignment of the */ /* local vertex - can be done totally in the finer level! */ for (i = 0; i < finer->LevelCnt; i++) { ++i; /* skip over off processor lno */ finer->LevelData[i] = finer->Part[finer->LevelData[i]]; } /* allocate rec buffer to exchange LevelData information */ rbuffer = NULL; if (finer->LevelSndCnt > 0) { rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int)); if (!rbuffer) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } } /* get partition assignments from owners of externally matched vtxs */ Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i); Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer); /* process data to assign partitions to expernal matches */ for (i = 0; i < 2 * finer->LevelSndCnt;) { int lno, partition; lno = rbuffer[i++]; partition = rbuffer[i++]; finer->Part[lno] = partition; } ZOLTAN_FREE (&rbuffer); Zoltan_Comm_Destroy (&finer->comm_plan); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->project, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project, hgc->Communicator); } else { int *sendbuf = NULL, size; refine = 0; /* ints local and partition numbers */ if (finer->vlno) { sendbuf = (int*) ZOLTAN_MALLOC (2 * hg->nVtx * sizeof(int)); if (!sendbuf) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; ++i) { sendbuf[2 * i] = finer->vlno[i]; /* assign local numbers */ sendbuf[2 * i + 1] = vcycle->Part[i];/* assign partition numbers */ } } ZOLTAN_FREE (&hgc); hgc = finer->hg->comm; /* updating hgc is required when the processors change */ /* Create comm plan to unredistributed processors */ err = Zoltan_Comm_Create(&finer->comm_plan, finer->vlno ? hg->nVtx : 0, finer->vdest, hgc->Communicator, COMM_TAG+2, &size); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_PRINT_ERROR(hgc->myProc, yo, "Zoltan_Comm_Create failed."); goto End; } /* allocate rec buffer to exchange sendbuf information */ rbuffer = NULL; if (finer->hg->nVtx) { rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->hg->nVtx * sizeof(int)); if (!rbuffer) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } } /* Use plan to send partitions to the unredistributed processors */ Zoltan_Comm_Do(finer->comm_plan, COMM_TAG+3, (char *) sendbuf, 2*sizeof(int), (char *) rbuffer); MPI_Bcast(rbuffer, 2*finer->hg->nVtx, MPI_INT, 0, hgc->col_comm); /* process data to assign partitions to unredistributed processors */ for (i = 0; i < 2 * finer->hg->nVtx;) { int lno, partition; lno = rbuffer[i++]; partition = rbuffer[i++]; finer->Part[lno] = partition; } if (finer->vlno) ZOLTAN_FREE (&sendbuf); ZOLTAN_FREE (&rbuffer); Zoltan_Comm_Destroy (&finer->comm_plan); } } vcycle = finer; } /* while (vcycle) */ End: vcycle = del; while (vcycle) { if (vcycle_timing) { Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout); Zoltan_Timer_Destroy(&vcycle->timer); } if (vcycle->finer) { /* cleanup by level */ Zoltan_HG_HGraph_Free (vcycle->hg); if (vcycle->LevelData) Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part, &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg); else if (vcycle->vlno) Zoltan_Multifree (__FILE__, __LINE__, 5, &vcycle->Part, &vcycle->vdest, &vcycle->vlno, &vcycle->LevelMap, &vcycle->hg); else Zoltan_Multifree (__FILE__, __LINE__, 3, &vcycle->Part, &vcycle->LevelMap, &vcycle->hg); } else /* cleanup top level */ Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap, &vcycle->LevelData); del = vcycle; vcycle = vcycle->finer; ZOLTAN_FREE(&del); } if (reset_geometric_matching) { strcpy(hgp->redm_str, reset_geometric_string); Zoltan_PHG_Set_Matching_Fn(hgp); } if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TRACE_EXIT(zz, yo) ; return err; }
static int pick_best( ZZ *zz, PHGPartParams *hgp, PHGComm *phg_comm, HGraph *shg, int numPart, int numLocalCandidates, int *spart, /* On input: All candidate partition vectors concatenated. On output: Best partition vectors concatenated. */ float *cutvals /* Reuse cut values if available */ ) { /* Routine to select the best of multiple serial partitions and broadcast * it to all processors */ struct { float val; int rank; } local[2], global[2]; static char *yo = "pick_best"; int i, mybest; float cut, bal; int err = ZOLTAN_OK; /* find best local partition */ /* if cut values are given on input use these (may be ratio-cut), otherwise, only look at cuts not balance. (EB should never happen) */ mybest = 0; /* local[0].val = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, numPart, spart); */ local[0].val = bal = 0.0; /* balance */ if (cutvals) local[1].val = cutvals[0]; else local[1].val = Zoltan_PHG_Compute_ConCut(shg->comm, shg, spart, numPart, &err); if (err < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_PHG_Compute_ConCut"); goto End; } local[0].rank = local[1].rank = phg_comm->myProc; /* What do we say is "best"? For now, say lowest (ratio) cut. */ for (i=1; i<numLocalCandidates; i++){ /*bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, numPart, spart+i*(shg->nVtx));*/ if (cutvals) cut = cutvals[i]; else cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, spart+i*(shg->nVtx), numPart, &err); if (err < 0) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_PHG_Compute_ConCut"); goto End; } if (cut < local[1].val){ mybest = i; local[0].val = bal; /* currently not used */ local[1].val = cut; } } /* copy best partition to beginning of spart */ for (i=0; i<shg->nVtx; i++) spart[i] = spart[mybest*(shg->nVtx)+i]; /* Pick lowest ratio cut as best. */ MPI_Allreduce(local, global, 2, MPI_FLOAT_INT, MPI_MINLOC, phg_comm->Communicator); if (hgp->output_level) uprintf(phg_comm, "Local Ratio Cut= %.2lf Global Ratio Cut= %.2lf\n", local[1].val, global[1].val); MPI_Bcast(spart, shg->nVtx, MPI_INT, global[1].rank, phg_comm->Communicator); End: return err; }