int Zoltan_Distribute_layout (ZZ *zz, const PHGComm * const inlayout, int loRank, int hiRank, int reqx, int reqy, PHGComm *outlayout) { MPI_Group allgrp, newgrp; int *ranks; MPI_Comm nmpicomm; MPI_Comm ompicomm; int myProc; int i; int nProc; ompicomm = (inlayout != NULL)?inlayout->Communicator:zz->Communicator; myProc = (inlayout != NULL)?inlayout->myProc:zz->Proc; nProc= (inlayout != NULL)?inlayout->nProc:zz->Num_Proc; if (((reqx != 1) && (reqy != 1) && (nProc > 3)) && Zoltan_PHG_isPrime(nProc)) nProc--; Zoltan_PHGComm_Init(outlayout); /* create a new communicator for procs[lo..hi] */ MPI_Comm_group(ompicomm, &allgrp); ranks = (int *) ZOLTAN_MALLOC(nProc * sizeof(int)); if (!ranks) return ZOLTAN_MEMERR; for (i=loRank; i<=hiRank; ++i) ranks[i-loRank] = i; MPI_Group_incl(allgrp, nProc, ranks, &newgrp); MPI_Comm_create(ompicomm, newgrp, &nmpicomm); MPI_Group_free(&newgrp); MPI_Group_free(&allgrp); ZOLTAN_FREE(&ranks); return (Zoltan_PHG_Set_2D_Proc_Distrib(zz, nmpicomm, myProc-loRank, nProc, reqx, reqy, outlayout)); }
int Zoltan_PHG_Initialize_Params( ZZ *zz, /* the Zoltan data structure */ float *part_sizes, PHGPartParams *hgp ) { int err = ZOLTAN_OK; char *yo = "Zoltan_PHG_Initialize_Params"; int nProc; int usePrimeComm; MPI_Comm communicator; char add_obj_weight[MAX_PARAM_STRING_LEN]; char edge_weight_op[MAX_PARAM_STRING_LEN]; char cut_objective[MAX_PARAM_STRING_LEN]; char *package = hgp->hgraph_pkg; char *method = hgp->hgraph_method; char buf[1024]; memset(hgp, 0, sizeof(*hgp)); /* in the future if we forget to initialize another param at least it will be 0 */ Zoltan_Bind_Param(PHG_params, "HYPERGRAPH_PACKAGE", &hgp->hgraph_pkg); Zoltan_Bind_Param(PHG_params, "PHG_MULTILEVEL", &hgp->useMultilevel); Zoltan_Bind_Param(PHG_params, "PHG_FROM_GRAPH_METHOD", hgp->convert_str); Zoltan_Bind_Param(PHG_params, "PHG_OUTPUT_LEVEL", &hgp->output_level); Zoltan_Bind_Param(PHG_params, "FINAL_OUTPUT", &hgp->final_output); Zoltan_Bind_Param(PHG_params, "CHECK_GRAPH", &hgp->check_graph); Zoltan_Bind_Param(PHG_params, "CHECK_HYPERGRAPH", &hgp->check_graph); Zoltan_Bind_Param(PHG_params, "PHG_NPROC_VERTEX", &hgp->nProc_x_req); Zoltan_Bind_Param(PHG_params, "PHG_NPROC_EDGE", &hgp->nProc_y_req); Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_LIMIT", &hgp->redl); Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_NCANDIDATE", &hgp->nCand); Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_METHOD", hgp->redm_str); Zoltan_Bind_Param(PHG_params, "PHG_COARSENING_METHOD_FAST", hgp->redm_fast); Zoltan_Bind_Param(PHG_params, "PHG_VERTEX_VISIT_ORDER", &hgp->visit_order); Zoltan_Bind_Param(PHG_params, "PHG_EDGE_SCALING", &hgp->edge_scaling); Zoltan_Bind_Param(PHG_params, "PHG_VERTEX_SCALING", &hgp->vtx_scaling); Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_METHOD", hgp->refinement_str); Zoltan_Bind_Param(PHG_params, "PHG_DIRECT_KWAY", &hgp->kway); Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_LOOP_LIMIT", &hgp->fm_loop_limit); Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_MAX_NEG_MOVE", &hgp->fm_max_neg_move); Zoltan_Bind_Param(PHG_params, "PHG_REFINEMENT_QUALITY", &hgp->refinement_quality); Zoltan_Bind_Param(PHG_params, "PHG_COARSEPARTITION_METHOD", hgp->coarsepartition_str); Zoltan_Bind_Param(PHG_params, "PHG_USE_TIMERS", (void*) &hgp->use_timers); Zoltan_Bind_Param(PHG_params, "USE_TIMERS", (void*) &hgp->use_timers); Zoltan_Bind_Param(PHG_params, "PHG_EDGE_SIZE_THRESHOLD", (void*) &hgp->EdgeSizeThreshold); Zoltan_Bind_Param(PHG_params, "PHG_MATCH_EDGE_SIZE_THRESHOLD", (void*) &hgp->MatchEdgeSizeThreshold); Zoltan_Bind_Param(PHG_params, "PHG_BAL_TOL_ADJUSTMENT", (void*) &hgp->bal_tol_adjustment); Zoltan_Bind_Param(PHG_params, "PARKWAY_SERPART", (void *) hgp->parkway_serpart); Zoltan_Bind_Param(PHG_params, "PHG_CUT_OBJECTIVE", (void *) &cut_objective); Zoltan_Bind_Param(PHG_params, "ADD_OBJ_WEIGHT", (void *) add_obj_weight); Zoltan_Bind_Param(PHG_params, "PHG_EDGE_WEIGHT_OPERATION", (void *) edge_weight_op); Zoltan_Bind_Param(PHG_params, "PHG_RANDOMIZE_INPUT", (void*) &hgp->RandomizeInitDist); Zoltan_Bind_Param(PHG_params, "PHG_PROCESSOR_REDUCTION_LIMIT", (void*) &hgp->ProRedL); Zoltan_Bind_Param(PHG_params, "PHG_REPART_MULTIPLIER", (void*) &hgp->RepartMultiplier); Zoltan_Bind_Param(PHG_params, "PATOH_ALLOC_POOL0", (void*) &hgp->patoh_alloc_pool0); Zoltan_Bind_Param(PHG_params, "PATOH_ALLOC_POOL1", (void*) &hgp->patoh_alloc_pool1); /* Set default values */ strncpy(hgp->hgraph_pkg, "phg", MAX_PARAM_STRING_LEN); strncpy(hgp->convert_str, "neighbors", MAX_PARAM_STRING_LEN); strncpy(hgp->redm_str, "agg", MAX_PARAM_STRING_LEN); hgp->match_array_type = 0; strncpy(hgp->redm_fast, "l-ipm", MAX_PARAM_STRING_LEN); strncpy(hgp->coarsepartition_str, "auto", MAX_PARAM_STRING_LEN); strncpy(hgp->refinement_str, "fm2", MAX_PARAM_STRING_LEN); strncpy(hgp->parkway_serpart, "patoh", MAX_PARAM_STRING_LEN); strncpy(cut_objective, "connectivity", MAX_PARAM_STRING_LEN); strncpy(add_obj_weight, "none", MAX_PARAM_STRING_LEN); strncpy(edge_weight_op, "max", MAX_PARAM_STRING_LEN); /* LB.Approach is initialized to "REPARTITION", and set in Set_Key_Params */ strncpy(hgp->hgraph_method, zz->LB.Approach, MAX_PARAM_STRING_LEN); if (!strcasecmp(zz->LB.Approach,"REFINE")) hgp->useMultilevel = 0; else hgp->useMultilevel = 1; hgp->use_timers = 0; hgp->LocalCoarsePartition = 0; hgp->edge_scaling = 0; hgp->vtx_scaling = 0; hgp->vtx_scal_size = 0; hgp->vtx_scal = NULL; /* Array for storing vertex degree scale vector. Should perhaps go in hg structure, not the param struct? */ hgp->connectivity_cut = 1; hgp->visit_order = 0; /* Random */ hgp->check_graph = 0; hgp->bal_tol = zz->LB.Imbalance_Tol[0]; /* Make vector for multiconstraint */ hgp->bal_tol_adjustment = 0.7; hgp->nCand = 100; hgp->redl = MAX(2*zz->LB.Num_Global_Parts, 100); hgp->output_level = PHG_DEBUG_NONE; hgp->final_output = 0; hgp->nProc_x_req = -1; hgp->nProc_y_req = -1; hgp->kway = 0; hgp->fm_loop_limit = 10; hgp->fm_max_neg_move = 250; hgp->refinement_quality = 1; hgp->RandomizeInitDist = 0; hgp->EdgeSizeThreshold = 0.25; hgp->MatchEdgeSizeThreshold = 500; hgp->hybrid_keep_factor = 0.; hgp->ProRedL = 0.0; /* UVCUVC: CHECK default set to 0 until we run more experiments */ hgp->RepartMultiplier = 100.; hgp->patoh_alloc_pool0 = 0; hgp->patoh_alloc_pool1 = 0; hgp->UseFixedVtx = 0; hgp->UsePrefPart = 0; /* Get application values of parameters. */ err = Zoltan_Assign_Param_Vals(zz->Params, PHG_params, zz->Debug_Level, zz->Proc, zz->Debug_Proc); nProc = zz->Num_Proc; usePrimeComm = 0; /* Parse add_obj_weight parameter */ if (!strcasecmp(add_obj_weight, "none")) { hgp->add_obj_weight = PHG_ADD_NO_WEIGHT; hgp->part_sizes = part_sizes; } else if (zz->Obj_Weight_Dim > 0) { /* Do not add_obj_weight until multiconstraint PHG is implemented */ ZOLTAN_PRINT_WARN(zz->Proc, yo, "Both application supplied *and* ADD_OBJ_WEIGHT " "calculated vertex weights were provided."); ZOLTAN_PRINT_WARN(zz->Proc, yo, "Only the first application supplied weight per vertex will be used."); hgp->add_obj_weight = PHG_ADD_NO_WEIGHT; hgp->part_sizes = part_sizes; } else { if (!strcasecmp(add_obj_weight, "vertices")){ hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT; } else if (!strcasecmp(add_obj_weight, "unit")){ hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT; } else if (!strcasecmp(add_obj_weight, "vertex degree")){ hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT; } else if (!strcasecmp(add_obj_weight, "nonzeros")){ hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT; } else if (!strcasecmp(add_obj_weight, "pins")){ hgp->add_obj_weight = PHG_ADD_PINS_WEIGHT; } else{ ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid ADD_OBJ_WEIGHT parameter.\n"); err = ZOLTAN_WARN; } /* Set hgp->part_sizes to new array of part_sizes with added obj weight. */ if (part_sizes) err = Zoltan_LB_Add_Part_Sizes_Weight(zz, (zz->Obj_Weight_Dim ? zz->Obj_Weight_Dim : 1), zz->Obj_Weight_Dim+1, part_sizes, &(hgp->part_sizes)); } if ((zz->Obj_Weight_Dim==0) && /* no application supplied weights */ (hgp->add_obj_weight==PHG_ADD_NO_WEIGHT)){ /* no calculated weight */ hgp->add_obj_weight = PHG_ADD_UNIT_WEIGHT; /* default object weight */ } if (!strcasecmp(cut_objective, "default") || !strcasecmp(cut_objective, "connectivity")) hgp->connectivity_cut = 1; else if (!strcasecmp(cut_objective, "hyperedges")) hgp->connectivity_cut = 0; else { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid PHG_CUT_OBJECTIVE parameter.\n"); goto End; } if (!strcasecmp(edge_weight_op, "max")){ hgp->edge_weight_op = PHG_MAX_EDGE_WEIGHTS; } else if (!strcasecmp(edge_weight_op, "add")){ hgp->edge_weight_op = PHG_ADD_EDGE_WEIGHTS; } else if (!strcasecmp(edge_weight_op, "error")){ hgp->edge_weight_op = PHG_FLAG_ERROR_EDGE_WEIGHTS; } else{ ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid PHG_EDGE_WEIGHT_OPERATION parameter.\n"); err = ZOLTAN_WARN; } if ((strcasecmp(method, "PARTITION")) && (strcasecmp(method, "REPARTITION")) && (strcasecmp(method, "REFINE"))) { sprintf(buf,"%s is not a valid hypergraph method\n",method); ZOLTAN_PRINT_ERROR (zz->Proc, yo, buf); err = ZOLTAN_FATAL; goto End; } /* Adjust refinement parameters using hgp->refinement_quality */ if (hgp->refinement_quality < 0.5/hgp->fm_loop_limit) /* No refinement */ strncpy(hgp->refinement_str, "no", MAX_PARAM_STRING_LEN); else { /* Scale FM parameters */ hgp->fm_loop_limit *= hgp->refinement_quality; hgp->fm_max_neg_move *= hgp->refinement_quality; } if (!strcasecmp(package, "PHG")){ /* Test to determine whether we should change the number of processors used for partitioning to make more efficient 2D decomposition */ if (hgp->nProc_x_req != 1 && hgp->nProc_y_req != 1) /* Want 2D decomp */ if (zz->Num_Proc > SMALL_PRIME && Zoltan_PHG_isPrime(zz->Num_Proc)) /* 2D data decomposition is requested but we have a prime * number of processors. */ usePrimeComm = 1; if ((!strcasecmp(method, "REPARTITION"))){ zz->LB.Remap_Flag = 0; } if ((!strcasecmp(method, "REPARTITION")) || (!strcasecmp(method, "REFINE"))) { hgp->fm_loop_limit = 4; /* experimental evaluation showed that for repartitioning/refinement small number of passes is "good enough". These are all heuristics hence it is possible to create a pathological cases; but in general this seems to be sufficient */ } if (!hgp->useMultilevel) { /* don't do coarsening */ strncpy(hgp->redm_str, "no", MAX_PARAM_STRING_LEN); /* we have modified all coarse partitioners to handle preferred part if user wants to choose one she can choose; otherwise default partitioner (greedy growing) does work better than previous default partitioning for phg_refine ("no"). */ hgp->UsePrefPart = 1; } if (!strcasecmp(method, "REFINE") && hgp->useMultilevel){ /* UVCUVC: as a heuristic we prefer local matching; in our experiments for IPDPS'07 and WileyChapter multilevel_refine didn't prove itself useful; it is too costly even with local matching hence it will not be be released yet (i.e. not in v3). */ strncpy(hgp->redm_str, "l-ipm", MAX_PARAM_STRING_LEN); hgp->UsePrefPart = 1; } } else if (!strcasecmp(package, "PARKWAY")){ if (hgp->nProc_x_req>1) { err = ZOLTAN_FATAL; ZOLTAN_PRINT_ERROR(zz->Proc, yo, "ParKway requires nProc_x=1 or -1."); goto End; } hgp->nProc_x_req = 1; } else if (!strcasecmp(package, "PATOH")){ if (zz->Num_Proc>1) { err = ZOLTAN_FATAL; ZOLTAN_PRINT_ERROR(zz->Proc, yo, "PaToH only works with Num_Proc=1."); goto End; } } if (!usePrimeComm) MPI_Comm_dup(zz->Communicator, &communicator); else { MPI_Group newgrp, zzgrp; nProc--; MPI_Comm_group(zz->Communicator, &zzgrp); MPI_Group_excl(zzgrp, 1, &nProc, &newgrp); MPI_Comm_create(zz->Communicator, newgrp, &communicator); MPI_Group_free(&newgrp); MPI_Group_free(&zzgrp); } err = Zoltan_PHG_Set_2D_Proc_Distrib(zz, communicator, zz->Proc, nProc, hgp->nProc_x_req, hgp->nProc_y_req, &hgp->globalcomm); if (err != ZOLTAN_OK) goto End; /* Convert strings to function pointers. */ err = Zoltan_PHG_Set_Part_Options (zz, hgp); End: return err; }
/* Main partitioning function for hypergraph partitioning. */ int Zoltan_PHG_Partition ( ZZ *zz, /* Zoltan data structure */ HGraph *hg, /* Input hypergraph to be partitioned */ int p, /* Input: number partitions to be generated */ float *part_sizes, /* Input: array of length p containing percentages of work to be assigned to each partition */ Partition parts, /* Input: initial partition #s; aligned with vtx arrays. Output: computed partition #s */ PHGPartParams *hgp) /* Input: parameters for hgraph partitioning. */ { PHGComm *hgc = hg->comm; VCycle *vcycle=NULL, *del=NULL; int i, err = ZOLTAN_OK, middle; ZOLTAN_GNO_TYPE origVpincnt; /* for processor reduction test */ ZOLTAN_GNO_TYPE prevVcnt = 2*hg->dist_x[hgc->nProc_x]; /* initialized so that the */ ZOLTAN_GNO_TYPE prevVedgecnt = 2*hg->dist_y[hgc->nProc_y]; /* while loop will be entered before any coarsening */ ZOLTAN_GNO_TYPE tot_nPins, local_nPins; MPI_Datatype zoltan_gno_mpi_type; char *yo = "Zoltan_PHG_Partition"; int do_timing = (hgp->use_timers > 1); int fine_timing = (hgp->use_timers > 2); int vcycle_timing = (hgp->use_timers > 4 && hgp->ProRedL == 0); short refine = 0; struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz); int reset_geometric_matching = 0; char reset_geometric_string[4]; ZOLTAN_TRACE_ENTER(zz, yo); zoltan_gno_mpi_type = Zoltan_mpi_gno_type(); if (do_timing) { if (timer->vcycle < 0) timer->vcycle = Zoltan_Timer_Init(zz->ZTime, 0, "Vcycle"); if (timer->procred < 0) timer->procred = Zoltan_Timer_Init(zz->ZTime, 0, "Processor Reduction"); if (timer->match < 0) timer->match = Zoltan_Timer_Init(zz->ZTime, 1, "Matching"); if (timer->coarse < 0) timer->coarse = Zoltan_Timer_Init(zz->ZTime, 1, "Coarsening"); if (timer->coarsepart < 0) timer->coarsepart = Zoltan_Timer_Init(zz->ZTime, 1, "Coarse_Partition"); if (timer->refine < 0) timer->refine = Zoltan_Timer_Init(zz->ZTime, 1, "Refinement"); if (timer->project < 0) timer->project = Zoltan_Timer_Init(zz->ZTime, 1, "Project_Up"); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins; MPI_Allreduce(&local_nPins,&tot_nPins,1,zoltan_gno_mpi_type,MPI_SUM,hgc->Communicator); origVpincnt = tot_nPins; if (!(vcycle = newVCycle(zz, hg, parts, NULL, vcycle_timing))) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "VCycle is NULL."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } /* For geometric coarsening, hgp->matching pointer and string are reset * after geometric_levels of coarsening. Will need to reset them after * this vcycle is completed. Capture that fact now! */ if (!strcasecmp(hgp->redm_str, "rcb") || !strcasecmp(hgp->redm_str, "rib")) { reset_geometric_matching = 1; strcpy(reset_geometric_string, hgp->redm_str); } /****** Coarsening ******/ #define COARSEN_FRACTION_LIMIT 0.9 /* Stop if we don't make much progress */ while ((hg->redl>0) && (hg->dist_x[hgc->nProc_x] > (ZOLTAN_GNO_TYPE)hg->redl) && ((hg->dist_x[hgc->nProc_x] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVcnt + 0.5)) /* prevVcnt initialized to 2*hg->dist_x[hgc->nProc_x] */ || (hg->dist_y[hgc->nProc_y] < (ZOLTAN_GNO_TYPE) (COARSEN_FRACTION_LIMIT * prevVedgecnt + 0.5))) /* prevVedgecnt initialized to 2*hg->dist_y[hgc->nProc_y] */ && hg->dist_y[hgc->nProc_y] && hgp->matching) { ZOLTAN_GNO_TYPE *match = NULL; VCycle *coarser=NULL, *redistributed=NULL; prevVcnt = hg->dist_x[hgc->nProc_x]; prevVedgecnt = hg->dist_y[hgc->nProc_y]; #ifdef _DEBUG /* UVC: load balance stats */ Zoltan_PHG_LoadBalStat(zz, hg); #endif if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->match, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_match < 0) { char str[80]; sprintf(str, "VC Matching %d", hg->info); vcycle->timer_match = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_match, hgc->Communicator); } /* Allocate and initialize Matching Array */ if (hg->nVtx && !(match = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC (hg->nVtx*sizeof(ZOLTAN_GNO_TYPE)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: Matching array"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; i++) match[i] = i; /* Calculate matching (packing or grouping) */ err = Zoltan_PHG_Matching (zz, hg, match, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_FREE (&match); goto End; } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_match, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->match, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->coarse, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_coarse < 0) { char str[80]; sprintf(str, "VC Coarsening %d", hg->info); vcycle->timer_coarse = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); } if (!(coarser = newVCycle(zz, NULL, NULL, vcycle, vcycle_timing))) { ZOLTAN_FREE (&match); ZOLTAN_PRINT_ERROR (zz->Proc, yo, "coarser is NULL."); goto End; } /* Construct coarse hypergraph and LevelMap */ err = Zoltan_PHG_Coarsening (zz, hg, match, coarser->hg, vcycle->LevelMap, &vcycle->LevelCnt, &vcycle->LevelSndCnt, &vcycle->LevelData, &vcycle->comm_plan, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_coarse, hgc->Communicator); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarse, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } ZOLTAN_FREE (&match); if ((err=allocVCycle(coarser))!= ZOLTAN_OK) goto End; vcycle = coarser; hg = vcycle->hg; if (hgc->nProc > 1 && hgp->ProRedL > 0) { local_nPins = (ZOLTAN_GNO_TYPE)hg->nPins; MPI_Allreduce(&local_nPins, &tot_nPins, 1, zoltan_gno_mpi_type, MPI_SUM, hgc->Communicator); if (tot_nPins < (ZOLTAN_GNO_TYPE)(hgp->ProRedL * origVpincnt + 0.5)) { if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->procred, hgc->Communicator); } /* redistribute to half the processors */ origVpincnt = tot_nPins; /* update for processor reduction test */ if(hg->nVtx&&!(hg->vmap=(int*)ZOLTAN_MALLOC(hg->nVtx*sizeof(int)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: hg->vmap"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; i++) hg->vmap[i] = i; middle = (int)((float) (hgc->nProc-1) * hgp->ProRedL); if (hgp->nProc_x_req!=1&&hgp->nProc_y_req!=1) { /* Want 2D decomp */ if ((middle+1) > SMALL_PRIME && Zoltan_PHG_isPrime(middle+1)) --middle; /* if it was prime just use one less #procs (since it should be bigger than SMALL_PRIME it is safe to decrement) */ } if (!(hgc = (PHGComm*) ZOLTAN_MALLOC (sizeof(PHGComm)))) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory: PHGComm"); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } if (!(redistributed=newVCycle(zz,NULL,NULL,vcycle,vcycle_timing))) { ZOLTAN_FREE (&hgc); ZOLTAN_PRINT_ERROR (zz->Proc, yo, "redistributed is NULL."); goto End; } Zoltan_PHG_Redistribute(zz,hgp,hg,0,middle,hgc, redistributed->hg, &vcycle->vlno,&vcycle->vdest); if (hgp->UseFixedVtx || hgp->UsePrefPart) redistributed->hg->bisec_split = hg->bisec_split; if ((err=allocVCycle(redistributed))!= ZOLTAN_OK) goto End; vcycle = redistributed; if (hgc->myProc < 0) /* I'm not in the redistributed part so I should go to uncoarsening refinement and wait */ { if (fine_timing) { if (timer->cpgather < 0) timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather"); if (timer->cprefine < 0) timer->cprefine =Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine"); if (timer->cpart < 0) timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part"); } if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } goto Refine; } hg = vcycle->hg; hg->redl = hgp->redl; /* not set with hg creation */ if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->procred, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } } } } if (hgp->output_level >= PHG_DEBUG_LIST) { uprintf(hgc, "START %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d...\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p); if (hgp->output_level > PHG_DEBUG_LIST) { err = Zoltan_HG_Info(zz, hg); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; } } if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, NULL, "coarsening plot"); /* free array that may have been allocated in matching */ if (hgp->vtx_scal) { hgp->vtx_scal_size = 0; ZOLTAN_FREE(&(hgp->vtx_scal)); } if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->coarsepart, hgc->Communicator); } /****** Coarse Partitioning ******/ err = Zoltan_PHG_CoarsePartition (zz, hg, p, part_sizes, vcycle->Part, hgp); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) goto End; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->coarsepart, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } Refine: del = vcycle; refine = 1; /****** Uncoarsening/Refinement ******/ while (vcycle) { VCycle *finer = vcycle->finer; hg = vcycle->hg; if (refine && hgc->myProc >= 0) { if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->refine, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_refine < 0) { char str[80]; sprintf(str, "VC Refinement %d", hg->info); vcycle->timer_refine = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_refine, hgc->Communicator); } err = Zoltan_PHG_Refinement (zz, hg, p, part_sizes, vcycle->Part, hgp); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->refine, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_refine, hgc->Communicator); if (hgp->output_level >= PHG_DEBUG_LIST) uprintf(hgc, "FINAL %3d |V|=%6d |E|=%6d #pins=%6d %d/%s/%s/%s p=%d bal=%.2f cutl=%.2f\n", hg->info, hg->nVtx, hg->nEdge, hg->nPins, hg->redl, hgp->redm_str, hgp->coarsepartition_str, hgp->refinement_str, p, Zoltan_PHG_Compute_Balance(zz, hg, part_sizes, 0, p, vcycle->Part), Zoltan_PHG_Compute_ConCut(hgc, hg, vcycle->Part, p, &err)); if (hgp->output_level >= PHG_DEBUG_PLOT) Zoltan_PHG_Plot(zz->Proc, hg->nVtx, p, hg->vindex, hg->vedge, vcycle->Part, "partitioned plot"); } if (finer) { int *rbuffer; /* Project coarse partition to fine partition */ if (finer->comm_plan) { refine = 1; if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->project, hgc->Communicator); } if (vcycle_timing) { if (vcycle->timer_project < 0) { char str[80]; sprintf(str, "VC Project Up %d", hg->info); vcycle->timer_project = Zoltan_Timer_Init(vcycle->timer, 0, str); } ZOLTAN_TIMER_START(vcycle->timer, vcycle->timer_project, hgc->Communicator); } /* easy to assign partitions to internal matches */ for (i = 0; i < finer->hg->nVtx; i++) if (finer->LevelMap[i] >= 0) /* if considers only the local vertices */ finer->Part[i] = vcycle->Part[finer->LevelMap[i]]; /* now that the course partition assignments have been propagated */ /* upward to the finer level for the local vertices, we need to */ /* fill the LevelData (matched pairs of a local vertex with a */ /* off processor vertex) with the partition assignment of the */ /* local vertex - can be done totally in the finer level! */ for (i = 0; i < finer->LevelCnt; i++) { ++i; /* skip over off processor lno */ finer->LevelData[i] = finer->Part[finer->LevelData[i]]; } /* allocate rec buffer to exchange LevelData information */ rbuffer = NULL; if (finer->LevelSndCnt > 0) { rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->LevelSndCnt * sizeof(int)); if (!rbuffer) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } } /* get partition assignments from owners of externally matched vtxs */ Zoltan_Comm_Resize (finer->comm_plan, NULL, COMM_TAG, &i); Zoltan_Comm_Do_Reverse (finer->comm_plan, COMM_TAG+1, (char*) finer->LevelData, 2 * sizeof(int), NULL, (char*) rbuffer); /* process data to assign partitions to expernal matches */ for (i = 0; i < 2 * finer->LevelSndCnt;) { int lno, partition; lno = rbuffer[i++]; partition = rbuffer[i++]; finer->Part[lno] = partition; } ZOLTAN_FREE (&rbuffer); Zoltan_Comm_Destroy (&finer->comm_plan); if (do_timing) { ZOLTAN_TIMER_STOP(zz->ZTime, timer->project, hgc->Communicator); ZOLTAN_TIMER_START(zz->ZTime, timer->vcycle, hgc->Communicator); } if (vcycle_timing) ZOLTAN_TIMER_STOP(vcycle->timer, vcycle->timer_project, hgc->Communicator); } else { int *sendbuf = NULL, size; refine = 0; /* ints local and partition numbers */ if (finer->vlno) { sendbuf = (int*) ZOLTAN_MALLOC (2 * hg->nVtx * sizeof(int)); if (!sendbuf) { ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } for (i = 0; i < hg->nVtx; ++i) { sendbuf[2 * i] = finer->vlno[i]; /* assign local numbers */ sendbuf[2 * i + 1] = vcycle->Part[i];/* assign partition numbers */ } } ZOLTAN_FREE (&hgc); hgc = finer->hg->comm; /* updating hgc is required when the processors change */ /* Create comm plan to unredistributed processors */ err = Zoltan_Comm_Create(&finer->comm_plan, finer->vlno ? hg->nVtx : 0, finer->vdest, hgc->Communicator, COMM_TAG+2, &size); if (err != ZOLTAN_OK && err != ZOLTAN_WARN) { ZOLTAN_PRINT_ERROR(hgc->myProc, yo, "Zoltan_Comm_Create failed."); goto End; } /* allocate rec buffer to exchange sendbuf information */ rbuffer = NULL; if (finer->hg->nVtx) { rbuffer = (int*) ZOLTAN_MALLOC (2 * finer->hg->nVtx * sizeof(int)); if (!rbuffer) { ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory."); ZOLTAN_TRACE_EXIT(zz, yo); return ZOLTAN_MEMERR; } } /* Use plan to send partitions to the unredistributed processors */ Zoltan_Comm_Do(finer->comm_plan, COMM_TAG+3, (char *) sendbuf, 2*sizeof(int), (char *) rbuffer); MPI_Bcast(rbuffer, 2*finer->hg->nVtx, MPI_INT, 0, hgc->col_comm); /* process data to assign partitions to unredistributed processors */ for (i = 0; i < 2 * finer->hg->nVtx;) { int lno, partition; lno = rbuffer[i++]; partition = rbuffer[i++]; finer->Part[lno] = partition; } if (finer->vlno) ZOLTAN_FREE (&sendbuf); ZOLTAN_FREE (&rbuffer); Zoltan_Comm_Destroy (&finer->comm_plan); } } vcycle = finer; } /* while (vcycle) */ End: vcycle = del; while (vcycle) { if (vcycle_timing) { Zoltan_Timer_PrintAll(vcycle->timer, 0, hgc->Communicator, stdout); Zoltan_Timer_Destroy(&vcycle->timer); } if (vcycle->finer) { /* cleanup by level */ Zoltan_HG_HGraph_Free (vcycle->hg); if (vcycle->LevelData) Zoltan_Multifree (__FILE__, __LINE__, 4, &vcycle->Part, &vcycle->LevelMap, &vcycle->LevelData, &vcycle->hg); else if (vcycle->vlno) Zoltan_Multifree (__FILE__, __LINE__, 5, &vcycle->Part, &vcycle->vdest, &vcycle->vlno, &vcycle->LevelMap, &vcycle->hg); else Zoltan_Multifree (__FILE__, __LINE__, 3, &vcycle->Part, &vcycle->LevelMap, &vcycle->hg); } else /* cleanup top level */ Zoltan_Multifree (__FILE__, __LINE__, 2, &vcycle->LevelMap, &vcycle->LevelData); del = vcycle; vcycle = vcycle->finer; ZOLTAN_FREE(&del); } if (reset_geometric_matching) { strcpy(hgp->redm_str, reset_geometric_string); Zoltan_PHG_Set_Matching_Fn(hgp); } if (do_timing) ZOLTAN_TIMER_STOP(zz->ZTime, timer->vcycle, hgc->Communicator); ZOLTAN_TRACE_EXIT(zz, yo) ; return err; }