void Zoltan_HG_Print(
  ZZ *zz,
  HGraph *hg,
  Partition parts,
  FILE *fp,
  char *str
)
{
/* Routine to print hypergraph weights and edges. Assumes serial execution;
 * put inside Zoltan_Print_Sync_Start/Zoltan_Print_Sync_End for parallel
 * programs. */
int i, j;
int num_vwgt;
int num_ewgt;
float *sum;
char *yo = "Zoltan_HG_Print";

  if (hg == NULL)
    return;

  ZOLTAN_TRACE_ENTER(zz, yo);

  num_vwgt = hg->VtxWeightDim;
  num_ewgt = hg->EdgeWeightDim;
  sum = (float *) ZOLTAN_MALLOC(MAX(num_vwgt, num_ewgt) * sizeof(float));

  fprintf(fp, "%s nVtx=%d nEdge=%d nPins=%d vWgt=%d eWgt=%d\n",
          str, hg->nVtx, hg->nEdge, hg->nPins,
          hg->VtxWeightDim, hg->EdgeWeightDim);

  /* Print Vertex Info */
  fprintf(fp, "%s Vertices: (edges)\n", str);
  for (i = 0; i < hg->nVtx; i++) {
    fprintf(fp, "%d (%d) in part %d: ", i, VTX_LNO_TO_GNO(hg, i),
            (parts ? parts[i] : -1));
    fprintf(fp, "(");
    for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++)
      fprintf(fp, "%d ", hg->vedge[j]);
    fprintf(fp, ")\n");
  }

  if (hg->vwgt != NULL) {
    for (j = 0; j < num_vwgt; j++)
      sum[j] = 0;
    fprintf(fp, "%s Vertices: [weights]\n", str);
    for (i = 0; i < hg->nVtx; i++) {
      fprintf(fp, "%d (%d): [", i, VTX_LNO_TO_GNO(hg, i));
      for (j = 0; j < num_vwgt; j++) {
        fprintf(fp, "%f ", hg->vwgt[i*num_vwgt + j]);
        sum[j] += hg->vwgt[i*num_vwgt + j];
      }
      fprintf(fp, "]\n");
    }
    fprintf(fp, "Total vertex weight = [");
    for (j = 0; j < num_vwgt; j++)
      fprintf(fp, "%f ", sum[j]);
    fprintf(fp, "]\n");
  }

  /* Print Hyperedge Info */
  fprintf(fp, "%s Hyperedges: (vertices)\n", str);
  for (i = 0; i < hg->nEdge; i++) {
    fprintf(fp, "%d (%d): ", i, EDGE_LNO_TO_GNO(hg, i));
    fprintf(fp, "(");
    for (j = hg->hindex[i]; j < hg->hindex[i+1]; j++)
      fprintf(fp, "%d ", hg->hvertex[j]);
    fprintf(fp, ")\n");
  }

  if (hg->ewgt != NULL) {
    for (j = 0; j < num_ewgt; j++)
      sum[j] = 0;
    fprintf(fp, "%s Hyperedge Weights: [weights]\n", str);
    for (i = 0; i < hg->nEdge; i++) {
      fprintf(fp, "%d (%d): ", i, EDGE_LNO_TO_GNO(hg, i));
      fprintf(fp, "[");
      for (j = 0; j < num_ewgt; j++) {
        fprintf(fp, "%f ", hg->ewgt[i*num_ewgt + j]);
        sum[j] += hg->ewgt[i*num_ewgt + j];
      }
      fprintf(fp, "]\n");
    }
    fprintf(fp, "Total hyperedge weight = [");
    for (j = 0; j < num_ewgt; j++)
      fprintf(fp, "%f ", sum[j]);
    fprintf(fp, "]\n");
  }

  ZOLTAN_FREE(&sum);
  ZOLTAN_TRACE_EXIT(zz, yo);
}
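/* Illustrative usage sketch (not part of the library): the header comment above
 * notes that Zoltan_HG_Print assumes serial execution, so in a parallel run each
 * rank would typically wrap the call in Zoltan_Print_Sync_Start/End so output is
 * serialized by rank.  The helper name and the "debug" label are hypothetical;
 * this assumes the usual Zoltan_Print_Sync_*(MPI_Comm, int) calls and the
 * zz->Communicator field. */
#if 0
static void example_print_hg_by_rank(ZZ *zz, HGraph *hg, Partition parts)
{
  Zoltan_Print_Sync_Start(zz->Communicator, 1);   /* one rank prints at a time */
  Zoltan_HG_Print(zz, hg, parts, stdout, "debug");
  Zoltan_Print_Sync_End(zz->Communicator, 1);
}
#endif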
int Zoltan_PHG_CoarsePartition(
  ZZ *zz,
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments. */
  PHGPartParams *hgp   /* Input:  parameters to use. */
)
{
/*
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
char *yo = "Zoltan_PHG_CoarsePartition";
int ierr = ZOLTAN_OK;
int i, si, j;
static PHGComm scomm;          /* Serial communicator info */
static int first_time = 1;
HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
int *spart = NULL;             /* Partition vectors for shg. */
int *new_part = NULL;          /* Ptr to new partition vector. */
float *bestvals = NULL;        /* Best cut values found so far */
int worst, new_cand;
float bal, cut, worst_cut;
int fine_timing = (hgp->use_timers > 2);
struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
int local_coarse_part = hgp->LocalCoarsePartition;

/* Number of iterations to try coarse partitioning on each proc. */
/* 10 when p=1, and 1 when p is large. */
const int num_coarse_iter = 1 + 9/zz->Num_Proc;

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }

  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc = phg->comm;

    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
      for (i = 0; i < phg->nVtx; i++) {
        /* Impose fixed vertex/preferred part constraints. */
        if (phg->pref_part[i] < 0) {
          /* Free vertex in fixed vertex partitioning or repart;
           * randomly assign it to a part. */
          part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
        }
        else {
          if (phg->bisec_split < 0)
            /* direct k-way, use part numbers directly */
            part[i] = phg->pref_part[i];
          else
            /* recursive bisection, map to 0-1 part numbers */
            part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
        }
      }
    }
    else {
      for (i = 0; i < phg->nVtx; i++) {
        if (part[i] >= numPart || part[i] < 0) {
          if (first) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
            first = 0;
            ierr = ZOLTAN_WARN;
          }
          part[i] = ((part[i] < 0) ? -part[i] : part[i]) % numPart;
        }
      }
    }
  }
  else if (numPart == 1) {
    /* everything goes in the one partition */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) {
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x] + i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc %
                                           NUM_COARSEPARTITION_FNS];
    }

    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /*
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }
    }

    /*
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1), sizeof(int));
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1) * sizeof(float));
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i = 0; i < num_coarse_iter; i++) {
      int savefmlooplimit = hgp->fm_loop_limit;

      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;

      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, numPart, new_part);
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);

      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }

      if (i < NUM_PART_KEEP)
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j = 1; j < NUM_PART_KEEP+1; j++) {
          if (worst_cut < bestvals[j]) {
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart + new_cand*(shg->nVtx);
    }

    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart.                       */
    for (i = 0; i < shg->nVtx; i++) {
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx) + i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one; in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart,
                     MIN(NUM_PART_KEEP, num_coarse_iter), spart, bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from pick_best.");
      goto End;
    }

    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD  Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }

      Zoltan_HG_HGraph_Free(shg);
      ZOLTAN_FREE(&shg);
    }
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }

    ZOLTAN_FREE(&spart);
    ZOLTAN_FREE(&bestvals);
  }

End:
  if (fine_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
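/* Minimal sketch of the candidate-scoring rule used above (illustration only,
 * not library code): each coarse partition is scored by the ratio-cut objective
 * score = cut / MAX(2 - bal, eps).  With bal near 1.0 (well balanced) the score
 * is essentially the connectivity cut; as bal grows toward 2.0 the denominator
 * shrinks, penalizing imbalanced candidates.  For example, cut=100 with bal=1.0
 * scores 100, while the same cut with bal=1.5 scores 200. */
#if 0
static float example_ratio_cut_score(float cut, float bal)
{
  float denom = 2.0f - bal;
  if (denom < 0.0001f)
    denom = 0.0001f;        /* avoid divide-by-zero, as in the code above */
  return cut / denom;
}
#endif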
void print_hypergraph(ZZ *zz, ZHG *zhg, int sumWeight)
{
  int i, j, npins;
  int ewdim = zz->Edge_Weight_Dim;
  int vwdim = zhg->objWeightDim;
  float sum;
  float *wgt, *vwgt;
  int *pin, *owner, *lno;
  HGraph *hg = &zhg->HG;
  int p = zz->Proc;

  /* The ZHG structure contains the hypergraph returned by the query functions,
   * including modifications based on ADD_OBJ_WEIGHT and PHG_EDGE_WEIGHT_OPERATION.
   * If the PHG hypergraph build has completed, the edge list only contains the
   * removed edges.  If LB_Eval built the ZHG structure, it contains all edges.
   *
   * The HGraph structure contains that hypergraph with modifications made
   * for the PHG algorithm.  This may include addition of repartition
   * vertices and edges, and removal of dense edges.
   */

  wgt = zhg->objWeight;

  printf("(%d) %d INPUT VERTICES (out of %d): gno (gid/lid) (weights) nhedges fixed inpart outpart objSize\n",
         p, zhg->nObj, zhg->globalObj);

  for (i=0; i<zhg->nObj; i++){
    printf(" %d (", zhg->objGNO[i]);
    if (zhg->objGID) printf("%d/", zhg->objGID[i]);
    else printf("-/");
    if (zhg->objLID) printf("%d) (", zhg->objLID[i]);
    else printf("-) (");
    for (j=0; j < vwdim; j++){
      printf("%f", *wgt++);
      if (j < vwdim-1) printf(", ");
    }
    if (zhg->numHEdges) printf(") %d ", zhg->numHEdges[i]);
    else printf(") - ");
    if (zhg->fixed) printf(" %d ", zhg->fixed[i]);
    else printf(" - ");
    if (zhg->Input_Parts) printf(" %d ", zhg->Input_Parts[i]);
    else printf(" - ");
    if (zhg->Output_Parts) printf(" %d ", zhg->Output_Parts[i]);
    else printf(" - ");
    if (zhg->AppObjSizes) printf(" %d ", zhg->AppObjSizes[i]);
    else printf(" - ");
    printf("\n");
  }
  printf("\n");

  wgt = zhg->Ewgt;
  pin = zhg->pinGNO;
  owner = zhg->Pin_Procs;

  printf("(%d) %d INPUT or REMOVED EDGES (out of %d), %d pins: gno size (weights) (pinGNO/pinProc)\n",
         p, zhg->nHedges, zhg->globalHedges, zhg->nPins);

  for (i=0; i < zhg->nHedges; i++){
    printf(" %d %d (", zhg->edgeGNO[i], zhg->Esize[i]);
    if (wgt){
      for (j=0; j < ewdim; j++){
        printf("%f", *wgt++);
        if (j < ewdim - 1) printf(", ");
      }
    }
    printf(") (");
    for (j=0; j < zhg->Esize[i]; j++){
      printf("%d/%d", *pin++, *owner++);
      if (j < zhg->Esize[i] - 1) printf(" ");
    }
    printf(")\n");
  }
  printf("\n");

  printf("(%d) %d PHG EDGES (%d weights), %d total PHG PINS:\n",
         p, hg->nEdge, ewdim, hg->nPins);

  wgt = hg->ewgt;
  lno = hg->hvertex;
  vwgt = hg->vwgt;

  for (i=0; i<hg->nEdge; i++){
    npins = hg->hindex[i+1] - hg->hindex[i];
    printf(" edge %d: ", EDGE_LNO_TO_GNO(hg, i));
    for (j=0; j<ewdim; j++){
      printf(" %f", *wgt++);
    }
    printf("\n %d pins: ", npins);
    for (j=0; j<npins; j++){
      printf("%d ", *lno++);
    }
    printf("\n");
  }
  printf("\n");

  printf("(%d) %d PHG PIN global numbers and %d weights:\n", p, hg->nVtx, vwdim);

  sum = 0;
  for (i=0; i<hg->nVtx; i++){
    printf(" %d %d: ", i, VTX_LNO_TO_GNO(hg, i));
    for (j=0; j<vwdim; j++){
      if (j==sumWeight) sum += *vwgt;
      printf("%f ", *vwgt++);
    }
    printf("\n");
  }
  printf("\n");

  if (sum > 0.0)
    printf("(%d) Weight %d sums to %f\n\n", p, sumWeight+1, sum);
}
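/* Layout note (illustrative helper only, not library code): the pointer-walking
 * loops above rely on the flat, row-major layout of the weight arrays -- the
 * vwdim weights of object i occupy objWeight[i*vwdim .. i*vwdim + vwdim - 1],
 * and likewise Ewgt holds ewdim weights per edge.  Equivalently: */
#if 0
static float example_object_weight(ZHG *zhg, int obj, int which)
{
  /* weight number 'which' of object 'obj' */
  return zhg->objWeight[obj * zhg->objWeightDim + which];
}
#endif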
void Zoltan_PHG_Plot_2D_Distrib(
  ZZ *zz,
  HGraph *phg
)
{
/* Routine that produces gnuplot output of 2D data distribution in form of
 * a matrix.
 * One column for each vertex.
 * One row for each hyperedge.
 * Separate files are produced for each processor.
 * Vertex and edge global node numbers are used for "coordinates" in plotting.
 * No partitioning information is displayed; only the 2D data distribution
 * is shown. */
static int cnt = 0;
char filename[32];
FILE *fp = NULL;
int i, j;
int egno, vgno;

  sprintf(filename, "phg%02d.%02d", cnt, zz->Proc);
  fp = fopen(filename, "w");
  for (i = 0; i < phg->nEdge; i++) {
    egno = EDGE_LNO_TO_GNO(phg, i);
    for (j = phg->hindex[i]; j < phg->hindex[i+1]; j++) {
      vgno = VTX_LNO_TO_GNO(phg, phg->hvertex[j]);
      fprintf(fp, "%d %d\n", vgno, -egno);
    }
  }
  fclose(fp);

  if (zz->Proc == 0) {
    sprintf(filename, "phg%02d.gnuload", cnt);
    fp = fopen(filename, "w");
    fprintf(fp, "set data style points\n");
    fprintf(fp, "set pointsize 5\n");
    fprintf(fp, "set nokey\n");
    fprintf(fp, "set xlabel \"vertices\"\n");
    fprintf(fp, "set ylabel \"-hyperedges\"\n");
    fprintf(fp, "plot ");
    for (i = 0; i < zz->Num_Proc; i++) {
      fprintf(fp, "\"phg%02d.%02d\"", cnt, i);
      if (i != zz->Num_Proc-1)
        fprintf(fp, ", ");
      else
        fprintf(fp, "\n");
    }
    fclose(fp);
  }

  /* Sanity check to ensure Mirror is working correctly */
  /* Don't need to generate both sets of files, but they should differ only
   * in the order of the points */
  sprintf(filename, "phgmirror%02d.%02d", cnt, zz->Proc);
  fp = fopen(filename, "w");
  for (i = 0; i < phg->nVtx; i++) {
    vgno = VTX_LNO_TO_GNO(phg, i);
    for (j = phg->vindex[i]; j < phg->vindex[i+1]; j++) {
      egno = EDGE_LNO_TO_GNO(phg, phg->vedge[j]);
      fprintf(fp, "%d %d\n", vgno, -egno);
    }
  }
  fclose(fp);

  if (zz->Proc == 0) {
    sprintf(filename, "phgmirror%02d.gnuload", cnt);
    fp = fopen(filename, "w");
    fprintf(fp, "set data style points\n");
    fprintf(fp, "set pointsize 5\n");
    fprintf(fp, "set nokey\n");
    fprintf(fp, "set xlabel \"vertices\"\n");
    fprintf(fp, "set ylabel \"-hyperedges\"\n");
    fprintf(fp, "plot ");
    for (i = 0; i < zz->Num_Proc; i++) {
      fprintf(fp, "\"phgmirror%02d.%02d\"", cnt, i);
      if (i != zz->Num_Proc-1)
        fprintf(fp, ", ");
      else
        fprintf(fp, "\n");
    }
    fclose(fp);
  }
  cnt++;
}
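/* Usage note (sketch): each call writes one point file per processor
 * ("phg00.00", "phg00.01", ...) plus a load script on rank 0, so the 2D
 * distribution can be viewed afterward with gnuplot's standard load command,
 * e.g.  load "phg00.gnuload".  Each processor's nonzeros appear as a separate
 * point set, and the "phgmirror" files should contain the same points in a
 * different order if Mirror is consistent. */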