int main(int argc, char *argv[]) { int rc, i, ngids, maxcol, ncolors; float ver; struct Zoltan_Struct *zz=NULL; #ifdef ZOLTANV31 int numGidEntries, numLidEntries; #else ZOLTAN_GRAPH_EVAL graph; #endif int *color; ZOLTAN_ID_PTR gid_list; UZData guz, *uz=&guz; int msg_tag = 9999; int nlvtx, next, maxdeg=0; double times[9]={0.,0.,0.,0.,0.,0.,0.,0.}; /* Used for timing measurements */ double gtimes[9]={0.,0.,0.,0.,0.,0.,0.,0.}; /* Used for timing measurements */ /****************************************************************** ** Initialize MPI and Zoltan ******************************************************************/ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &uz->myRank); MPI_Comm_size(MPI_COMM_WORLD, &uz->numProcs); MPI_Barrier(MPI_COMM_WORLD); times[0] = u_wseconds(); rc = Zoltan_Initialize(argc, argv, &ver); if (rc != ZOLTAN_OK){ fprintf(stderr, "Sorry Zoltan initialize failed...\n"); goto End; } zz = Zoltan_Create(MPI_COMM_WORLD); if (argc<3 && !uz->myRank) { fprintf(stderr, "usage: %s [meshR] [meshC] [X-point stencil] [procR] [procC] [ws-beta] [<ZoltanParam>=<Val>] ...\n\n", argv[0]); fprintf(stderr, "ws-beta: is the probablity of adding an edge to a vertex to generate Watts-Strogatz graphs\n"); fprintf(stderr, "Valid values for Stencil are 5, 7 and 9\n"); fprintf(stderr, "Zoltan Coloring Parameters and values are\n"); fprintf(stderr, "\tDISTANCE : 1 or 2\n"); fprintf(stderr, "\tSUPERSTEP_SIZE : suggested >= 100\n"); fprintf(stderr, "\tCOMM_PATTERN : S or A\n"); fprintf(stderr, "\tCOLOR_ORDER : I, B, U\n"); fprintf(stderr, "\tCOLORING_METHOD : F (for now)\n"); fprintf(stderr, "\n"); } uz->procR = uz->procC = 0; uz->meshR = uz->meshC = 1024; uz->stencil = 9; if (argc>1) uz->meshR = atoi(argv[1]); if (argc>2) uz->meshC = atoi(argv[2]); if (argc>3) uz->stencil = atoi(argv[3]); if (uz->stencil!=5 && uz->stencil!=7 && uz->stencil!=9) { fprintf(stderr, "\t invalid stencil value. Valid values are 5, 7 and 9. 
Assumed 9.\n"); uz->stencil = 9; } --uz->stencil; if (argc>4) uz->procR = atoi(argv[4]); if (argc>5) uz->procC = atoi(argv[5]); if (uz->procR <= 0 || uz->procC <= 0) computeProcMesh(uz); if (uz->procR*uz->procC!=uz->numProcs) { fprintf(stderr, "#Procs=%d but requested %dx%d Proc Mesh Partitioning...\n", uz->numProcs, uz->procR, uz->procC); goto End; } if (argc>6) uz->beta = atof(argv[6]); else uz->beta = 0.0; /* compute which part of mesh I will compute */ uz->myR = uz->myRank / uz->procC; uz->myC = uz->myRank % uz->procC; uz->_sr = uz->myR * (uz->meshR / uz->procR); uz->_er = (uz->myR+1) * (uz->meshR / uz->procR); if (uz->_er>uz->meshR) uz->_er = uz->meshR; uz->_sc = uz->myC * (uz->meshC / uz->procC); uz->_ec = (uz->myC+1) * (uz->meshC / uz->procC); if (uz->_ec>uz->meshC) uz->_ec = uz->meshC; if ( (uz->meshR % uz->procR) !=0 || (uz->meshC % uz->procC)!=0) { printf("Mesh dimensions are not divisible with proc mesh.\nRequested mesh is %dx%d and proc mesh is %d x %d\n", uz->meshR, uz->meshC, uz->procR, uz->procC); exit(1); } nlvtx= (uz->_er-uz->_sr) * (uz->_ec-uz->_sc); if (uz->myRank==0) printf("Running %s on %d x %d processor mesh, generating %d-point %d x %d mesh with beta=%.3lf\n", argv[0], uz->procR, uz->procC, uz->stencil+1, uz->meshR, uz->meshC, uz->beta); times[1] = u_wseconds(); uz->numredge = 0; uz->redgeto = NULL; if (uz->beta>0) { /* create random edges for WS graph */ int ngvtx=uz->meshC*uz->meshR, trsh=(int) (uz->beta*100.0); int ierr=0; int *edges=NULL, *redges=NULL, *proclist=NULL, nedge; ZOLTAN_COMM_OBJ *plan; uz->redgeto = (int *) malloc(nlvtx*sizeof(int)); for (i=0; i<nlvtx; ++i) { int rv = Zoltan_Rand_InRange(NULL, 100); if ( rv < trsh) { if ((uz->redgeto[i] = Zoltan_Rand_InRange(NULL, ngvtx))==gIDfLID(i)) /* is it a self edge */ uz->redgeto[i] = -1; else ++uz->numredge; } else uz->redgeto[i] = -1; } edges = (int *) malloc(sizeof(int)*2*uz->numredge); proclist = (int *) malloc(sizeof(int)*uz->numredge); next = 0; for (i=0; i<nlvtx; ++i) if 
(uz->redgeto[i]>0) { edges[2*next] = uz->redgeto[i]; edges[2*next+1] = gIDfLID(i); proclist[next] = pIDfGID(uz->redgeto[i]); ++next; } ierr = Zoltan_Comm_Create(&plan, uz->numredge, proclist, MPI_COMM_WORLD, msg_tag, &nedge); redges = (int *) malloc(sizeof(int)*2*nedge); --msg_tag; ierr |= Zoltan_Comm_Do(plan, msg_tag, (char *) edges, 2*sizeof(int), (char *) redges); ierr |= Zoltan_Comm_Destroy(&plan); free(proclist); free(edges); if (ierr) { printf("error while communicating edges!\n"); exit(1); } xadj = (int *) calloc(1+nlvtx, sizeof(int)); adj = (int *) malloc(sizeof(int)*nedge); for (i=0; i<nedge; ++i) { if (redges[2*i] < gID(uz->_sr, uz->_sc) || redges[2*i] >= gID(uz->_er, uz->_ec)) { printf("[%d/%d] received gid=%d doesn't blong to processor range [%d, %d) should go to proc %d\n", uz->myRank, uz->numProcs, redges[2*i], gID(uz->_sr, uz->_sc), gID(uz->_er, uz->_ec), pIDfGID(redges[2*i])); } ++xadj[lIDfGID(redges[2*i])]; } xadj[nlvtx] = nedge; maxdeg = xadj[0]; for (i=1; i<nlvtx; ++i) { maxdeg = xadj[i]>maxdeg ? 
xadj[i] : maxdeg; xadj[i] += xadj[i-1]; } for (i=0; i<nedge; ++i) { int u = lIDfGID(redges[2*i]); int v = redges[2*i+1]; adj[--xadj[u]] = v; } free(redges); } maxdeg += uz->stencil+1; adjTemp = (int *) malloc(sizeof(int)*2*maxdeg); times[2] = u_wseconds(); /* printf("My rank %d/%d at proc-mesh loc (%d, %d) generating [%d, %d) x [%d, %d) + %d random edges TotEdge=%d\n", uz->myRank, uz->numProcs, uz->myR, uz->myC, uz->_sr, uz->_er, uz->_sc, uz->_ec, uz->numredge, xadj[nlvtx]); */ printStats("Number of Vertices ", nlvtx, uz->myRank, uz->numProcs); if (xadj) printStats("Number of Rand Edges", xadj[nlvtx], uz->myRank, uz->numProcs); /* General parameters */ #ifndef ZOLTANV31 Zoltan_Set_Param(zz, "GRAPH_BUILD_TYPE", "FAST_NO_DUP"); #endif /* General parameters */ Zoltan_Set_Param(zz, "DEBUG_LEVEL", "3"); Zoltan_Set_Param(zz, "NUM_GID_ENTRIES", "1"); Zoltan_Set_Param(zz, "NUM_LID_ENTRIES", "1"); Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "0"); /* coloring parameters */ Zoltan_Set_Param(zz, "SUPERSTEP_SIZE", "500"); /* let's make S=500 default */ for (i=7; i<argc; ++i) { char param[256], *eq; if (!uz->myRank) printf("processing argv[%d]='%s'\n", i, argv[i]); strncpy(param, argv[i], sizeof(param)); eq = strchr(param, '='); if (!eq) { fprintf(stderr, "invalid argument '%s', Zoltan Paramters should be in the format <ZoltanParam>=<Val>\n", param); goto End; } *eq = 0; Zoltan_Set_Param(zz, param, eq+1); } #if 0 /* Graph parameters */ Zoltan_Set_Param(zz, "CHECK_GRAPH", "2"); #endif /* set call backs */ Zoltan_Set_Num_Obj_Fn(zz, get_number_of_objects, uz); Zoltan_Set_Obj_List_Fn(zz, get_object_list, uz); Zoltan_Set_Num_Edges_Multi_Fn(zz, get_num_edges_list, uz); Zoltan_Set_Edge_List_Multi_Fn(zz, get_edge_list, uz); #if 0 #ifndef ZOLTANV31 Zoltan_LB_Eval_Graph(zz, 0, &graph); if (!uz->myRank) { printf("EdgeCut Min=%8.0f Max=%8.0f Sum=%8.0f\n", graph.cuts[EVAL_GLOBAL_MIN], graph.cuts[EVAL_GLOBAL_MAX], graph.cuts[EVAL_GLOBAL_SUM]); printf("#Vertices Min=%8.0f Max=%8.0f Sum=%8.0f 
imbal=%.2f\n", graph.nobj[EVAL_GLOBAL_MIN], graph.nobj[EVAL_GLOBAL_MAX], graph.nobj[EVAL_GLOBAL_SUM], graph.obj_imbalance); } #endif #endif /* now color */ ngids = get_number_of_objects(uz, &rc); gid_list = (ZOLTAN_ID_PTR) malloc(sizeof(ZOLTAN_ID_TYPE) * ngids); #ifndef ZOLTANV31 next = 0; for (i=uz->_sr; i<uz->_er; ++i) { int j; for (j=uz->_sc; j<uz->_ec; ++j) { gid_list[next++] = i*uz->meshC + j; } } #endif color = (int *) malloc(sizeof(int) * ngids); MPI_Barrier(MPI_COMM_WORLD); times[3] = u_wseconds(); #ifdef ZOLTANV31 rc = Zoltan_Color(zz, /* input (all remaining fields are output) */ &numGidEntries, /* Number of integers used for a global ID */ &numLidEntries, /* Number of integers used for a local ID */ ngids, /* #objects to color in this proc */ gid_list, /* global ids of colored vertices */ NULL, /* we ignore local ids */ color); /* result color */ #else rc = Zoltan_Color(zz, /* input (all remaining fields are output) */ 1, /* Number of integers used for a global ID */ ngids, /* #objects to color in this proc */ gid_list, /* global ids of colored vertices */ color); /* result color */ #endif MPI_Barrier(MPI_COMM_WORLD); times[4] = u_wseconds(); MPI_Reduce(times, gtimes, 5, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); if (rc != ZOLTAN_OK) fprintf(stderr, "Zoltan_Color failed with return code %d...\n", rc); for (maxcol=i=0; i<ngids; ++i) if (color[i] > maxcol) maxcol = color[i]; MPI_Reduce(&maxcol, &ncolors, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); if (uz->myRank==0) { struct rusage usage; printf("%s setup Proc-0: %8.2lf Max: %8.2lf\n", argv[0], times[1]-times[0], gtimes[1]-gtimes[0]); printf("%s gen rand edges Proc-0: %8.2lf Max: %8.2lf\n", argv[0], times[2]-times[1], gtimes[2]-gtimes[1]); printf("%s set gids Proc-0: %8.2lf Max: %8.2lf\n", argv[0], times[3]-times[2], gtimes[3]-gtimes[2]); printf("%s Zoltan_Color call Proc-0: %8.2lf Max: %8.2lf\n", argv[0], times[4]-times[3], gtimes[4]-gtimes[3]); printf("%s Coloring Time : %.2lf # Colors used : %d\n", argv[0], 
gtimes[4]-gtimes[0], ncolors); getrusage(RUSAGE_SELF, &usage); printf("%s maxrss=%ld minflt=%ld majflt=%ld nswap=%ld\n", argv[0], usage.ru_maxrss, usage.ru_minflt, usage.ru_majflt, usage.ru_nswap); } #ifdef _DEBUG saveColor(argv[0], uz, (int *) gid_list, color, ngids); #endif /****************************************************************** ** Clean up ******************************************************************/ if (gid_list) free(gid_list); if (color) free(color); if (xadj) free(xadj); if (adj) free(adj); if (adjTemp) free(adjTemp); if (uz->redgeto) free(uz->redgeto); End: Zoltan_Destroy(&zz); MPI_Finalize(); return 0; }
/*
 * Zoltan_Random: "partitioner" that moves a random subset of the local
 * objects to randomly chosen parts.
 *
 * Each local object is selected for export when a random draw falls
 * below RANDOM_MOVE_FRACTION (default 1.0), up to a cap of
 * 1.5 * fraction * num_obj exports.  Only export lists are produced;
 * *num_import is set to -1.
 *
 * Returns ZOLTAN_OK on success, ZOLTAN_MEMERR when an export list cannot
 * be allocated, or the (negative) error code reported by
 * Zoltan_Get_Obj_List.  On the Zoltan_Get_Obj_List error path
 * *num_export is 0 and all export lists are NULL.  Export lists are
 * owned by the caller.
 */
int Zoltan_Random(
  ZZ *zz,                       /* The Zoltan structure. */
  float *part_sizes,            /* Input: Array of size zz->LB.Num_Global_Parts
                                   * zz->Obj_Weight_Dim containing the
                                   percentage of work to be assigned to each
                                   partition. */
  int *num_import,              /* Return -1. Random uses only export lists. */
  ZOLTAN_ID_PTR *import_global_ids, /* Not used. */
  ZOLTAN_ID_PTR *import_local_ids,  /* Not used. */
  int **import_procs,           /* Not used. */
  int **import_to_part,         /* Not used. */
  int *num_export,              /* Output: Number of objects to export. */
  ZOLTAN_ID_PTR *export_global_ids, /* Output: GIDs to export. */
  ZOLTAN_ID_PTR *export_local_ids,  /* Output: LIDs to export. */
  int **export_procs,           /* Output: Processsors to export to. */
  int **export_to_part          /* Output: Partitions to export to. */
)
{
  int ierr = ZOLTAN_OK;
  int i, count, num_obj;
  int max_export;
  double rand_frac = 1.0;       /* Default is to move all objects. */
  ZOLTAN_ID_PTR global_ids = NULL;
  ZOLTAN_ID_PTR local_ids = NULL;
  int *parts = NULL;
  float *dummy = NULL;
  static char *yo = "Zoltan_Random";
  static int first_time = 1;

  ZOLTAN_TRACE_ENTER(zz, yo);

  /* Synchronize the random number generator.
   * This synchronization is needed only for sanity in our nightly testing.
   * If some other operation (eg., Zoltan_LB_Eval) changes the status of
   * the random number generator, the answers here will change. They won't
   * be wrong, but they will be different from our accepted answers.
   */
  if (first_time) {
    Zoltan_Srand(zz->Seed, NULL);
    Zoltan_Rand(NULL);
    first_time=0;
  }

  /* No import lists computed. */
  *num_import = -1;

  /* Start from an empty export result so the outputs are well defined
     on every exit path, including early errors. */
  *num_export = 0;
  *export_global_ids = *export_local_ids = NULL;
  *export_procs = *export_to_part = NULL;

  /* Get parameter values. */
  Zoltan_Bind_Param(Random_params, "RANDOM_MOVE_FRACTION", (void *) &rand_frac);
  Zoltan_Assign_Param_Vals(zz->Params, Random_params, zz->Debug_Level,
                           zz->Proc, zz->Debug_Proc);

  /* Get list of local objects. */
  ierr = Zoltan_Get_Obj_List(zz, &num_obj, &global_ids, &local_ids, 0,
                             &dummy, &parts);
  if (ierr < 0) {
    /* num_obj and the id lists cannot be trusted after a failure, so do
       not size allocations from them or iterate over them. */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                       "Error returned from Zoltan_Get_Obj_List.");
    goto End;
  }

  /* Bound number of objects to export. */
  max_export = 1.5*rand_frac*num_obj;

  /* Allocate export lists. */
  if (max_export > 0) {
    if (!Zoltan_Special_Malloc(zz, (void **)export_global_ids, max_export,
                               ZOLTAN_SPECIAL_MALLOC_GID)
     || !Zoltan_Special_Malloc(zz, (void **)export_local_ids, max_export,
                               ZOLTAN_SPECIAL_MALLOC_LID)
     || !Zoltan_Special_Malloc(zz, (void **)export_procs, max_export,
                               ZOLTAN_SPECIAL_MALLOC_INT)
     || !Zoltan_Special_Malloc(zz, (void **)export_to_part, max_export,
                               ZOLTAN_SPECIAL_MALLOC_INT)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
  }

  /* Randomly assign ids to procs. */
  count=0;
  for (i=0; i<num_obj; i++){
    /* Randomly select some objects to move (export) */
    /* The count<max_export test comes first, so no random number is
       drawn once the export cap has been reached. */
    if ((count<max_export) && (Zoltan_Rand(NULL)<rand_frac*ZOLTAN_RAND_MAX)){
      /* export_global_ids[count] = global_ids[i]; */
      ZOLTAN_SET_GID(zz, &((*export_global_ids)[count*zz->Num_GID]),
                     &global_ids[i*zz->Num_GID]);
      if (local_ids)
        /* export_local_ids[count] = local_ids[i]; */
        ZOLTAN_SET_LID(zz, &((*export_local_ids)[count*zz->Num_LID]),
                       &local_ids[i*zz->Num_LID]);
      /* Randomly pick new partition number. */
      (*export_to_part)[count] = Zoltan_Rand_InRange(NULL, zz->LB.Num_Global_Parts);
      /* Processor number is derived from partition number. */
      (*export_procs)[count] = Zoltan_LB_Part_To_Proc(zz,
                                 (*export_to_part)[count],
                                 &global_ids[i*zz->Num_GID]);

      /* printf("Debug: Export gid %u to part %d and proc %d.\n", (*export_global_ids)[count], (*export_to_part)[count], (*export_procs)[count]); */

      ++count;
    }
  }
  (*num_export) = count;

End:
  /* Free local memory, but not export lists. */
  ZOLTAN_FREE(&global_ids);
  ZOLTAN_FREE(&local_ids);
  ZOLTAN_FREE(&parts);
  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
/*
 * Zoltan_PHG_CoarsePartition: compute a partition of the (coarse)
 * distributed hypergraph phg into numPart parts and store it in part.
 *
 * Special cases handled without a partitioner: coarse partitioning
 * disabled ("no"/"none"), a single part, and at least as many parts as
 * global vertices.  Otherwise either a per-column local partitioner is
 * applied, or the hypergraph is gathered to every processor, several
 * candidate partitions are computed and refined, and pick_best selects
 * the result.
 *
 * Returns ZOLTAN_OK, ZOLTAN_WARN (out-of-range initial parts were
 * remapped), ZOLTAN_MEMERR, or a negative code propagated from a callee.
 * Work arrays and the gathered hypergraph are released on every exit
 * path.
 */
int Zoltan_PHG_CoarsePartition(
  ZZ *zz,
  HGraph *phg,         /* Input: coarse hypergraph -- distributed! */
  int numPart,         /* Input: number of partitions to generate. */
  float *part_sizes,   /* Input: array of size numPart listing target sizes
                                 (% of work) for the partitions */
  Partition part,      /* Input: array of initial partition assignments.
                          Output: array of computed partition assignments. */
  PHGPartParams *hgp   /* Input: parameters to use. */
)
{
/*
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
  char *yo = "Zoltan_PHG_CoarsePartition";
  int ierr = ZOLTAN_OK;
  int i, si, j;
  static PHGComm scomm;          /* Serial communicator info */
  static int first_time = 1;
  HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
  int shg_gathered = 0;          /* Nonzero once shg is a gathered copy that
                                    this routine must free. */
  int *spart = NULL;             /* Partition vectors for shg. */
  int *new_part = NULL;          /* Ptr to new partition vector. */
  float *bestvals = NULL;        /* Best cut values found so far */
  int worst, new_cand;
  float bal, cut, worst_cut;
  int fine_timing = (hgp->use_timers > 2);
  struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
  int local_coarse_part = hgp->LocalCoarsePartition;

  /* Number of iterations to try coarse partitioning on each proc. */
  /* 10 when p=1, and 1 when p is large. */
  const int num_coarse_iter = 1 + 9/zz->Num_Proc;

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }

  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX 64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc=phg->comm;

    Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
      for (i = 0; i < phg->nVtx; i++) {
        /* Impose fixed vertex/preferred part constraints. */
        if (phg->pref_part[i] < 0) {
          /* Free vertex in fixedvertex partitioning or repart */
          /* randomly assigned to a part */
          part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
        } else {
          if (phg->bisec_split < 0)
            /* direct k-way, use part numbers directly */
            part[i] = phg->pref_part[i];
          else
            /* recursive bisection, map to 0-1 part numbers */
            part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
        }
      }
    } else {
      for (i = 0; i < phg->nVtx; i++) {
        if (part[i] >= numPart || part[i]<0) {
          /* Warn once, then fold the out-of-range value into [0, numPart). */
          if (first) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
            first = 0;
            ierr = ZOLTAN_WARN;
          }
          part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart;
        }
      }
    }
  }
  else if (numPart == 1) {
    /* everything goes in the one partition */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) {
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc %
                                           NUM_COARSEPARTITION_FNS];
    }

    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /*
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        /* shg is deliberately left alone here: what the gather routine
           does with it on failure is not visible from this function. */
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }
      shg_gathered = 1;

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }
    }

    /*
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                    sizeof(int));
    /* bestvals holds floats, so size it by float rather than int. */
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1)*sizeof(float));
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i=0; i< num_coarse_iter; i++){
      int savefmlooplimit=hgp->fm_loop_limit;

      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;

      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0,
                                       numPart, new_part);
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);

      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }
      if (i<NUM_PART_KEEP)
        /* Slots are still free: the next candidate goes in the next slot. */
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j=1; j<NUM_PART_KEEP+1; j++){
          if (worst_cut < bestvals[j]){
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart+new_cand*(shg->nVtx);
    }
    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart. */
    for (i=0; i<shg->nVtx; i++){
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart,
                     MIN(NUM_PART_KEEP, num_coarse_iter), spart,
                     bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                         "Error returned from pick_best.");
      goto End;
    }

    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }
    }
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
  }

End:
  /* Common cleanup for success and error exits.  Previously these frees
     ran only on the success path, so every "goto End" after the gather
     or the allocations leaked memory.  spart and bestvals may still be
     NULL here; ZOLTAN_FREE is expected to tolerate that. */
  if (shg_gathered) {
    Zoltan_HG_HGraph_Free(shg);
    ZOLTAN_FREE(&shg);
  }
  ZOLTAN_FREE(&spart);
  ZOLTAN_FREE(&bestvals);

  if (fine_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}