static int Zoltan_LB(
  ZZ *zz,
  int include_parts,               /* Flag indicating whether to generate
                                      part information;
                                      0 if called by Zoltan_LB_Balance,
                                      1 if called by Zoltan_LB_Partition. */
  int *changes,                    /* Set to zero or one depending on whether
                                      Zoltan computes a new decomposition:
                                      zero - No changes to the decomposition
                                             were made by the load-balancing
                                             algorithm; migration is not
                                             needed.
                                      one  - A new decomposition is suggested
                                             by the load-balancer; migration
                                             is needed to establish the new
                                             decomposition. */
  int *num_gid_entries,            /* The number of array entries in a global
                                      ID; set to be the max over all
                                      processors in zz->Communicator of the
                                      parameter Num_Global_ID_Entries. */
  int *num_lid_entries,            /* The number of array entries in a local
                                      ID; set to be the max over all
                                      processors in zz->Communicator of the
                                      parameter Num_Local_ID_Entries. */
  int *num_import_objs,            /* The number of non-local objects in the
                                      processor's new decomposition. */
  ZOLTAN_ID_PTR *import_global_ids,/* Array of global IDs for non-local
                                      objects (i.e., objs to be imported) in
                                      the processor's new decomposition. */
  ZOLTAN_ID_PTR *import_local_ids, /* Array of local IDs for non-local objects
                                      (i.e., objs to be imported) in the
                                      processor's new decomposition. */
  int **import_procs,              /* Array of processor IDs for processors
                                      currently owning non-local objects
                                      (i.e., objs to be imported) in this
                                      processor's new decomposition. */
  int **import_to_part,            /* Partition to which the objects should be
                                      imported. */
  int *num_export_objs,            /* The number of local objects that need to
                                      be exported from the processor to
                                      establish the new decomposition. */
  ZOLTAN_ID_PTR *export_global_ids,/* Array of global IDs for objects that
                                      need to be exported (assigned and sent
                                      to other processors) to establish the
                                      new decomposition. */
  ZOLTAN_ID_PTR *export_local_ids, /* Array of local IDs for objects that need
                                      to be exported (assigned and sent to
                                      other processors) to establish the new
                                      decomposition. */
  int **export_procs,              /* Array of destination processor IDs for
                                      objects that need to be exported to
                                      establish the new decomposition. */
  int **export_to_part             /* Partition to which objects should be
                                      exported. */
)
{
/*
 * Main load-balancing routine.
 * Input:  a Zoltan structure with appropriate function pointers set.
 * Output:
 *   changes
 *   num_import_objs
 *   import_global_ids
 *   import_local_ids
 *   import_procs
 *   import_to_part
 *   num_export_objs
 *   export_global_ids
 *   export_local_ids
 *   export_procs
 *   export_to_part
 * Return values:
 *   Zoltan error code.
 */

  char *yo = "Zoltan_LB";
  int gmax;    /* Maximum number of imported/exported objects
                  over all processors. */
  int error = ZOLTAN_OK;    /* Error code */
  double start_time, end_time;
  double lb_time[2] = {0.0, 0.0};
  char msg[256];
  int comm[3], gcomm[3];
  float *part_sizes = NULL, *fdummy = NULL;
  int wgt_dim, part_dim;
  int all_num_obj, i, ts, idIdx;
  struct Hash_Node **ht;
  int *export_all_procs, *export_all_to_part, *parts = NULL;
  ZOLTAN_ID_PTR all_global_ids = NULL, all_local_ids = NULL;
  ZOLTAN_ID_PTR gid;
#ifdef ZOLTAN_OVIS
  struct OVIS_parameters ovisParameters;
#endif

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS) {
    printf("Build configuration:\n");
    Zoltan_Print_Configuration(" ");
    printf("\n");
    Zoltan_Print_Key_Params(zz);
  }

  start_time = Zoltan_Time(zz->Timer);

#ifdef ZOLTAN_OVIS
  Zoltan_OVIS_Setup(zz, &ovisParameters);
  if (zz->Proc == 0)
    printf("OVIS PARAMETERS %s %s %d %f\n",
           ovisParameters.hello,
           ovisParameters.dll,
           ovisParameters.outputLevel,
           ovisParameters.minVersion);
  ovis_enabled(zz->Proc, ovisParameters.dll);
#endif

  /*
   * Compute max number of array entries per ID over all processors.
   * Compute max number of return arguments for Zoltan_LB_Balance.
   * This is a sanity-maintaining step; we don't want different
   * processors to have different values for these numbers.
   */
  comm[0] = zz->Num_GID;
  comm[1] = zz->Num_LID;
  comm[2] = zz->LB.Return_Lists;
  MPI_Allreduce(comm, gcomm, 3, MPI_INT, MPI_MAX, zz->Communicator);
  zz->Num_GID = *num_gid_entries = gcomm[0];
  zz->Num_LID = *num_lid_entries = gcomm[1];
  zz->LB.Return_Lists = gcomm[2];

  /* Assume no changes. */
  *changes = 0;
  *num_import_objs = *num_export_objs = 0;
  *import_global_ids = NULL;
  *import_local_ids = NULL;
  *import_procs = NULL;
  *import_to_part = NULL;
  *export_global_ids = NULL;
  *export_local_ids = NULL;
  *export_procs = NULL;
  *export_to_part = NULL;

  /*
   * Return if this processor is not in the Zoltan structure's
   * communicator.
   */
  if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz))
    goto End;

  if (zz->LB.Method == NONE) {
    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
      printf("%s Balancing method selected == NONE; no balancing performed\n",
             yo);
    error = ZOLTAN_WARN;
    goto End;
  }

  /*
   * Sync the random number generator across processors.
   */
  Zoltan_Srand_Sync(Zoltan_Rand(NULL), NULL, zz->Communicator);

  /* Since we are generating a new partition, set aside the old mapping
     vector. */
  zz->LB.OldRemap = zz->LB.Remap;
  zz->LB.Remap = NULL;

  error = Zoltan_LB_Build_PartDist(zz);
  if (error != ZOLTAN_OK && error != ZOLTAN_WARN)
    goto End;

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
    int i, np, fp;
    for (i = 0; i < zz->Num_Proc; i++) {
      Zoltan_LB_Proc_To_Part(zz, i, &np, &fp);
      printf("%d Proc_To_Part Proc %d NParts %d FPart %d\n",
             zz->Proc, i, np, fp);
    }
  }

  /*
   * Generate part sizes.
   */
#ifdef ZOLTAN_OVIS
  /* Set part sizes computed by OVIS, if requested.
     Processes set only their own value. */
  {
    float part_sizes[1];
    int part_ids[1], wgt_idx[1];

    wgt_idx[0] = 0;
    part_ids[0] = 0;
    ovis_getPartsize(&(part_sizes[0]));
    printf("Rank %d ps %f\n", zz->Proc, part_sizes[0]);
    /* Clear out old part size info first. */
    Zoltan_LB_Set_Part_Sizes(zz, 0, -1, NULL, NULL, NULL);
    Zoltan_LB_Set_Part_Sizes(zz, 0, 1, part_ids, wgt_idx, part_sizes);
  }
#endif

  wgt_dim = zz->Obj_Weight_Dim;
  part_dim = ((wgt_dim > 0) ? wgt_dim : 1);

  part_sizes = (float *) ZOLTAN_MALLOC(sizeof(float) * part_dim
                                       * zz->LB.Num_Global_Parts);
  if (part_sizes == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    error = ZOLTAN_MEMERR;
    goto End;
  }

  /* Get part sizes. */
  Zoltan_LB_Get_Part_Sizes(zz, zz->LB.Num_Global_Parts, part_dim, part_sizes);

#ifdef ZOLTAN_OVIS
  /* if (ovisParameters.outputlevel > 3) */
  {
    int myRank = zz->Proc;
    if (myRank == 0) {
      int i, j;
      for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
        for (j = 0; j < part_dim; j++) {
          printf("Rank %d AG: part_sizes[%d] = %f "
                 "(Num_Global_Parts = %d, part_dim = %d)\n",
                 zz->Proc, (i*part_dim+j), part_sizes[i*part_dim+j],
                 zz->LB.Num_Global_Parts, part_dim);
        }
      }
    }
  }
#endif

  /*
   * Call the actual load-balancing function.
   */
  error = zz->LB.LB_Fn(zz, part_sizes,
                       num_import_objs, import_global_ids, import_local_ids,
                       import_procs, import_to_part,
                       num_export_objs, export_global_ids, export_local_ids,
                       export_procs, export_to_part);

  ZOLTAN_FREE(&part_sizes);

  if (error == ZOLTAN_FATAL || error == ZOLTAN_MEMERR) {
    sprintf(msg, "Partitioning routine returned code %d.", error);

#ifdef HOST_LINUX
    if ((error == ZOLTAN_MEMERR) && (Zoltan_Memory_Get_Debug() > 0)) {
      Zoltan_write_linux_meminfo(0,
        "State of /proc/meminfo after malloc failure\n", 0);
    }
#endif

    ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
    goto End;
  }
  else if (error) {
    if (zz->Debug_Level > ZOLTAN_DEBUG_NONE) {
      sprintf(msg, "Partitioning routine returned code %d.", error);
      ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
    }
  }

  ZOLTAN_TRACE_DETAIL(zz, yo, "Done partitioning");

  if (*num_import_objs >= 0)
    MPI_Allreduce(num_import_objs, &gmax, 1, MPI_INT, MPI_MAX,
                  zz->Communicator);
  else /* use export data */
    MPI_Allreduce(num_export_objs, &gmax, 1, MPI_INT, MPI_MAX,
                  zz->Communicator);

  if (gmax == 0) {

    /*
     * Decomposition was not changed by the load balancing; no migration
     * is needed.
     */
    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
      printf("%s No changes to the decomposition due to partitioning; "
             "no migration is needed.\n", yo);

    /*
     * Reset num_import_objs and num_export_objs; don't want to return
     * -1 for the arrays that weren't returned by ZOLTAN_LB_FN.
     */
    *num_import_objs = *num_export_objs = 0;

    if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
      /*
       * This parameter setting requires that all local objects
       * and their assignments appear in the export list.
       */
      error = Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs,
                                                 export_global_ids,
                                                 export_local_ids,
                                                 wgt_dim, &fdummy,
                                                 export_to_part);
      if (error == ZOLTAN_OK) {
        ZOLTAN_FREE(&fdummy);
        if (Zoltan_Special_Malloc(zz, (void **)export_procs,
                                  *num_export_objs,
                                  ZOLTAN_SPECIAL_MALLOC_INT)) {
          for (i = 0; i < *num_export_objs; i++)
            (*export_procs)[i] = zz->Proc;
        }
        else {
          error = ZOLTAN_MEMERR;
        }
      }
    }
    goto End;
  }

  /*
   * Check whether we know the import data, export data, or both.
   *
   * If we were given the import data,
   * we know what the new decomposition should look like on the
   * processor, but we don't know which of our local objects we have
   * to export to other processors to establish the new decomposition.
   * Reverse the argument if we were given the export data.
   *
   * Unless we were given both maps, compute the inverse map.
   */
  if (zz->LB.Return_Lists == ZOLTAN_LB_NO_LISTS) {
    if (*num_import_objs >= 0)
      Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids,
                                  import_procs, import_to_part);
    if (*num_export_objs >= 0)
      Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                  export_procs, export_to_part);
    *num_import_objs = *num_export_objs = -1;
  }

  if (*num_import_objs >= 0) {
    if (*num_export_objs >= 0) {
      /* Both maps already available; nothing to do. */
      ;
    }
    else if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS
          || zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS
          || zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
      /* Export lists are requested; compute export map */

      error = Zoltan_Invert_Lists(zz, *num_import_objs, *import_global_ids,
                                  *import_local_ids, *import_procs,
                                  *import_to_part,
                                  num_export_objs, export_global_ids,
                                  export_local_ids, export_procs,
                                  export_to_part);
      if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
        sprintf(msg, "Error building return arguments; "
                     "%d returned by Zoltan_Compute_Destinations\n", error);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        goto End;
      }
      if (zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS
       || zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
        /* Method returned import lists, but only export lists
           were desired. */
        /* Import lists not needed; free them. */
        *num_import_objs = -1;
        Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids,
                                    import_procs, import_to_part);
      }
    }
  }
  else { /* (*num_import_objs < 0) */
    if (*num_export_objs >= 0) {
      /* Only export lists have been returned. */
      if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS
       || zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
        /* Compute import map */

        error = Zoltan_Invert_Lists(zz, *num_export_objs, *export_global_ids,
                                    *export_local_ids, *export_procs,
                                    *export_to_part,
                                    num_import_objs, import_global_ids,
                                    import_local_ids, import_procs,
                                    import_to_part);
        if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
          sprintf(msg, "Error building return arguments; "
                       "%d returned by Zoltan_Compute_Destinations\n", error);
          ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
          goto End;
        }

        if (zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
          /* Method returned export lists, but only import lists
             are desired. */
          /* Export lists not needed; free them. */
          *num_export_objs = -1;
          Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                      export_procs, export_to_part);
        }
      }
    }
    else { /* (*num_export_objs < 0 && *num_import_objs < 0) */
      if (zz->LB.Return_Lists) {
        /* No map at all available */
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Load-balancing function returned "
                                         "neither import nor export data.");
        error = ZOLTAN_WARN;
        goto End;
      }
    }
  }

  if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
    /*
     * Normally, Zoltan_LB returns in the export lists all local
     * objects that are moving off processor, or that are assigned
     * to a part on the local processor that is not the
     * default part.  This setting of Return_Lists requests
     * that all local objects be included in the export list.
     */
    if (*num_export_objs == 0) {
      /* all local objects are remaining on processor */

      error = Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs,
                                                 export_global_ids,
                                                 export_local_ids,
                                                 wgt_dim, &fdummy,
                                                 export_to_part);
      if (error == ZOLTAN_OK) {
        ZOLTAN_FREE(&fdummy);
        if (*num_export_objs) {
          if (Zoltan_Special_Malloc(zz, (void **)export_procs,
                                    *num_export_objs,
                                    ZOLTAN_SPECIAL_MALLOC_INT)) {
            for (i = 0; i < *num_export_objs; i++)
              (*export_procs)[i] = zz->Proc;
          }
          else {
            error = ZOLTAN_MEMERR;
          }
        }
      }

      if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN))
        goto End;
    }
    else {
      all_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &error);

      if (*num_export_objs < all_num_obj) {
        /* Create a lookup table for exported IDs */

        ts = Zoltan_Recommended_Hash_Size(*num_export_objs);
        ht = create_hash_table(zz, *export_global_ids, *num_export_objs, ts);

        /* Create a list of all gids, lids and parts */

        error = Zoltan_Get_Obj_List_Special_Malloc(zz, &all_num_obj,
                                                   &all_global_ids,
                                                   &all_local_ids,
                                                   wgt_dim, &fdummy, &parts);
        if ((error == ZOLTAN_OK) || (error == ZOLTAN_WARN)) {
          ZOLTAN_FREE(&fdummy);
          if ((Zoltan_Special_Malloc(zz, (void **)(void*)&export_all_procs,
                                     all_num_obj,
                                     ZOLTAN_SPECIAL_MALLOC_INT) == 0)
           || (Zoltan_Special_Malloc(zz, (void **)(void*)&export_all_to_part,
                                     all_num_obj,
                                     ZOLTAN_SPECIAL_MALLOC_INT) == 0)) {
            error = ZOLTAN_MEMERR;
          }
        }

        if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN)) {
          sprintf(msg, "Error building complete export list; "
                       "%d returned by Zoltan_Get_Obj_List\n", error);
          ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
          goto End;
        }

        gid = all_global_ids;

        for (i = 0; i < all_num_obj; i++, gid += zz->Num_GID) {
          idIdx = search_hash_table(zz, gid, ht, ts);

          if (idIdx >= 0) {
            export_all_procs[i] = (*export_procs)[idIdx];
            export_all_to_part[i] = (*export_to_part)[idIdx];
          }
          else {
            export_all_procs[i] = zz->Proc;
            export_all_to_part[i] = parts[i];
          }
        }

        free_hash_table(ht, ts);

        Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                    export_procs, export_to_part);
        Zoltan_Special_Free(zz, (void **)(void*)&parts,
                            ZOLTAN_SPECIAL_MALLOC_INT);

        *export_global_ids = all_global_ids;
        *export_local_ids = all_local_ids;
        *export_procs = export_all_procs;
        *export_to_part = export_all_to_part;
        *num_export_objs = all_num_obj;
      }
    }
  }

  ZOLTAN_TRACE_DETAIL(zz, yo, "Done building return arguments");

  end_time = Zoltan_Time(zz->Timer);
  lb_time[0] = end_time - start_time;

  if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
    int i;
    Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
    printf("ZOLTAN: Objects to be imported to Proc %d\n", zz->Proc);
    for (i = 0; i < *num_import_objs; i++) {
      printf(" Obj: ");
      ZOLTAN_PRINT_GID(zz, &((*import_global_ids)[i*zz->Num_GID]));
      printf(" To part: %4d",
             (*import_to_part != NULL ? (*import_to_part)[i] : zz->Proc));
      printf(" From processor: %4d\n", (*import_procs)[i]);
    }
    printf("\n");
    printf("ZOLTAN: Objects to be exported from Proc %d\n", zz->Proc);
    for (i = 0; i < *num_export_objs; i++) {
      printf(" Obj: ");
      ZOLTAN_PRINT_GID(zz, &((*export_global_ids)[i*zz->Num_GID]));
      printf(" To part: %4d",
             (*export_to_part != NULL ? (*export_to_part)[i]
                                      : (*export_procs)[i]));
      printf(" To processor: %4d\n", (*export_procs)[i]);
    }
    Zoltan_Print_Sync_End(zz->Communicator, TRUE);
  }

  /*
   * If the AUTO_MIGRATE option is set, perform migration for the application.
   */
  if (zz->Migrate.Auto_Migrate) {
    ZOLTAN_TRACE_DETAIL(zz, yo, "Begin auto-migration");

    start_time = Zoltan_Time(zz->Timer);
    error = Zoltan_Migrate(zz,
                           *num_import_objs, *import_global_ids,
                           *import_local_ids, *import_procs, *import_to_part,
                           *num_export_objs, *export_global_ids,
                           *export_local_ids, *export_procs, *export_to_part);
    if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
      sprintf(msg, "Error in auto-migration; %d returned from "
                   "Zoltan_Help_Migrate\n", error);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
      goto End;
    }
    end_time = Zoltan_Time(zz->Timer);
    lb_time[1] = end_time - start_time;

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done auto-migration");
  }

  /* Print timing info */
  if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
    if (zz->Proc == zz->Debug_Proc) {
      printf("ZOLTAN Times: \n");
    }
    Zoltan_Print_Stats(zz->Communicator, zz->Debug_Proc, lb_time[0],
                       "ZOLTAN Partition: ");
    if (zz->Migrate.Auto_Migrate)
      Zoltan_Print_Stats(zz->Communicator, zz->Debug_Proc, lb_time[1],
                         "ZOLTAN Migrate: ");
  }

  *changes = 1;

End:
  ZOLTAN_TRACE_EXIT(zz, yo);
  return (error);
}
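
/*
 * Illustrative sketch, not part of the original source: how an application
 * might reach the static Zoltan_LB() above through the public wrapper
 * Zoltan_LB_Partition() (which calls Zoltan_LB with include_parts = 1) and
 * then release the returned lists with Zoltan_LB_Free_Part().  It assumes
 * the declarations from zoltan.h are visible here; the guard macro and the
 * helper name are hypothetical and the block is normally compiled out.
 */
#ifdef ZOLTAN_LB_USAGE_SKETCH
static void example_partition_call(ZZ *zz)
{
  int changes, num_gid_entries, num_lid_entries;
  int num_import, num_export;
  ZOLTAN_ID_PTR import_gids = NULL, import_lids = NULL;
  ZOLTAN_ID_PTR export_gids = NULL, export_lids = NULL;
  int *import_procs = NULL, *import_to_part = NULL;
  int *export_procs = NULL, *export_to_part = NULL;
  int ierr;

  /* Compute a new decomposition; all list arguments are output-only. */
  ierr = Zoltan_LB_Partition(zz, &changes,
                             &num_gid_entries, &num_lid_entries,
                             &num_import, &import_gids, &import_lids,
                             &import_procs, &import_to_part,
                             &num_export, &export_gids, &export_lids,
                             &export_procs, &export_to_part);

  if (ierr == ZOLTAN_OK && changes) {
    /* Migrate application data here (or rely on AUTO_MIGRATE). */
  }

  /* Free the import/export lists allocated by Zoltan. */
  Zoltan_LB_Free_Part(&import_gids, &import_lids,
                      &import_procs, &import_to_part);
  Zoltan_LB_Free_Part(&export_gids, &export_lids,
                      &export_procs, &export_to_part);
}
#endif /* ZOLTAN_LB_USAGE_SKETCH */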
int Zoltan_PHG_Set_2D_Proc_Distrib(
  ZZ *zz,                 /* Input:  ZZ struct; for debugging */
  MPI_Comm Communicator,  /* Input:  The MPI Communicator; this communicator
                                     may be MPI_COMM_NULL, as PHG_Redistribute
                                     uses this function with MPI_COMM_NULL to
                                     compute nProc_x and nProc_y. */
  int proc,               /* Input:  Rank of current processor */
  int nProc,              /* Input:  Total # of processors */
  int nProc_x,            /* Input:  Suggested #procs in x-direction */
  int nProc_y,            /* Input:  Suggested #procs in y-direction */
  PHGComm *comm           /* Output: filled */
)
{
/* Computes the processor distribution for the 2D data distrib.
 * Sets nProc_x, nProc_y.
 * Constraint:  nProc_x * nProc_y == nProc.
 * For 2D data distrib, default should approximate sqrt(nProc).
 * If nProc_x and nProc_y both equal -1 on input, compute default.
 * Otherwise, compute valid values and/or return error.
 */
  char *yo = "Zoltan_PHG_Set_2D_Proc_Distrib";
  int tmp;
  int ierr = ZOLTAN_OK;

  if (nProc_x == -1 && nProc_y == -1) {
    /* Compute default */
    tmp = (int) sqrt((double)nProc+0.1);
    while (nProc % tmp) tmp--;
    comm->nProc_x = tmp;
    comm->nProc_y = nProc / tmp;
  }
  else if (nProc_x == -1) {
    comm->nProc_y = MIN(nProc_y, nProc);
    comm->nProc_x = nProc / comm->nProc_y;
  }
  else if (nProc_y == -1) {
    comm->nProc_x = MIN(nProc_x, nProc);
    comm->nProc_y = nProc / comm->nProc_x;
  }
  else {
    comm->nProc_x = nProc_x;
    comm->nProc_y = nProc_y;
  }

  /* Error check */
  if (comm->nProc_x * comm->nProc_y != nProc) {
    ZOLTAN_PRINT_ERROR(proc, yo,
                       "Values for PHG_NPROC_X and PHG_NPROC_Y "
                       "do not evenly divide the "
                       "total number of processors.");
    ierr = ZOLTAN_FATAL;
    goto End;
  }

  comm->nProc = nProc;
  comm->Communicator = Communicator;
  comm->zz = zz;

  if (Communicator == MPI_COMM_NULL) {
    comm->myProc_x = -1;
    comm->myProc_y = -1;
    comm->myProc = -1;
    comm->col_comm = comm->row_comm = MPI_COMM_NULL;
  }
  else {
    comm->myProc_x = proc % comm->nProc_x;
    comm->myProc_y = proc / comm->nProc_x;
    comm->myProc = proc;

    if ((MPI_Comm_split(Communicator, comm->myProc_x, comm->myProc_y,
                        &comm->col_comm) != MPI_SUCCESS)
     || (MPI_Comm_split(Communicator, comm->myProc_y, comm->myProc_x,
                        &comm->row_comm) != MPI_SUCCESS)) {
      ZOLTAN_PRINT_ERROR(proc, yo, "MPI_Comm_Split failed");
      return ZOLTAN_FATAL;
    }
    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(comm->RNGState_row),
                      comm->row_comm);
    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(comm->RNGState_col),
                      comm->col_comm);
    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(comm->RNGState),
                      comm->Communicator);
  }

/*
  printf("(%d, %d) of [%d, %d] -> After Comm_split col_comm=%d row_comm=%d\n",
         hgp->myProc_x, hgp->myProc_y, hgp->nProc_x, hgp->nProc_y,
         (int)hgp->col_comm, (int)hgp->row_comm);
*/

End:
  return ierr;
}
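
/*
 * Illustrative sketch, not part of the original source: the default 2D
 * layout computed above starts from floor(sqrt(nProc)) and decrements until
 * it divides nProc, e.g. nProc = 12 gives nProc_x = 3, nProc_y = 4, while a
 * prime nProc = 7 degenerates to 1 x 7.  The guard macro and helper name
 * below are hypothetical; the block only restates the factorization step in
 * isolation.
 */
#ifdef ZOLTAN_PHG_DISTRIB_SKETCH
#include <math.h>
static void example_default_2d_layout(int nProc, int *nProc_x, int *nProc_y)
{
  int tmp = (int) sqrt((double)nProc + 0.1); /* start near sqrt(nProc) */
  while (nProc % tmp) tmp--;                 /* largest divisor <= sqrt(nProc) */
  *nProc_x = tmp;
  *nProc_y = nProc / tmp;                    /* nProc_x * nProc_y == nProc */
}
#endif /* ZOLTAN_PHG_DISTRIB_SKETCH */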
int Zoltan_PHG_Vertex_Visit_Order(
  ZZ *zz,
  HGraph *hg,
  PHGPartParams *hgp,
  int *order)
{
  int i, j, edge;
  int *ldegree = NULL, *gdegree = NULL; /* local/global degree */
  int *lpins = NULL, *gpins = NULL;     /* local/global sum of pins */
  char *yo = "Zoltan_PHG_Vertex_Visit_Order";

  /* Start with linear order. */
  for (i = 0; i < hg->nVtx; i++)
    order[i] = i;

  /* Permute order array according to chosen strategy. */
  switch (hgp->visit_order) {
  case 0:
    /* random node visit order (recommended) */
    /* Synchronize so each proc in column visits in same order */
    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(hg->comm->RNGState_col),
                      hg->comm->col_comm);
    Zoltan_Rand_Perm_Int(order, hg->nVtx, &(hg->comm->RNGState_col));
    break;

  case 1:
    /* linear (natural) vertex visit order */
    break;

  case 2:
  {
    /* increasing vertex weight */
    float *tmpvwgt;

    if (hg->VtxWeightDim == 1)
      tmpvwgt = hg->vwgt;
    else {
      /* Sort based on first component of multidimensional weight */
      tmpvwgt = (float *) ZOLTAN_MALLOC(hg->nVtx * sizeof(float));
      for (i = 0; i < hg->nVtx; i++)
        tmpvwgt[i] = hg->vwgt[i*hg->VtxWeightDim];
    }

    Zoltan_quicksort_pointer_inc_float(order, tmpvwgt, 0, hg->nVtx-1);
    if (tmpvwgt != hg->vwgt) ZOLTAN_FREE(&tmpvwgt);
    break;
  }

  case 3:
    /* increasing vertex degree */
    /* intentionally fall through into next case */
  case 4:
    /* increasing vertex degree, weighted by # pins */

    /* allocate 4 arrays of size hg->nVtx with a single malloc */
    if (!(ldegree = (int *) ZOLTAN_MALLOC(4*sizeof(int) * hg->nVtx))) {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Out of memory");
      ZOLTAN_FREE(&ldegree);
      return ZOLTAN_MEMERR;
    }
    /* first local data, then global data */
    lpins   = ldegree + hg->nVtx;
    gdegree = lpins + hg->nVtx;
    gpins   = gdegree + hg->nVtx;

    /* loop over vertices */
    for (i = 0; i < hg->nVtx; i++) {
      ldegree[i] = hg->vindex[i+1] - hg->vindex[i]; /* local degree */
      lpins[i] = 0;
      /* loop over edges, sum up #pins */
      for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
        edge = hg->vedge[j];
        lpins[i] += hg->hindex[edge+1] - hg->hindex[edge];
      }
    }

    /* sum up local degrees in each column to get global degrees */
    /* also sum up #pins in same communication */
    MPI_Allreduce(ldegree, gdegree, 2*hg->nVtx, MPI_INT, MPI_SUM,
                  hg->comm->col_comm);

    /* sort by global values. same on every processor. */
    if (hgp->visit_order == 3)
      Zoltan_quicksort_pointer_inc_int_int(order, gdegree, gpins,
                                           0, hg->nVtx-1);
    else /* hgp->visit_order == 4 */
      Zoltan_quicksort_pointer_inc_int_int(order, gpins, gdegree,
                                           0, hg->nVtx-1);

    ZOLTAN_FREE(&ldegree);
    break;

  /* add more cases here */
  }

  return ZOLTAN_OK;
}
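
/*
 * Illustrative sketch, not part of the original source: each visit-order
 * case above permutes "order" so that order[0..n-1] lists vertex indices
 * sorted by some key (weight, degree, or #pins).  The stand-alone helper
 * below shows the same idea with the C library qsort(); Zoltan itself uses
 * Zoltan_quicksort_pointer_inc_int_int so that ties are broken by a second
 * key and the result is identical on every process in a column.  The guard
 * macro and helper names are hypothetical.
 */
#ifdef ZOLTAN_PHG_VISIT_ORDER_SKETCH
#include <stdlib.h>

static const int *sketch_key;  /* key array consulted by the comparator */

static int sketch_cmp(const void *a, const void *b)
{
  int ia = *(const int *)a, ib = *(const int *)b;
  if (sketch_key[ia] != sketch_key[ib])
    return (sketch_key[ia] < sketch_key[ib]) ? -1 : 1;
  return (ia < ib) ? -1 : (ia > ib); /* deterministic tie-break by index */
}

static void example_order_by_key(int *order, const int *key, int n)
{
  int i;
  for (i = 0; i < n; i++) order[i] = i;  /* start from linear order */
  sketch_key = key;
  qsort(order, (size_t)n, sizeof(int), sketch_cmp);
}
#endif /* ZOLTAN_PHG_VISIT_ORDER_SKETCH */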
int Zoltan_PHG_CoarsePartition(
  ZZ *zz,
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments. */
  PHGPartParams *hgp   /* Input:  parameters to use. */
)
{
/*
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
  char *yo = "Zoltan_PHG_CoarsePartition";
  int ierr = ZOLTAN_OK;
  int i, si, j;
  static PHGComm scomm;          /* Serial communicator info */
  static int first_time = 1;
  HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
  int *spart = NULL;             /* Partition vectors for shg. */
  int *new_part = NULL;          /* Ptr to new partition vector. */
  float *bestvals = NULL;        /* Best cut values found so far */
  int worst, new_cand;
  float bal, cut, worst_cut;
  int fine_timing = (hgp->use_timers > 2);
  struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
  int local_coarse_part = hgp->LocalCoarsePartition;

  /* Number of iterations to try coarse partitioning on each proc. */
  /* 10 when p=1, and 1 when p is large. */
  const int num_coarse_iter = 1 + 9/zz->Num_Proc;

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }

  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX) {
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc = phg->comm;

    Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(hgc->RNGState_col),
                      hgc->col_comm);
    if (hgp->UsePrefPart) {
      for (i = 0; i < phg->nVtx; i++) {
        /* Impose fixed vertex/preferred part constraints. */
        if (phg->pref_part[i] < 0) {
          /* Free vertex in fixedvertex partitioning or repart */
          /* randomly assigned to a part */
          part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
        }
        else {
          if (phg->bisec_split < 0)
            /* direct k-way, use part numbers directly */
            part[i] = phg->pref_part[i];
          else
            /* recursive bisection, map to 0-1 part numbers */
            part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
        }
      }
    }
    else {
      for (i = 0; i < phg->nVtx; i++) {
        if (part[i] >= numPart || part[i] < 0) {
          if (first) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
            first = 0;
            ierr = ZOLTAN_WARN;
          }
          part[i] = ((part[i] < 0) ?
                     -part[i] : part[i]) % numPart;
        }
      }
    }
  }
  else if (numPart == 1) {
    /* everything goes in the one partition */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart
           && numPart >= phg->dist_x[phg->comm->nProc_x]) {
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc
                                           % NUM_COARSEPARTITION_FNS];
    }

    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /*
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather,
                           phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather,
                          phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }
    }

    /*
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                  sizeof(int));
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1) * sizeof(float));
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i = 0; i < num_coarse_iter; i++) {
      int savefmlooplimit = hgp->fm_loop_limit;

      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes,
                             new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine,
                           phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;

      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine,
                          phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0,
                                       numPart, new_part);
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart,
                                      &ierr);

      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                           "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }

      if (i < NUM_PART_KEEP)
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j = 1; j < NUM_PART_KEEP+1; j++) {
          if (worst_cut < bestvals[j]) {
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart + new_cand*(shg->nVtx);
    }

    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart. */
    for (i = 0; i < shg->nVtx; i++) {
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart,
                     MIN(NUM_PART_KEEP, num_coarse_iter),
                     spart, bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from pick_best.");
      goto End;
    }

    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }

      Zoltan_HG_HGraph_Free(shg);
      ZOLTAN_FREE(&shg);
    }
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
    ZOLTAN_FREE(&spart);
    ZOLTAN_FREE(&bestvals);
  }

End:
  if (fine_timing)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
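
/*
 * Illustrative sketch, not part of the original source: the candidate
 * bookkeeping above keeps NUM_PART_KEEP partition vectors plus one scratch
 * slot, and each new candidate overwrites the slot holding the worst
 * (largest) ratio-cut objective seen so far.  The helper below restates just
 * the slot-selection step; the guard macro and helper name are hypothetical.
 */
#ifdef ZOLTAN_PHG_COARSEPART_SKETCH
static int example_worst_slot(const float *vals, int nslots)
{
  int j, worst = 0;
  for (j = 1; j < nslots; j++)
    if (vals[j] > vals[worst])  /* larger objective value == worse candidate */
      worst = j;
  return worst;                 /* index of the slot to overwrite next */
}
#endif /* ZOLTAN_PHG_COARSEPART_SKETCH */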