int Zoltan_Divide_Machine(
   ZZ *zz,             /* The Zoltan structure (not used now, will be used
                          for pointer to machine details */
   int obj_wgt_dim,    /* Number of different weights (loads). */
   float *part_sizes,  /* Array of partition sizes, containing percentage of
                          work per partition. (length= obj_wgt_dim*num_parts) */
   int proc,           /* my processor number in global sense */
   MPI_Comm comm,      /* communicator for part of machine to be divided */
   int *set,           /* set that proc is in after divide (lowest global
                          numbered processor in set 0) */
   int *proclower,     /* lowest numbered processor in first set */
   int *procmid,       /* lowest numbered processor in second set */
   int *num_procs,     /* on input, # of procs to be divided
                          on exit, # of procs in the set that proc is in */
   int *partlower,     /* lowest numbered partition in first set */
   int *partmid,       /* lowest numbered partition in second set */
   int *num_parts,     /* on input, # of partitions to be divided
                          on exit, # of parts in the set that proc is in */
   double *fractionlo  /* actual division of machine: % of work to be
                          assigned to first set (length obj_wgt_dim) */
)
{
int i, j, k;
int np = 0;          /* Number of partitions on procmid */
int fpartmid;        /* First partition on procmid */
int totalparts;      /* Total number of partitions in input set. */
int totalprocs;      /* Total number of processors in input set. */
int dim = obj_wgt_dim;
double *sum = NULL;

/* This routine divides the current machine (defined by the communicator)
 * into two pieces.
 * For now, it simply divides the machine in half.  In the future, it will
 * be a more complicated routine taking into account the architecture of
 * the machine and communication network.
 * The two resulting sets contain contiguously numbered processors
 * and partitions.
 */

  if (dim<1) dim = 1;  /* In case obj_wgt_dim==0. */

  /* The following statement assumes that proclower is being set correctly in
     the calling routine if Tflops_Special flag is set */
  if (!zz->Tflops_Special)
     MPI_Allreduce(&proc, proclower, 1, MPI_INT, MPI_MIN, comm);

  totalparts = *partlower + *num_parts;
  totalprocs = *proclower + *num_procs;

  /* Compute procmid as roughly half the number of processors. */
  /* Then partmid is the lowest-numbered partition on procmid. */
  *procmid = *proclower + (*num_procs - 1)/2 + 1;
  if (*procmid < totalprocs)
     Zoltan_LB_Proc_To_Part(zz, *procmid, &np, &fpartmid);
  if (np > 0)
     *partmid = fpartmid;
  else {
     /* No partitions on procmid; find next part number in procs > procmid */
     i = *procmid;
     while (np == 0 && (++i) < totalprocs) {
        Zoltan_LB_Proc_To_Part(zz, i, &np, &fpartmid);
     }
     if (np)
        *partmid = fpartmid;
     else
        *partmid = totalparts;
  }

  /* Check special cases */
  if (!zz->LB.Single_Proc_Per_Part && *partmid != totalparts) {
     i = Zoltan_LB_Part_To_Proc(zz, *partmid, NULL);
     if (i != *procmid) {
        /* Partition is spread across several processors.  Don't allow mid
           to fall within a partition; reset procmid so that it falls at a
           partition boundary. */
        if (i != *proclower) {
           /* set procmid to lowest processor containing partmid */
           *procmid = i;
        }
        else { /* i == *proclower */
           /* Move mid to next partition so that procmid != proclower */
           (*partmid)++;
           *procmid = Zoltan_LB_Part_To_Proc(zz, *partmid, NULL);
        }
     }
  }

  /* Sum up desired partition sizes. */
  sum = (double *)ZOLTAN_MALLOC(dim*sizeof(double));
  if (sum == NULL) {
     /* FIX: original dereferenced a possibly-NULL allocation.  Callers
        already test for ierr < 0, so ZOLTAN_MEMERR is safe to return. */
     return ZOLTAN_MEMERR;
  }

  for (k=0; k<dim; k++){
    sum[k] = 0.0;
    fractionlo[k] = 0.0;
  }
  for (i = 0; i < *num_parts; i++) {
    j = *partlower + i;
    for (k=0; k<dim; k++){
      if (j < *partmid)
        fractionlo[k] += (double) part_sizes[j*dim+k];
      sum[k] += (double) part_sizes[j*dim+k];
    }
  }
  /* Guard against a zero total so fractionlo stays 0 instead of NaN. */
  for (k=0; k<dim; k++)
    if (sum[k] != 0.0) fractionlo[k] /= sum[k];

  /* Report which half this processor landed in, and shrink the counts to
     describe only that half. */
  if (proc < *procmid) {
    *set = 0;
    *num_parts = *partmid - *partlower;
    *num_procs = *procmid - *proclower;
  }
  else {
    *set = 1;
    *num_parts = totalparts - *partmid;
    *num_procs = totalprocs - *procmid;
  }

  ZOLTAN_FREE(&sum);
  return ZOLTAN_OK;
}
/* Main RIB (Recursive Inertial Bisection) driver.  Recursively halves the
 * set of processors/parts: each level computes an inertial direction
 * (compute_rib_direction), finds the weighted median along it
 * (Zoltan_RB_find_median), migrates dots across the cut
 * (Zoltan_RB_Send_Outgoing), and splits the communicator.  Once each
 * processor owns a single sub-domain, serial_rib generates any remaining
 * parts locally.  Optionally remaps parts, builds import/export (or
 * candidate) lists, and gathers the full cut tree.  Returns a Zoltan error
 * code (ierr); on error jumps to End for cleanup.
 */
static int rib_fn(
  ZZ *zz,                       /* The Zoltan structure with info for
                                   the RIB balancer. */
  int *num_import,              /* Number of non-local objects assigned to
                                   this processor in the new decomposition.
                                   When LB.Return_Lists==CANDIDATE_LISTS,
                                   num_import returns the number of input
                                   objects as given by ZOLTAN_NUM_OBJ_FN. */
  ZOLTAN_ID_PTR *import_global_ids, /* Returned value: array of global IDs for
                                   non-local objects in this processor's new
                                   decomposition.
                                   When LB.Return_Lists==CANDIDATE_LISTS,
                                   this array contains GIDs for all input
                                   objs as given by ZOLTAN_OBJ_LIST_FN.*/
  ZOLTAN_ID_PTR *import_local_ids,  /* Returned value: array of local IDs for
                                   non-local objects in this processor's new
                                   decomposition.
                                   When LB.Return_Lists==CANDIDATE_LISTS,
                                   this array contains LIDs for all input
                                   objs as given by ZOLTAN_OBJ_LIST_FN.*/
  int **import_procs,           /* Returned value: array of processor IDs for
                                   processors owning the non-local objects in
                                   this processor's new decomposition.
                                   When LB.Return_Lists==CANDIDATE_LISTS,
                                   the returned array is NULL. */
  int **import_to_part,         /* Returned value: array of parts to which
                                   objects are imported.
                                   When LB.Return_Lists==CANDIDATE_LISTS,
                                   the returned array is NULL. */
  int *num_export,              /* Returned value only when
                                   LB.Return_Lists==CANDIDATE_LISTS; number of
                                   input objs as given by ZOLTAN_NUM_OBJ_FN */
  ZOLTAN_ID_PTR *export_global_ids, /* Returned value only when
                                   LB.Return_Lists==CANDIDATE_LISTS; for each
                                   input obj (from ZOLTAN_OBJ_LIST_FN), return
                                   a candidate obj from the part to which the
                                   obj is assigned; used in PHG matching */
  double overalloc,             /* amount to overallocate by when realloc
                                   of dot array must be done.
                                   1.0 = no extra; 1.5 = 50% extra; etc. */
  int wgtflag,                  /* No. of weights per dot supplied by user. */
  int check_geom,               /* Check input & output for consistency? */
  int stats,                    /* Print timing & count summary? */
  int gen_tree,                 /* (0) do not (1) do generate full treept */
  int average_cuts,             /* (0) don't (1) compute the cut to be the
                                   average of the closest dots. */
  float *part_sizes             /* Input: Array of size
                                   zz->Num_Global_Parts * max(zz->Obj_Weight_Dim, 1)
                                   containing the percentage of work to be
                                   assigned to each part. */
)
{
  char yo[] = "rib_fn";
  int proc,nprocs;              /* my proc id, total # of procs */
  struct Dot_Struct *dotpt;     /* temporary pointer to local dot arrays */
  int pdotnum;                  /* # of dots - decomposition changes it */
  int *dotmark = NULL;          /* which side of median for each dot */
  int dotnum;                   /* number of dots */
  int dotmax = 0;               /* max # of dots arrays can hold */
  int dottop;                   /* dots >= this index are new */
  int proclower;                /* 1st proc in lower set */
  int procmid;                  /* 1st proc in upper set */
  int partlower;                /* 1st part in lower set */
  int partmid;                  /* 1st part in upper set */
  int set;                      /* which set processor is in = 0/1 */
  int old_set;                  /* set processor was in last cut = 0/1 */
  int root;                     /* part that stores last cut */
  int num_procs;                /* number of procs in current set */
  int num_parts;                /* number of parts in current set */
  int ierr = ZOLTAN_OK;         /* error flag. */
  double *value = NULL;         /* temp array for median_find */
  double *wgts = NULL;          /* temp array for serial_rib */
  double valuehalf;             /* median cut position */
  double cm[3];                 /* Center of mass of objects */
  double evec[3];               /* Eigenvector defining direction */
  int first_guess = 0;          /* flag if first guess for median search */
  int allocflag;                /* have to re-allocate space */
  double time1=0,time2=0;       /* timers */
  double time3=0,time4=0;       /* timers */
  double timestart=0,timestop=0; /* timers */
  double timers[4]={0.,0.,0.,0.}; /* diagnostic timers
                                     0 = start-up time before recursion
                                     1 = time before median iterations
                                     2 = time in median iterations
                                     3 = communication time */
  ZOLTAN_GNO_TYPE counters[7];  /* diagnostic counts
                                   0 = unused
                                   1 = # of dots sent
                                   2 = # of dots received
                                   3 = most dots this proc ever owns
                                   4 = most dot memory this proc ever allocs
                                   5 = # of times a previous cut is re-used
                                   6 = # of reallocs of dot array */
  int i, j;                     /* local variables */
  int use_ids;                  /* When true, global and local IDs will be
                                   stored along with dots in the RCB_STRUCT.
                                   When false, storage, manipulation, and
                                   communication of IDs is avoided.
                                   Set by call to Zoltan_RB_Use_IDs(). */
  RIB_STRUCT *rib = NULL;       /* Pointer to data structures for RIB */
  struct rib_tree *treept = NULL; /* tree of cuts - single cut on exit*/
  double start_time, end_time;
  double lb_time[2]={0,0};
  int tfs[2], tmp_tfs[2];       /* added for Tflops_Special; max number
                                   of procs and parts over all processors
                                   in each iteration (while loop) of
                                   parallel partitioning. */
  int old_nprocs;               /* added for Tflops_Special */
  int old_nparts;               /* added for Tflops_Special */
  double valuelo;               /* smallest value of value[i] */
  double valuehi;               /* largest value of value[i] */
  double weight[RB_MAX_WGTS];   /* weight for current set */
  double weightlo[RB_MAX_WGTS]; /* weight of lower side of cut */
  double weighthi[RB_MAX_WGTS]; /* weight of upper side of cut */
  double fractionlo[RB_MAX_WGTS]; /* desired wt in lower half */
  int *dotlist = NULL;          /* list of dots for find_median.
                                   allocated above find_median for
                                   better efficiency (don't necessarily
                                   have to realloc for each find_median).*/
  int rectilinear_blocks = 0;   /* parameter for find_median (not used by
                                   rib) */
  int fp=0;                     /* first part assigned to this proc. */
  int np=0;                     /* number of parts assigned to this proc. */
  int wgtdim;                   /* max(wgtflag,1) */
  int *dindx = NULL, *tmpdindx = NULL;

  /* MPI data types and user functions */
  MPI_Comm local_comm, tmp_comm;
  int free_comm = FALSE;        /* Flag indicating whether MPI_Comm_free
                                   should be called on local_comm at end. */

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME)) {
    MPI_Barrier(zz->Communicator);
    timestart = time1 = Zoltan_Time(zz->Timer);
  }

  /* setup for parallel */
  proc = zz->Proc;
  nprocs = zz->Num_Proc;
  num_parts = zz->LB.Num_Global_Parts;

  /*
   * Determine whether to store, manipulate, and communicate global and
   * local IDs.
   */
  use_ids = Zoltan_RB_Use_IDs(zz);

  /*
   * Build the RIB Data structure and
   * set pointers to information in it.
   */
  start_time = Zoltan_Time(zz->Timer);
  ierr = Zoltan_RIB_Build_Structure(zz, &pdotnum, &dotmax, wgtflag,
                                    overalloc, use_ids);
  if (ierr < 0) {
    ZOLTAN_PRINT_ERROR(proc, yo,
                       "Error returned from Zoltan_RIB_Build_Structure.");
    goto End;
  }

  rib = (RIB_STRUCT *) (zz->LB.Data_Structure);
  treept = rib->Tree_Ptr;
  end_time = Zoltan_Time(zz->Timer);
  lb_time[0] = end_time - start_time;
  start_time = end_time;

  /* local copies of calling parameters */
  dottop = dotnum = pdotnum;

  /* initialize timers and counters */
  counters[0] = 0;
  counters[1] = 0;
  counters[2] = 0;
  counters[3] = dotnum;
  counters[4] = dotmax;
  counters[5] = 0;
  counters[6] = 0;

  /* Ensure there are dots */
  MPI_Allreduce(&dotnum, &i, 1, MPI_INT, MPI_MAX, zz->Communicator);

  if (i == 0){
    if (proc == 0){
      ZOLTAN_PRINT_WARN(proc, yo, "RIB partitioning called with no objects");
    }
    timestart = timestop = 0;
    goto EndReporting;
  }

  /* If using RIB for matching, need to generate candidate lists.
   * Candidate lists include input GIDs, LIDs as provided by the application.
   * We need to capture that input here before we move any dots!
   * We return it in the import lists.
   * Candidates will be computed after partitioning and returned in the
   * export lists.
   */
  if (zz->LB.Return_Lists == ZOLTAN_LB_CANDIDATE_LISTS) {
    ierr = Zoltan_RB_Candidates_Copy_Input(zz, dotnum,
                                           rib->Global_IDs, rib->Local_IDs,
                                           &rib->Dots,
                                           num_import,
                                           import_global_ids, import_local_ids,
                                           import_procs, import_to_part);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc,yo,
                         "Error returned from Zoltan_RB_Return_Arguments.");
      goto End;
    }
  }

  /* create mark and list arrays for dots */
  allocflag = 0;
  if (dotmax > 0) {
    if (!(dotmark = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))
     || !(value = (double *) ZOLTAN_MALLOC(dotmax*sizeof(double)))
     || !(dotlist = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))) {
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
  }
  else {
    dotmark = NULL;
    value = NULL;
    dotlist = NULL;
  }

  /* set dot weights = 1.0 if user didn't and determine total weight */
  dotpt = &rib->Dots;
  if (dotpt->nWeights == 0) {
    weightlo[0] = (double) dotnum;
    dotpt->uniformWeight = 1.0;
    wgtdim = 1;
  }
  else {
    double *wgt;
    /* Sum each weight component over the local dots; Weight is stored
       interleaved with stride nWeights. */
    for (j=0; j<dotpt->nWeights; j++){
      weightlo[j] = 0.0;
      wgt = dotpt->Weight + j;
      for (i=0; i < dotnum; i++){
        weightlo[j] += *wgt;
        wgt += dotpt->nWeights;
      }
    }
    wgtdim = dotpt->nWeights;
  }
  MPI_Allreduce(weightlo, weight, wgtdim, MPI_DOUBLE, MPI_SUM,
                zz->Communicator);

  if (check_geom) {
    ierr = Zoltan_RB_check_geom_input(zz, dotpt, dotnum);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo,
                         "Error returned from Zoltan_RB_check_geom_input");
      goto End;
    }
  }

  /* create local communicator for use in recursion */
  if (zz->Tflops_Special)
    local_comm = zz->Communicator;
  else {
    MPI_Comm_dup(zz->Communicator,&local_comm);
    free_comm = TRUE;
  }

  if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME)) {
    time2 = Zoltan_Time(zz->Timer);
    timers[0] = time2 - time1;
  }

  /* recursively halve until just one part or proc in set */
  old_nprocs = num_procs = nprocs;
  old_nparts = num_parts;
  partlower = 0;
  root = 0;
  old_set = 1;
  ierr = Zoltan_LB_Proc_To_Part(zz, proc, &np, &fp);
  for (i = fp; i < (fp + np); i++) {
    treept[i].parent = 0;
    treept[i].left_leaf = 0;
  }
  if (zz->Tflops_Special) {
    proclower = 0;
    tfs[0] = nprocs;
    tfs[1] = num_parts;
  }

  /* With Tflops_Special all processors iterate in lock-step until the
     global maxima tfs[] reach 1; otherwise iterate while this set still
     has both multiple parts and multiple procs. */
  while ((num_parts > 1 && num_procs > 1)
      || (zz->Tflops_Special && tfs[0] > 1 && tfs[1] > 1)) {

    ierr = Zoltan_Divide_Machine(zz, zz->Obj_Weight_Dim, part_sizes,
                                 proc, local_comm, &set,
                                 &proclower, &procmid, &num_procs,
                                 &partlower, &partmid, &num_parts,
                                 fractionlo);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo, "Error in Zoltan_Divide_Machine.");
      goto End;
    }

    /* tfs[0] is max number of processors in all sets over all processors -
     * tfs[1] is max number of parts in all sets over all processors -
     * force all processors to go through all levels of parallel rib */
    if (zz->Tflops_Special) {
      tmp_tfs[0] = num_procs;
      tmp_tfs[1] = num_parts;
      MPI_Allreduce(tmp_tfs, tfs, 2, MPI_INT, MPI_MAX, local_comm);
    }

    /* create mark array and active list for dots */
    if (allocflag) {
      allocflag = 0;
      ZOLTAN_FREE(&dotmark);
      ZOLTAN_FREE(&value);
      ZOLTAN_FREE(&dotlist);
      if (!(dotmark = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))
       || !(value = (double *) ZOLTAN_MALLOC(dotmax*sizeof(double)))
       || !(dotlist = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))) {
        ierr = ZOLTAN_MEMERR;
        goto End;
      }
    }

    dotpt = &rib->Dots;

    if (old_nparts > 1 && old_nprocs > 1) {  /* test added for Tflops_Special;
                                                compute values only if looping
                                                to decompose, not if looping to
                                                keep Tflops_Special happy. */
      ierr = compute_rib_direction(zz, zz->Tflops_Special, rib->Num_Geom,
                                   &valuelo, &valuehi, dotpt, NULL, dotnum,
                                   wgtflag, cm, evec, value,
                                   local_comm, proc, old_nprocs, proclower);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(proc, yo,
                           "Error returned from compute_rib_direction");
        goto End;
      }
    }
    else {  /* For Tflops_Special:  initialize value when looping only for
               Tflops_Special */
      for (i = 0; i < dotmax; i++)
        value[i] = 0.0;
      valuelo = valuehi = 0.0;
    }

    if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME))
      time2 = Zoltan_Time(zz->Timer);

    if (!Zoltan_RB_find_median(
                 zz->Tflops_Special, value, dotpt->Weight,
                 dotpt->uniformWeight, dotmark, dotnum, proc,
                 fractionlo, local_comm, &valuehalf, first_guess,
                 nprocs, old_nprocs, proclower, old_nparts,
                 wgtflag, valuelo, valuehi, weight[0], weightlo,
                 weighthi, dotlist, rectilinear_blocks, average_cuts)) {
      ZOLTAN_PRINT_ERROR(proc, yo,
                         "Error returned from Zoltan_RB_find_median.");
      ierr = ZOLTAN_FATAL;
      goto End;
    }

    if (set)    /* set weight for current part */
      for (j=0; j<wgtdim; j++) weight[j] = weighthi[j];
    else
      for (j=0; j<wgtdim; j++) weight[j] = weightlo[j];

    if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME))
      time3 = Zoltan_Time(zz->Timer);

    /* store cut info in tree only if proc "owns" partmid */
    /* test of partmid > 0 prevents treept[0] being set when this cut is
       only removing low-numbered processors (proclower to procmid-1) that
       have no parts in them from the processors remaining to be
       partitioned. */
    if (partmid > 0 && partmid == fp) {
      treept[partmid].cm[0] = cm[0];
      treept[partmid].cm[1] = cm[1];
      treept[partmid].cm[2] = cm[2];
      treept[partmid].ev[0] = evec[0];
      treept[partmid].ev[1] = evec[1];
      treept[partmid].ev[2] = evec[2];
      treept[partmid].cut = valuehalf;
      treept[partmid].parent = old_set ? -(root+1) : root+1;
      /* The following two will get overwritten when the information is
         assembled if this is not a terminal cut */
      treept[partmid].left_leaf = -partlower;
      treept[partmid].right_leaf = -partmid;
    }

    if (old_nprocs > 1 && partmid > 0 && partmid != partlower + old_nparts) {
      /* old_nprocs > 1 test: Don't reset these values if proc is in loop
       * only because of other procs for Tflops_Special.
       * partmid > 0 test: Don't reset these values if low-numbered processors
       * (proclower to procmid-1) have zero parts and this cut is removing
       * them from the processors remaining to be partitioned.
       * partmid != partlower + old_nparts test: Don't reset these values if
       * cut is removing high-numbered processors with zero parts from
       * the processors remaining to be partitioned.
       */
      old_set = set;
      root = partmid;
    }

    ierr = Zoltan_RB_Send_Outgoing(zz, &(rib->Global_IDs), &(rib->Local_IDs),
                                   &(rib->Dots), &dotmark,
                                   &dottop, &dotnum, &dotmax,
                                   set, &allocflag, overalloc,
                                   stats, counters, use_ids,
                                   local_comm, proclower,
                                   old_nprocs, partlower, partmid);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo,
                         "Error returned from Zoltan_RB_Send_Outgoing.");
      goto End;
    }

    /* create new communicators */
    if (zz->Tflops_Special) {
      if (set) {
        proclower = procmid;
        partlower = partmid;
      }
      old_nprocs = num_procs;
      old_nparts = num_parts;
    }
    else {
      if (set) partlower = partmid;
      MPI_Comm_split(local_comm,set,proc,&tmp_comm);
      MPI_Comm_free(&local_comm);
      local_comm = tmp_comm;
      old_nprocs = num_procs;
      old_nparts = num_parts;
    }

    if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME)) {
      time4 = Zoltan_Time(zz->Timer);
      timers[1] += time2 - time1;
      timers[2] += time3 - time2;
      timers[3] += time4 - time3;
    }
  }

  /* have recursed all the way to a single processor sub-domain */

  /* Send dots to correct processors for their parts.  This is needed
     most notably when a processor has zero parts on it, but still has
     some dots after the parallel partitioning. */
  ierr = Zoltan_RB_Send_To_Part(zz, &(rib->Global_IDs), &(rib->Local_IDs),
                                &(rib->Dots), &dotmark, &dottop,
                                &dotnum, &dotmax, &allocflag, overalloc,
                                stats, counters, use_ids);

  if (ierr < 0) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                       "Error returned from Zoltan_RB_Send_To_Part");
    goto End;
  }

  /* All dots are now on the processors they will end up on; now generate
   * more parts if needed. */

  if (num_parts > 1) {
    if (dotpt->nWeights)
      wgts = (double *) ZOLTAN_MALLOC(dotpt->nWeights * dotnum
                                      * sizeof(double));
    dindx = (int *) ZOLTAN_MALLOC(dotnum * 2 * sizeof(int));
    tmpdindx = dindx + dotnum;
    if (allocflag) {
      ZOLTAN_FREE(&dotmark);
      ZOLTAN_FREE(&value);
      ZOLTAN_FREE(&dotlist);
      if (!(dotmark = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))
       || !(value = (double *) ZOLTAN_MALLOC(dotmax*sizeof(double)))
       || !(dotlist = (int *) ZOLTAN_MALLOC(dotmax*sizeof(int)))) {
        ZOLTAN_PRINT_ERROR(proc, yo, "Memory error.");
        ierr = ZOLTAN_MEMERR;
        goto End;
      }
    }
    for (i = 0; i < dotnum; i++)
      dindx[i] = i;

    ierr = serial_rib(zz, &rib->Dots, dotmark, dotlist, old_set, root,
                      rib->Num_Geom, weight[0], dotnum, num_parts,
                      &(dindx[0]), &(tmpdindx[0]), partlower,
                      proc, wgtflag, stats, gen_tree,
                      rectilinear_blocks, average_cuts,
                      treept, value, wgts, part_sizes);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo, "Error returned from serial_rib");
      goto End;
    }
    ZOLTAN_FREE(&wgts);
  }

  end_time = Zoltan_Time(zz->Timer);
  lb_time[1] = end_time - start_time;

  if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME)) {
    MPI_Barrier(zz->Communicator);
    timestop = time1 = Zoltan_Time(zz->Timer);
  }

  /* error checking and statistics */
  if (check_geom) {
    ierr = Zoltan_RB_check_geom_output(zz, &rib->Dots, part_sizes, np, fp,
                                       dotnum, pdotnum, NULL);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo,
                         "Error returned from Zoltan_RB_check_geom_output");
      goto End;
    }
  }

EndReporting:

  /* update calling routine parameters */
  start_time = Zoltan_Time(zz->Timer);
  pdotnum = dotnum;

  /* Perform remapping (if requested) */
  if (zz->LB.Remap_Flag) {
    ierr = Zoltan_RB_Remap(zz, &(rib->Global_IDs), &(rib->Local_IDs),
                           &(rib->Dots), &dotnum, &dotmax,
                           &allocflag, overalloc, stats, counters, use_ids);
    /* Note:  dottop is no longer valid after remapping.  Remapping might
       destroy the nice local-followed-by-non-local ordering of the dots
       array.  Do not use dottop after remapping. */
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                         "Error returned from Zoltan_RB_Remap.");
      goto End;
    }
  }

  /* build return arguments */
  if (zz->LB.Return_Lists != ZOLTAN_LB_NO_LISTS &&
      zz->LB.Return_Lists != ZOLTAN_LB_CANDIDATE_LISTS) {
    /* zz->LB.Return_Lists is true ==> use_ids is true */
    ierr = Zoltan_RB_Return_Arguments(zz, rib->Global_IDs, rib->Local_IDs,
                                      &rib->Dots, num_import,
                                      import_global_ids, import_local_ids,
                                      import_procs, import_to_part,
                                      dotnum);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc, yo,
                         "Error returned from Zoltan_RB_Return_Arguments.");
      goto End;
    }
  }
  else if (zz->LB.Return_Lists == ZOLTAN_LB_CANDIDATE_LISTS) {
    /* Select a candidate for each part and return it in the export_GIDs. */
    ierr = Zoltan_RB_Candidates_Output(zz, dotnum, dindx,
                                       rib->Global_IDs, rib->Local_IDs,
                                       &rib->Dots,
                                       *num_import, *import_global_ids,
                                       num_export, export_global_ids);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(proc,yo,
                         "Error returned from Zoltan_RB_Return_Candidates.");
      goto End;
    }
  }
  ZOLTAN_FREE(&dindx);

  if (gen_tree) {
    int *displ, *recvcount;
    int sendcount;
    struct rib_tree *treetmp = NULL; /* temporary tree of cuts; used to keep
                                        valgrind from reporting overlapped
                                        memory in MPI_Allgatherv */

    treetmp = (struct rib_tree *)
              ZOLTAN_MALLOC(zz->LB.Num_Global_Parts*
                            sizeof(struct rib_tree));
    displ = (int *) ZOLTAN_MALLOC(2 * zz->Num_Proc * sizeof(int));
    if (!displ || !treetmp) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
    recvcount = displ + zz->Num_Proc;

    ierr = Zoltan_RB_Tree_Gatherv(zz, sizeof(struct rib_tree), &sendcount,
                                  recvcount, displ);
    /*
     * Create copy of treept so that MPI_Allgatherv doesn't use same
     * memory for sending and receiving; removes valgrind warning.
     */
    for (i = 0; i < zz->LB.Num_Global_Parts; i++)
      treetmp[i] = treept[i];

    MPI_Allgatherv(&treetmp[fp], sendcount, MPI_BYTE, treept, recvcount,
                   displ, MPI_BYTE, zz->Communicator);
    /* Convert parent links into child links for the assembled tree. */
    for (i = 1; i < zz->LB.Num_Global_Parts; i++){
      if (treept[i].parent > 0)
        treept[treept[i].parent - 1].left_leaf = i;
      else if (treept[i].parent < 0)
        treept[-treept[i].parent - 1].right_leaf = i;
    }

    ZOLTAN_FREE(&displ);
    ZOLTAN_FREE(&treetmp);
  }
  else {
    treept[0].right_leaf = -1;
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL)
    print_rib_tree(zz, np, fp, &(treept[fp]));

  end_time = Zoltan_Time(zz->Timer);
  lb_time[0] += (end_time - start_time);

  if (stats || (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME))
    Zoltan_RB_stats(zz, timestop-timestart, &rib->Dots, dotnum,
                    part_sizes, timers, counters, stats, NULL, NULL, FALSE);

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME) {
    if (zz->Proc == zz->Debug_Proc)
      printf("ZOLTAN RIB Times: \n");
    Zoltan_Print_Stats(zz->Communicator, zz->Debug_Proc, lb_time[0],
                       "ZOLTAN Build: ");
    Zoltan_Print_Stats(zz->Communicator, zz->Debug_Proc, lb_time[1],
                       "ZOLTAN RIB: ");
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
    /* zz->Debug_Level >= ZOLTAN_DEBUG_ALL ==> use_ids is true */
    Zoltan_RB_Print_All(zz, rib->Global_IDs, &rib->Dots,
                        dotnum, *num_import,
                        *import_global_ids, *import_procs);
  }

End:

  /* Free memory allocated by the algorithm. */

  if (free_comm) MPI_Comm_free(&local_comm);
  ZOLTAN_FREE(&dotmark);
  ZOLTAN_FREE(&value);
  ZOLTAN_FREE(&dotlist);

  if (!gen_tree &&                         /* don't need parts */
      rib && (rib->Tran.Target_Dim < 0)) { /* don't need transformation */
    /* Free all memory used. */
    Zoltan_RIB_Free_Structure(zz);
  }
  else if (rib != NULL) {
    /* Free only Dots and IDs; keep other structures. */
    ZOLTAN_FREE(&(rib->Global_IDs));
    ZOLTAN_FREE(&(rib->Local_IDs));
    Zoltan_Free_And_Reset_Dot_Structure(&(rib->Dots));
  }

  ZOLTAN_TRACE_EXIT(zz, yo);
  return(ierr);
}
static int local_HEs_from_import_lists(
  ZZ *zz,
  int remap_type,      /* type of remapping to do:  parts, procs, or none. */
  int nobj,            /* # objs the processor knows about (keep + imports) */
  int *proc,           /* On input, old processor assignment for each obj;
                          Upon return, remapped new proc assignment for
                          each obj. */
  int *old_part,       /* old partition assignments for each objs */
  int *new_part,       /* On input, new partition assignments for each objs.
                          Upon return, remapped new partition assignments */
  int *HEcnt,          /* # of HEs allocated. */
  int **HEinfo         /* Array of HE info; for each HE, two pins and one
                          edge weight. Stored as a single vector to minimize
                          communication calls. */
)
{
/* Build local hyperedges describing where my objects came from, used to
 * remap parts (to new processors or new part numbers) so data movement is
 * reduced.  Assumes the algorithm built import lists: every object listed
 * ended up on my_proc, so the "new" side of each hyperedge is local.
 * Each HE is (old assignment, new assignment, weight = #objs); zero-weight
 * entries are dropped.  Returns a Zoltan error code.
 */
char *yo = "local_HEs_from_import_lists";
int ierr = ZOLTAN_OK;
int my_proc = zz->Proc;      /* This processor's rank. */
int *wgt = NULL;             /* dense table of HE weights; zero entries are
                                filtered out before building HEinfo. */
int wgt_len;                 /* # of slots in wgt. */
int *he;                     /* alias for *HEinfo while filling it. */
int k, nedges, base;

  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {
    /* Renumber new processors to minimize changes in proc assignment. */

    wgt_len = zz->Num_Proc;
    wgt = (int *) ZOLTAN_CALLOC(wgt_len, sizeof(int));
    if (wgt == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* proc[] still holds OLD owners at this point; histogram them. */
    for (k = 0; k < nobj; k++)
      wgt[proc[k]]++;

    /* One hyperedge per old processor that sent us anything. */
    nedges = 0;
    for (k = 0; k < wgt_len; k++)
      if (wgt[k] != 0) nedges++;
    *HEcnt = nedges;

    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0)
      goto End;
    he = *HEinfo;

    nedges = 0;
    for (k = 0; k < wgt_len; k++) {
      if (wgt[k] != 0) {
        base = nedges * HEINFO_ENTRIES;
        he[base]   = k;         /* Old processor number */
        he[base+1] = my_proc;   /* New processor number */
        he[base+2] = wgt[k];    /* weight; zeros already skipped */
        nedges++;
      }
    }
  }
  else {  /* ZOLTAN_LB_REMAP_PARTS */
    /* Renumber new partitions to minimize changes in partition assignment */
    int lo = INT_MAX;   /* lowest old part number seen locally */
    int hi = 0;         /* highest old part number seen locally */
    int span;           /* # of old part numbers we index (hi-lo+1) */
    int fp;             /* First partition on this proc in new decomp. */
    int np;             /* # of partitions on this proc in new decomp. */

    for (k = 0; k < nobj; k++) {
      if (old_part[k] < lo) lo = old_part[k];
      if (old_part[k] > hi) hi = old_part[k];
    }

    /* Don't include old partition numbers that are greater than
     * zz->LB.Num_Global_Parts - 1; they are not valid values for
     * remapping of new partition numbers. */
    if (lo >= zz->LB.Num_Global_Parts) lo = zz->LB.Num_Global_Parts - 1;
    if (hi >= zz->LB.Num_Global_Parts) hi = zz->LB.Num_Global_Parts - 1;
    span = hi - lo + 1;

    Zoltan_LB_Proc_To_Part(zz, my_proc, &np, &fp);
    wgt_len = np * span;

    if (wgt_len > 0) {
      wgt = (int *) ZOLTAN_CALLOC(wgt_len, sizeof(int));
      if (wgt == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
        ierr = ZOLTAN_MEMERR;
        goto End;
      }
    }

    /* 2-D histogram indexed by (new part - fp, old part - lo); only
       old parts < Num_Global_Parts are valid remapping values. */
    for (k = 0; k < nobj; k++) {
      if (old_part[k] < zz->LB.Num_Global_Parts) {
        base = (new_part[k] - fp) * span;
        wgt[base + (old_part[k] - lo)]++;
      }
    }

    nedges = 0;
    for (k = 0; k < wgt_len; k++)
      if (wgt[k] != 0) nedges++;
    *HEcnt = nedges;

    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0)
      goto End;
    he = *HEinfo;

    nedges = 0;
    for (k = 0; k < wgt_len; k++) {
      if (wgt[k] != 0) {
        base = nedges * HEINFO_ENTRIES;
        he[base]   = k % span + lo;   /* Old partition number */
        he[base+1] = k / span + fp;   /* New partition number */
        he[base+2] = wgt[k];          /* weight; zeros already skipped */
        nedges++;
      }
    }
  }

End:

  if (wgt) ZOLTAN_FREE(&wgt);
  return ierr;
}
static int Zoltan_LB(
  ZZ *zz,                 /* The Zoltan structure (function pointers set). */
  int include_parts,      /* Flag indicating whether to generate partition
                             informtion; 0 if called by Zoltan_LB_Balance,
                             1 if called by Zoltan_LB_Partition.
                             NOTE(review): not referenced in this body --
                             presumably consumed by callees via zz; confirm. */
  int *changes,           /* Set to zero or one depending on if Zoltan
                             determines a new decomposition or not:
                             zero - No changes to the decomposition were
                             made by the load-balancing algorithm; migration
                             is not needed.
                             one - A new decomposition is suggested by the
                             load-balancer; migration is needed to establish
                             the new decomposition. */
  int *num_gid_entries,   /* The number of array entries in a global ID;
                             set to be the max over all processors in
                             zz->Communicator of the parameter
                             Num_Global_ID_Entries. */
  int *num_lid_entries,   /* The number of array entries in a local ID;
                             set to be the max over all processors in
                             zz->Communicator of the parameter
                             Num_Local_ID_Entries. */
  int *num_import_objs,   /* The number of non-local objects in the
                             processor's new decomposition. */
  ZOLTAN_ID_PTR *import_global_ids,/* Array of global IDs for non-local objects
                             (i.e., objs to be imported) in the processor's
                             new decomposition. */
  ZOLTAN_ID_PTR *import_local_ids, /* Array of local IDs for non-local objects
                             (i.e., objs to be imported) in the processor's
                             new decomposition. */
  int **import_procs,     /* Array of processor IDs for processors currently
                             owning non-local objects (i.e., objs to be
                             imported) in this processor's new decomposition. */
  int **import_to_part,   /* Partition to which the objects should be
                             imported. */
  int *num_export_objs,   /* The number of local objects that need to be
                             exported from the processor to establish the
                             new decomposition. */
  ZOLTAN_ID_PTR *export_global_ids,/* Array of global IDs for objects that need
                             to be exported (assigned and sent to other
                             processors) to establish the new decomposition. */
  ZOLTAN_ID_PTR *export_local_ids, /* Array of local IDs for objects that need
                             to be exported (assigned and sent to other
                             processors) to establish the new decomposition. */
  int **export_procs,     /* Array of destination processor IDs for objects
                             that need to be exported to establish the new
                             decomposition. */
  int **export_to_part    /* Partition to which objects should be exported. */
)
{
/*
 * Main load-balancing routine.
 * Input:  a Zoltan structure with appropriate function pointers set.
 * Output:
 *   changes
 *   num_import_objs
 *   import_global_ids
 *   import_local_ids
 *   import_procs
 *   import_to_part
 *   num_export_objs
 *   export_global_ids
 *   export_local_ids
 *   export_procs
 *   export_to_part
 * Return values:
 *   Zoltan error code.
 */

char *yo = "Zoltan_LB";
int gmax;    /* Maximum number of imported/exported objects
                over all processors. */
int error = ZOLTAN_OK;    /* Error code */
double start_time, end_time;
double lb_time[2] = {0.0,0.0};   /* [0]=partition time, [1]=migration time */
char msg[256];
int comm[3],gcomm[3];            /* local/global values for the Allreduce
                                    that syncs ID sizes and Return_Lists */
float *part_sizes = NULL, *fdummy = NULL;
int wgt_dim, part_dim;
int all_num_obj, i, ts, idIdx;
struct Hash_Node **ht;           /* lookup table of exported GIDs, used when
                                    building complete export lists */
int *export_all_procs, *export_all_to_part, *parts=NULL;
ZOLTAN_ID_PTR all_global_ids=NULL, all_local_ids=NULL;
ZOLTAN_ID_PTR gid;

ZOLTAN_TRACE_ENTER(zz, yo);

if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
  Zoltan_Print_Key_Params(zz);

start_time = Zoltan_Time(zz->Timer);

#ifdef ZOLTAN_DRUM
/* initialize DRUM if needed */
Zoltan_Drum_Create_Model(zz);

/* stop DRUM monitors */
Zoltan_Drum_Stop_Monitors(zz);
#endif

/*
 * Compute Max number of array entries per ID over all processors.
 * Compute Max number of return arguments for Zoltan_LB_Balance.
 * This is a sanity-maintaining step; we don't want different
 * processors to have different values for these numbers.
 */
comm[0] = zz->Num_GID;
comm[1] = zz->Num_LID;
comm[2] = zz->LB.Return_Lists;
MPI_Allreduce(comm, gcomm, 3, MPI_INT, MPI_MAX, zz->Communicator);
zz->Num_GID = *num_gid_entries = gcomm[0];
zz->Num_LID = *num_lid_entries = gcomm[1];
zz->LB.Return_Lists = gcomm[2];

/* assume no changes */
*changes = 0;

/* Initialize all output lists to "empty" before doing any work. */
*num_import_objs = *num_export_objs = 0;
*import_global_ids = NULL;
*import_local_ids = NULL;
*import_procs = NULL;
*import_to_part = NULL;
*export_global_ids = NULL;
*export_local_ids = NULL;
*export_procs = NULL;
*export_to_part = NULL;

/*
 *  Return if this processor is not in the Zoltan structure's
 *  communicator.
 */
if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz))
  goto End;

if (zz->LB.Method == NONE) {
  if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
    printf("%s Balancing method selected == NONE; no balancing performed\n",
            yo);
  error = ZOLTAN_WARN;
  goto End;
}

/*
 *  Sync the random number generator across processors.
 */
Zoltan_Srand_Sync(Zoltan_Rand(NULL), NULL, zz->Communicator);

/*
 *  Construct the heterogenous machine description.
 */
error = Zoltan_Build_Machine_Desc(zz);

if (error == ZOLTAN_FATAL)
  goto End;

ZOLTAN_TRACE_DETAIL(zz, yo, "Done machine description");

/* Since generating a new partition, need to free old mapping vector */
zz->LB.OldRemap = zz->LB.Remap;
zz->LB.Remap = NULL;

error = Zoltan_LB_Build_PartDist(zz);
if (error != ZOLTAN_OK && error != ZOLTAN_WARN)
  goto End;

if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
  int i, np, fp;
  for (i = 0; i < zz->Num_Proc; i++) {
    Zoltan_LB_Proc_To_Part(zz, i, &np, &fp);
    printf("%d Proc_To_Part Proc %d NParts %d FPart %d\n",
           zz->Proc, i, np, fp);
  }
}

/*
 * Generate partitions sizes.
 */
#ifdef ZOLTAN_DRUM
/* set partition sizes computed by DRUM, if requested */
Zoltan_Drum_Set_Part_Sizes(zz);
#endif

wgt_dim = zz->Obj_Weight_Dim;
/* part_dim is at least 1 even when no object weights are used. */
part_dim = ((wgt_dim > 0) ? wgt_dim : 1);

part_sizes = (float *) ZOLTAN_MALLOC(sizeof(float) * part_dim
                                     * zz->LB.Num_Global_Parts);
if (part_sizes == NULL) {
  ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
  error = ZOLTAN_MEMERR;
  goto End;
}

/* Get partition sizes. */
Zoltan_LB_Get_Part_Sizes(zz, zz->LB.Num_Global_Parts, part_dim,
                         part_sizes);

/*
 * Call the actual load-balancing function.
 */
error = zz->LB.LB_Fn(zz, part_sizes,
                     num_import_objs, import_global_ids, import_local_ids,
                     import_procs, import_to_part,
                     num_export_objs, export_global_ids, export_local_ids,
                     export_procs, export_to_part);

ZOLTAN_FREE(&part_sizes);

if (error == ZOLTAN_FATAL || error == ZOLTAN_MEMERR){
  sprintf(msg, "Partitioning routine returned code %d.", error);
  ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
  goto End;
}
else if (error){
  /* Non-fatal code (e.g. a warning): report but keep going. */
  if (zz->Debug_Level >ZOLTAN_DEBUG_NONE) {
    sprintf(msg, "Partitioning routine returned code %d.", error);
    ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
  }
}

ZOLTAN_TRACE_DETAIL(zz, yo, "Done partitioning");

#ifdef ZOLTAN_DRUM
/* restart DRUM monitors -- should happen later but there are a lot of
   ways out of Zoltan_LB and we want to make sure they do start */
Zoltan_Drum_Start_Monitors(zz);
#endif

/* A method may return import lists, export lists, or both; a count of -1
   means "this list was not produced".  Reduce on whichever is valid. */
if (*num_import_objs >= 0)
  MPI_Allreduce(num_import_objs, &gmax, 1, MPI_INT, MPI_MAX,
                zz->Communicator);
else /* use export data */
  MPI_Allreduce(num_export_objs, &gmax, 1, MPI_INT, MPI_MAX,
                zz->Communicator);

if (gmax == 0) {

  /*
   *  Decomposition was not changed by the load balancing; no migration
   *  is needed.
   */
  if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
    printf("%s No changes to the decomposition due to partitioning; "
           "no migration is needed.\n", yo);

  /*
   *  Reset num_import_objs and num_export_objs; don't want to return
   *  -1 for the arrays that weren't returned by ZOLTAN_LB_FN.
   */
  *num_import_objs = *num_export_objs = 0;

  if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS){
    /*
     * This parameter setting requires that all local objects
     * and their assignments appear in the export list.
     * Every object stays put, so every export proc is zz->Proc.
     */
    error= Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs,
             export_global_ids, export_local_ids,
             wgt_dim, &fdummy, export_to_part);

    if (error == ZOLTAN_OK){
      ZOLTAN_FREE(&fdummy);
      if (Zoltan_Special_Malloc(zz, (void **)export_procs, *num_export_objs,
                          ZOLTAN_SPECIAL_MALLOC_INT)){
        for (i=0; i<*num_export_objs; i++)
          (*export_procs)[i] = zz->Proc;
      }
      else{
        error = ZOLTAN_MEMERR;
      }
    }
  }
  goto End;
}

/*
 *  Check whether we know the import data, export data, or both.
 *
 *  If we were given the import data,
 *  we know what the new decomposition should look like on the
 *  processor, but we don't know which of our local objects we have
 *  to export to other processors to establish the new decomposition.
 *  Reverse the argument if we were given the export data.
 *
 *  Unless we were given both maps, compute the inverse map.
 */
if (*num_import_objs >= 0){
  if (*num_export_objs >= 0) {
    /* Both maps already available; nothing to do. */;
    if (zz->LB.Return_Lists == ZOLTAN_LB_NO_LISTS) {
      /* This condition should never happen!! */
      /* Methods should not return arrays if no lists are requested. */
      *num_import_objs = *num_export_objs = -1;
      Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids,
                                  import_procs, import_to_part);
      Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                  export_procs, export_to_part);
      ZOLTAN_PRINT_WARN(zz->Proc, yo,
                        "Method returned lists, but no lists requested.");
    }
  }
  else if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS ||
           zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS ||
           zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
    /* Export lists are requested; compute export map */
    error = Zoltan_Invert_Lists(zz, *num_import_objs, *import_global_ids,
                                *import_local_ids, *import_procs,
                                *import_to_part,
                                num_export_objs, export_global_ids,
                                export_local_ids, export_procs,
                                export_to_part);
    if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
      sprintf(msg, "Error building return arguments; "
                   "%d returned by Zoltan_Compute_Destinations\n", error);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
      goto End;
    }
    if (zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS ||
        zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
      /* Method returned import lists, but only export lists
         were desired. */
      /* Import lists not needed; free them. */
      *num_import_objs = -1;
      Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids,
                                  import_procs, import_to_part);
    }
  }
}
else { /* (*num_import_objs < 0) */
  if (*num_export_objs >= 0) {
    /* Only export lists have been returned. */
    if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS ||
        zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
      /* Compute import map */
      error = Zoltan_Invert_Lists(zz, *num_export_objs, *export_global_ids,
                                  *export_local_ids, *export_procs,
                                  *export_to_part,
                                  num_import_objs, import_global_ids,
                                  import_local_ids, import_procs,
                                  import_to_part);
      if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
        sprintf(msg, "Error building return arguments; "
                     "%d returned by Zoltan_Compute_Destinations\n", error);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        goto End;
      }
      if (zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
        /* Method returned export lists, but only import lists
           are desired. */
        /* Export lists not needed; free them. */
        *num_export_objs = -1;
        Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                    export_procs, export_to_part);
      }
    }
  }
  else {  /* *num_export_objs < 0 && *num_import_objs < 0) */
    if (zz->LB.Return_Lists) {
      /* No map at all available */
      ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                         "Load-balancing function returned "
                         "neither import nor export data.");
      error = ZOLTAN_WARN;
      goto End;
    }
  }
}

if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
  /*
   * Normally, Zoltan_LB returns in the export lists all local
   * objects that are moving off processor, or that are assigned
   * to a partition on the local processor that is not the
   * default partition.  This setting of Return_Lists requests
   * that all local objects be included in the export list.
   */

  if (*num_export_objs == 0){
    /* all local objects are remaining on processor */

    error= Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs,
             export_global_ids, export_local_ids,
             wgt_dim, &fdummy, export_to_part);

    if (error == ZOLTAN_OK){
      ZOLTAN_FREE(&fdummy);
      if (Zoltan_Special_Malloc(zz, (void **)export_procs, *num_export_objs,
                          ZOLTAN_SPECIAL_MALLOC_INT)){
        for (i=0; i<*num_export_objs; i++)
          (*export_procs)[i] = zz->Proc;
      }
      else{
        error = ZOLTAN_MEMERR;
      }
    }
    if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN)) goto End;
  }
  else{
    /* NOTE(review): error from Get_Num_Obj is not checked before
       all_num_obj is used below -- presumably the callback is trusted
       here; confirm against other Zoltan call sites. */
    all_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &error);

    if (*num_export_objs < all_num_obj){
      /* Create a lookup table for exported IDs */

      if (*num_export_objs > 16){   /* could be 0, maybe only importing */
        ts = (*num_export_objs) / 4;  /* what's a good table size? */
      }
      else{
        ts = *num_export_objs;
      }

      ht = create_hash_table(zz, *export_global_ids, *num_export_objs, ts);

      /* Create a list of all gids, lids and partitions */
      /* NOTE(review): if this call fails we goto End below without
         calling free_hash_table(ht, ts) -- looks like a leak of the
         table on the error path; verify. */
      error= Zoltan_Get_Obj_List_Special_Malloc(zz, &all_num_obj,
               &all_global_ids, &all_local_ids,
               wgt_dim, &fdummy, &parts);

      if ((error == ZOLTAN_OK) || (error == ZOLTAN_WARN)){
        ZOLTAN_FREE(&fdummy);
        if ((Zoltan_Special_Malloc(zz, (void **)&export_all_procs,
                          all_num_obj, ZOLTAN_SPECIAL_MALLOC_INT)==0) ||
            (Zoltan_Special_Malloc(zz, (void **)&export_all_to_part,
                          all_num_obj, ZOLTAN_SPECIAL_MALLOC_INT)==0)){
          error = ZOLTAN_MEMERR;
        }
      }

      if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN)){
        sprintf(msg, "Error building complete export list; "
                     "%d returned by Zoltan_Get_Obj_List\n", error);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        goto End;
      }

      /* For each local object: if it was in the method's export list,
         copy its destination; otherwise it stays on this proc in the
         partition reported by Get_Obj_List. */
      gid = all_global_ids;
      for (i=0; i < all_num_obj; i++, gid += zz->Num_GID){
        idIdx = search_hash_table(zz, gid, ht, ts);

        if (idIdx >= 0){
          export_all_procs[i] = (*export_procs)[idIdx];
          export_all_to_part[i] = (*export_to_part)[idIdx];
        }
        else{
          export_all_procs[i] = zz->Proc;
          export_all_to_part[i] = parts[i];
        }
      }

      free_hash_table(ht, ts);

      /* Replace the partial export lists with the complete ones. */
      Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids,
                                  export_procs, export_to_part);
      Zoltan_Special_Free(zz, (void **)&parts, ZOLTAN_SPECIAL_MALLOC_INT);

      *export_global_ids = all_global_ids;
      *export_local_ids = all_local_ids;
      *export_procs = export_all_procs;
      *export_to_part = export_all_to_part;
      *num_export_objs = all_num_obj;
    }
  }
}

ZOLTAN_TRACE_DETAIL(zz, yo, "Done building return arguments");

end_time = Zoltan_Time(zz->Timer);
lb_time[0] = end_time - start_time;

if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
  int i;
  Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
  printf("ZOLTAN: Objects to be imported to Proc %d\n", zz->Proc);
  for (i = 0; i < *num_import_objs; i++) {
    printf(" Obj: ");
    ZOLTAN_PRINT_GID(zz, &((*import_global_ids)[i*zz->Num_GID]));
    printf(" To partition: %4d",
           (*import_to_part != NULL ? (*import_to_part)[i] : zz->Proc));
    printf(" From processor: %4d\n", (*import_procs)[i]);
  }
  printf("\n");
  printf("ZOLTAN: Objects to be exported from Proc %d\n", zz->Proc);
  for (i = 0; i < *num_export_objs; i++) {
    printf(" Obj: ");
    ZOLTAN_PRINT_GID(zz, &((*export_global_ids)[i*zz->Num_GID]));
    printf(" To partition: %4d",
           (*export_to_part != NULL ? (*export_to_part)[i]
                                    : (*export_procs)[i]));
    printf(" To processor: %4d\n", (*export_procs)[i]);
  }
  Zoltan_Print_Sync_End(zz->Communicator, TRUE);
}

/*
 *  If the Help_Migrate flag is set, perform migration for the application.
 */
if (zz->Migrate.Auto_Migrate) {
  ZOLTAN_TRACE_DETAIL(zz, yo, "Begin auto-migration");

  start_time = Zoltan_Time(zz->Timer);
  error = Zoltan_Migrate(zz,
                         *num_import_objs, *import_global_ids,
                         *import_local_ids, *import_procs, *import_to_part,
                         *num_export_objs, *export_global_ids,
                         *export_local_ids, *export_procs, *export_to_part);
  if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
    sprintf(msg, "Error in auto-migration; %d returned from "
                 "Zoltan_Help_Migrate\n", error);
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
    goto End;
  }
  end_time = Zoltan_Time(zz->Timer);
  lb_time[1] = end_time - start_time;

  ZOLTAN_TRACE_DETAIL(zz, yo, "Done auto-migration");
}

/* Print timing info */
if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
  if (zz->Proc == zz->Debug_Proc) {
    printf("ZOLTAN Times: \n");
  }
  Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, lb_time[0],
                      "ZOLTAN Partition: ");
  if (zz->Migrate.Auto_Migrate)
    Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, lb_time[1],
                        "ZOLTAN Migrate: ");
}

*changes = 1;

End:
ZOLTAN_TRACE_EXIT(zz, yo);
return (error);
}
int Zoltan_LB_Get_Part_Sizes(ZZ *zz,
    int num_global_parts, int part_dim, float *part_sizes)
{
/*
 * Return the part sizes in effect for this balancing, scaled so that
 * for each weight component the sizes sum to one.
 *
 * Input:
 *   zz               -- The Zoltan structure to which this method applies.
 *   num_global_parts -- Number of global parts.
 *                       (This usually equals lb->Num_Global_Parts.)
 *   part_dim         -- The number of object weights per part.
 *                       (This usually equals lb->Obj_Wgt_Dim.)
 *
 * Output:
 *   part_sizes       -- Array of floats (num_global_parts * part_dim)
 *                       holding the scaled part sizes.
 */
static char *yo = "Zoltan_LB_Get_Part_Sizes";
int error = ZOLTAN_OK;
int max_info_len;          /* max of LB.Part_Info_Len over all procs */
int pid, widx, entry;      /* part id, weight index, flat array index */
int nparts, fpart;         /* outputs of Zoltan_LB_Proc_To_Part */
float *local_sizes = NULL; /* this proc's contributions before reduction */
float *wsum = NULL;        /* per-weight column sums used for scaling */
char msg[128];

ZOLTAN_TRACE_ENTER(zz, yo);

if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL)
  printf("[%1d] Debug: num_global_parts = %d\n", zz->Proc,
         num_global_parts);

/* Barrier to make sure all procs have finished Zoltan_LB_Set_Part_Sizes */
MPI_Barrier(zz->Communicator);

/* For convenience, if no weights are used, set part_dim to 1 */
if (part_dim == 0)
  part_dim = 1;

if (part_sizes == NULL) {
  ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input argument part_sizes is NULL.");
  error = ZOLTAN_FATAL;
  goto End;
}

/* Find max Part_Info_Len over all procs to see if they are all zero. */
MPI_Allreduce((void*) &(zz->LB.Part_Info_Len), (void*) &max_info_len,
              1, MPI_INT, MPI_MAX, zz->Communicator);

if (max_info_len == 0) {
  /* No processor set any part size; use uniform sizes. */
  zz->LB.Uniform_Parts = 1;
  for (entry = 0; entry < num_global_parts*part_dim; entry++)
    part_sizes[entry] = 1.0 / (float)num_global_parts;
  goto End;
}

/* At least one proc supplied sizes: gather them all, then reduce.
 * Out-of-range part size data is ignored. */
zz->LB.Uniform_Parts = 0;

/* Pack LB.Part_Info into a temp array; -1 marks "not set here". */
local_sizes = (float *)ZOLTAN_MALLOC(num_global_parts*part_dim
                                     *sizeof(float));
wsum = (float *)ZOLTAN_MALLOC(part_dim*sizeof(float));
if ((!local_sizes) || (!wsum)) {
  ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
  error = ZOLTAN_MEMERR;
  goto End;
}
for (entry = 0; entry < num_global_parts*part_dim; entry++)
  local_sizes[entry] = -1.0;

for (entry = 0; entry < zz->LB.Part_Info_Len; entry++) {
  /* Only assemble part sizes for parts and weights in the
   * requested range. */
  widx = zz->LB.Part_Info[entry].Idx;
  if (widx >= part_dim)
    continue;
  pid = zz->LB.Part_Info[entry].Part_id;
  if (zz->LB.Part_Info[entry].Global_num == 0) {
    /* Part id is local to this proc; shift to the global numbering. */
    Zoltan_LB_Proc_To_Part(zz, zz->Proc, &nparts, &fpart);
    pid += fpart;
  }
  if (pid >= num_global_parts) {
    sprintf(msg, "Part number %d is >= num_global_parts %d.",
            pid, num_global_parts);
    ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
    error = ZOLTAN_WARN;
  }
  else
    local_sizes[pid*part_dim + widx] = zz->LB.Part_Info[entry].Size;
}

/* Reduce over all procs */
MPI_Allreduce((void*) local_sizes, (void*) part_sizes,
              num_global_parts*part_dim, MPI_FLOAT, MPI_MAX,
              zz->Communicator);

/* Check for errors. Scale the sizes so they sum to one for each weight. */
for (widx = 0; widx < part_dim; widx++)
  wsum[widx] = 0.0;

for (pid = 0; pid < num_global_parts; pid++) {
  for (widx = 0; widx < part_dim; widx++) {
    if (part_sizes[pid*part_dim+widx] < 0)
      part_sizes[pid*part_dim+widx] = 1.0; /* default value if not set */
    wsum[widx] += part_sizes[pid*part_dim+widx];
  }
  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
    printf("[%1d] In %s: Part size %1d (before scaling) = ",
           zz->Proc, yo, pid);
    for (widx = 0; widx < part_dim; widx++)
      printf("%f, ", part_sizes[pid*part_dim+widx]);
    printf("\n");
  }
}

/* A zero column sum cannot be normalized; that is a fatal error. */
for (widx = 0; widx < part_dim; widx++) {
  if (wsum[widx] == 0.0) {
    sprintf(msg, "Sum of weights (component %1d) is zero.", widx);
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
    error = ZOLTAN_FATAL;
    goto End;
  }
}

/* Normalize part sizes */
for (pid = 0; pid < num_global_parts; pid++)
  for (widx = 0; widx < part_dim; widx++)
    part_sizes[pid*part_dim+widx] /= wsum[widx];

End:
if (local_sizes) ZOLTAN_FREE(&local_sizes);
if (wsum) ZOLTAN_FREE(&wsum);
ZOLTAN_TRACE_EXIT(zz, yo);
return error;
}