Пример #1
0
int Zoltan_Drum_Set_Part_Sizes(ZZ *zz) {
  /* for now this work for a single weight dimension and exactly
     one partition per process.  It may make sense to enhance it, 
     particularly in relation to hierarchical balancing.

     Does it make sense just to use the same partition sizes for each
     weight in the context of multiple weights specified?
  */
  float part_size;
  char *yo = "Zoltan_Drum_Set_Part_Sizes";
  int ierr;
  int wgt_idx;
  int part_id;

  if (zz->Drum.use_drum == 0) return ZOLTAN_OK;

  part_size = DRUM_getLocalPartSize(zz->Drum.dmm, &ierr);
  if (ierr == DRUM_FATAL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Cannot get DRUM partition sizes");
    return ZOLTAN_FATAL;
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS && zz->Proc == 0) {
    printf("Part Sizes: "); fflush(stdout);
  }
  if (zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS) {
    printf("proc %d: %.2f ", zz->Proc, part_size); fflush(stdout);
  }
  if (zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS && zz->Proc == 0) {
    printf("\n"); fflush(stdout);
  }
  wgt_idx = 0;
  part_id = 0;
  /* clear old part size info, first */
  Zoltan_LB_Set_Part_Sizes(zz, 0, -1, NULL, NULL, NULL);
  /* now set the part size computed by DRUM */
  Zoltan_LB_Set_Part_Sizes(zz, 0, 1, &part_id, &wgt_idx, &part_size);

  return ZOLTAN_OK;
}
Пример #2
0
static int Zoltan_LB(
  ZZ *zz, 
  int include_parts,             /* Flag indicating whether to generate
                                    part informtion;
                                    0 if called by Zoltan_LB_Balance,
                                    1 if called by Zoltan_LB_Partition.       */
  int *changes,                  /* Set to zero or one depending on if 
                                    Zoltan determines a new
                                    decomposition or not:
                                    zero - No changes to the decomposition
                                           were made by the load-balancing
                                           algorithm; migration is not needed.
                                    one  - A new decomposition is suggested
                                           by the load-balancer; migration is
                                           needed to establish the new
                                           decomposition.                     */
  int *num_gid_entries,          /* The number of array entries in a global ID;
                                    set to be the max over all processors in
                                    zz->Communicator of the parameter
                                    Num_Global_ID_Entries.                    */
  int *num_lid_entries,          /* The number of array entries in a local ID;
                                    set to be the max over all processors in
                                    zz->Communicator of the parameter
                                    Num_Local_ID_Entries.                     */
  int *num_import_objs,          /* The number of non-local objects in the
                                    processor's new decomposition.            */
  ZOLTAN_ID_PTR *import_global_ids,/* Array of global IDs for non-local objects
                                    (i.e., objs to be imported) in
                                    the processor's new decomposition.        */
  ZOLTAN_ID_PTR *import_local_ids,   /* Array of local IDs for non-local objects
                                    (i.e., objs to be imported) in
                                    the processor's new decomposition.        */
  int **import_procs,            /* Array of processor IDs for processors 
                                    currently owning non-local objects (i.e.,
                                    objs to be imported) in this processor's
                                    new decomposition.                        */
  int **import_to_part,          /* Partition to which the objects should be
                                    imported.                                 */
  int *num_export_objs,          /* The number of local objects that need to
                                    be exported from the processor to establish
                                    the new decomposition.                    */
  ZOLTAN_ID_PTR *export_global_ids,/* Array of global IDs for objects that need
                                    to be exported (assigned and sent to other
                                    processors) to establish the new 
                                    decomposition.                            */
  ZOLTAN_ID_PTR *export_local_ids,   /* Array of local IDs for objects that need
                                    to be exported (assigned and sent to other
                                    processors) to establish the new 
                                    decomposition.                            */
  int **export_procs,            /* Array of destination processor IDs for
                                    objects that need to be exported 
                                    to establish the new decomposition.       */
  int **export_to_part           /* Partition to which objects should be 
                                    exported.                                 */
)
{
/*
 * Main load-balancing routine.
 * Input:  a Zoltan structure with appropriate function pointers set.
 * Output: 
 *   changes
 *   num_import_objs
 *   import_global_ids
 *   import_local_ids
 *   import_procs
 *   import_to_part
 *   num_export_objs
 *   export_global_ids
 *   export_local_ids
 *   export_procs
 *   export_to_part
 * Return values:
 *   Zoltan error code.
 */

char *yo = "Zoltan_LB";
int gmax;    /* Maximum number of imported/exported objects 
                over all processors.                       */
int error = ZOLTAN_OK;    /* Error code */
double start_time, end_time;
double lb_time[2] = {0.0,0.0};
char msg[256];
int comm[3],gcomm[3]; 
float *part_sizes = NULL, *fdummy = NULL;
int wgt_dim, part_dim;
int all_num_obj, i, ts, idIdx;
struct Hash_Node **ht;
int *export_all_procs, *export_all_to_part, *parts=NULL;
ZOLTAN_ID_PTR all_global_ids=NULL, all_local_ids=NULL;
ZOLTAN_ID_PTR gid;
#ifdef ZOLTAN_OVIS
struct OVIS_parameters ovisParameters;
#endif

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS){
    printf("Build configuration:\n");
    Zoltan_Print_Configuration("  ");
    printf("\n");
    Zoltan_Print_Key_Params(zz);
  }

  start_time = Zoltan_Time(zz->Timer);

#ifdef ZOLTAN_OVIS
  Zoltan_OVIS_Setup(zz, &ovisParameters);
  if (zz->Proc == 0)
    printf("OVIS PARAMETERS %s %s %d %f\n", 
           ovisParameters.hello, 
           ovisParameters.dll, 
           ovisParameters.outputLevel, 
           ovisParameters.minVersion);
  ovis_enabled(zz->Proc, ovisParameters.dll);


#endif

  /* 
   * Compute Max number of array entries per ID over all processors.
   * Compute Max number of return arguments for Zoltan_LB_Balance.
   * This is a sanity-maintaining step; we don't want different
   * processors to have different values for these numbers.
   */
  comm[0] = zz->Num_GID;
  comm[1] = zz->Num_LID;
  comm[2] = zz->LB.Return_Lists;
  MPI_Allreduce(comm, gcomm, 3, MPI_INT, MPI_MAX, zz->Communicator);
  zz->Num_GID = *num_gid_entries = gcomm[0];
  zz->Num_LID = *num_lid_entries = gcomm[1];
  zz->LB.Return_Lists = gcomm[2];

  /* assume no changes */
  *changes = 0;

  *num_import_objs = *num_export_objs = 0;
  *import_global_ids = NULL;
  *import_local_ids = NULL;
  *import_procs = NULL;
  *import_to_part = NULL;
  *export_global_ids = NULL;
  *export_local_ids = NULL;
  *export_procs = NULL;
  *export_to_part = NULL;

  /*
   *  Return if this processor is not in the Zoltan structure's
   *  communicator.
   */

  if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) 
    goto End;

  if (zz->LB.Method == NONE) {
    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
      printf("%s Balancing method selected == NONE; no balancing performed\n",
              yo);

    error = ZOLTAN_WARN;
    goto End;
  }

  /*
   *  Sync the random number generator across processors.
   */

  Zoltan_Srand_Sync(Zoltan_Rand(NULL), NULL, zz->Communicator);

  /* Since generating a new partition, need to free old mapping vector */
  zz->LB.OldRemap = zz->LB.Remap;
  zz->LB.Remap = NULL;

  error = Zoltan_LB_Build_PartDist(zz);
  if (error != ZOLTAN_OK && error != ZOLTAN_WARN)
    goto End;

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
    int i, np, fp;
    for (i = 0; i < zz->Num_Proc; i++) {
      Zoltan_LB_Proc_To_Part(zz, i, &np, &fp);
      printf("%d Proc_To_Part Proc %d NParts %d FPart %d\n", 
             zz->Proc, i, np, fp);
    }
  }

  /*
   * Generate parts sizes.
   */

#ifdef ZOLTAN_OVIS
  /* set part sizes computed by OVIS, if requested. Processes set only their own value */
  {
    float part_sizes[1];
    int part_ids[1], wgt_idx[1];

    wgt_idx[0] = 0;
    part_ids[0] = 0;
    ovis_getPartsize(&(part_sizes[0])); 
    printf("Rank %d ps %f\n",zz->Proc, part_sizes[0]);
    /* clear out old part size info first */
    Zoltan_LB_Set_Part_Sizes(zz, 0, -1, NULL, NULL, NULL);
    Zoltan_LB_Set_Part_Sizes(zz, 0, 1, part_ids, wgt_idx, part_sizes);
  }
#endif

  wgt_dim = zz->Obj_Weight_Dim;
  part_dim = ((wgt_dim > 0) ? wgt_dim : 1);

  part_sizes = (float *) ZOLTAN_MALLOC(sizeof(float) * part_dim 
                                     * zz->LB.Num_Global_Parts);
  if (part_sizes == NULL) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    error = ZOLTAN_MEMERR;
    goto End;
  }

  /* Get part sizes. */
  Zoltan_LB_Get_Part_Sizes(zz, zz->LB.Num_Global_Parts, part_dim,
    part_sizes);


#ifdef ZOLTAN_OVIS
  /*  if (ovisParameters.outputlevel > 3) */
  {
    int myRank = zz->Proc;
    if (myRank == 0){
      int i, j;

      for (i = 0; i < zz->LB.Num_Global_Parts; i++){
        for (j = 0; j < part_dim; j++){
          printf("Rank %d AG: part_sizes[%d] = %f (Num_Global_Parts = %d, part_dim = %d)\n",zz->Proc,
                 (i*part_dim+j), part_sizes[i*part_dim+j],zz->LB.Num_Global_Parts, part_dim);
        }
      }
    }
  }
#endif


  /*
   * Call the actual load-balancing function.
   */

  error = zz->LB.LB_Fn(zz, part_sizes,
                       num_import_objs, import_global_ids, import_local_ids,
                       import_procs, import_to_part, 
                       num_export_objs, export_global_ids, export_local_ids, 
                       export_procs, export_to_part);

  ZOLTAN_FREE(&part_sizes);

  if (error == ZOLTAN_FATAL || error == ZOLTAN_MEMERR){
    sprintf(msg, "Partitioning routine returned code %d.", error);

#ifdef HOST_LINUX
    if ((error == ZOLTAN_MEMERR) && (Zoltan_Memory_Get_Debug() > 0)){
      Zoltan_write_linux_meminfo(0, "State of /proc/meminfo after malloc failure\n", 0);
    }
#endif

    ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
    goto End;
  }
  else if (error){
    if (zz->Debug_Level >ZOLTAN_DEBUG_NONE) {
      sprintf(msg, "Partitioning routine returned code %d.", error);
      ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
    }
  }

  ZOLTAN_TRACE_DETAIL(zz, yo, "Done partitioning");

  if (*num_import_objs >= 0)
    MPI_Allreduce(num_import_objs, &gmax, 1, MPI_INT, MPI_MAX, 
                zz->Communicator);
  else /* use export data */
    MPI_Allreduce(num_export_objs, &gmax, 1, MPI_INT, MPI_MAX, 
                zz->Communicator);

  if (gmax == 0) {

    /*
     *  Decomposition was not changed by the load balancing; no migration
     *  is needed.
     */

    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
      printf("%s No changes to the decomposition due to partitioning; "
             "no migration is needed.\n", yo);

    /*
     *  Reset num_import_objs and num_export_objs; don't want to return
     *  -1 for the arrays that weren't returned by ZOLTAN_LB_FN.
     */

    *num_import_objs = *num_export_objs = 0;

    if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS){
      /*
       * This parameter setting requires that all local objects
       * and their assignments appear in the export list.
       */
      error= Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs, 
               export_global_ids, export_local_ids,
               wgt_dim, &fdummy, export_to_part);

      if (error == ZOLTAN_OK){
        ZOLTAN_FREE(&fdummy);
        if (Zoltan_Special_Malloc(zz, (void **)export_procs, *num_export_objs,
                            ZOLTAN_SPECIAL_MALLOC_INT)){
          for (i=0; i<*num_export_objs; i++)
            (*export_procs)[i] = zz->Proc;
        }
        else{
          error = ZOLTAN_MEMERR;
        }
      }
    }
    goto End;
  }

  /*
   *  Check whether we know the import data, export data, or both.
   *
   *  If we were given the import data,
   *  we know what the new decomposition should look like on the
   *  processor, but we don't know which of our local objects we have
   *  to export to other processors to establish the new decomposition.
   *  Reverse the argument if we were given the export data.
   *
   *  Unless we were given both maps, compute the inverse map.
   */
  if (zz->LB.Return_Lists == ZOLTAN_LB_NO_LISTS) {
    if (*num_import_objs >= 0) 
      Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids, 
                                  import_procs, import_to_part);
    if (*num_export_objs >= 0) 
      Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids, 
                                  export_procs, export_to_part);
    *num_import_objs = *num_export_objs = -1;
  }

  if (*num_import_objs >= 0){
    if (*num_export_objs >= 0) {
      /* Both maps already available; nothing to do. */;
    }
    else if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS || 
             zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS ||
             zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
      /* Export lists are requested; compute export map */
      error = Zoltan_Invert_Lists(zz, *num_import_objs, *import_global_ids, 
                                      *import_local_ids, *import_procs,
                                      *import_to_part,
                                      num_export_objs, export_global_ids,
                                      export_local_ids, export_procs,
                                      export_to_part);
      if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
        sprintf(msg, "Error building return arguments; "
                     "%d returned by Zoltan_Compute_Destinations\n", error);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        goto End;
      }
      if (zz->LB.Return_Lists == ZOLTAN_LB_EXPORT_LISTS ||
          zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
        /* Method returned import lists, but only export lists were desired. */
        /* Import lists not needed; free them. */
        *num_import_objs = -1;
        Zoltan_LB_Special_Free_Part(zz, import_global_ids, import_local_ids, 
                            import_procs, import_to_part);
      }
    }
  }
  else { /* (*num_import_objs < 0) */
    if (*num_export_objs >= 0) {
      /* Only export lists have been returned. */
      if (zz->LB.Return_Lists == ZOLTAN_LB_ALL_LISTS || 
          zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
        /* Compute import map */
        error = Zoltan_Invert_Lists(zz, *num_export_objs, *export_global_ids, 
                                        *export_local_ids, *export_procs,
                                        *export_to_part,
                                        num_import_objs, import_global_ids,
                                        import_local_ids, import_procs, 
                                        import_to_part);

        if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
          sprintf(msg, "Error building return arguments; "
                       "%d returned by Zoltan_Compute_Destinations\n", error);
          ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
          goto End;
        }
        if (zz->LB.Return_Lists == ZOLTAN_LB_IMPORT_LISTS) {
          /* Method returned export lists, but only import lists are desired. */
          /* Export lists not needed; free them. */
          *num_export_objs = -1;
          Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids, 
                              export_procs, export_to_part);
        }
      }
    }
    else {  /* *num_export_objs < 0 && *num_import_objs < 0) */
      if (zz->LB.Return_Lists) {
        /* No map at all available */
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Load-balancing function returned "
               "neither import nor export data.");
        error = ZOLTAN_WARN;
        goto End;
      }
    }
  }

  if (zz->LB.Return_Lists == ZOLTAN_LB_COMPLETE_EXPORT_LISTS) {
    /*
     * Normally, Zoltan_LB returns in the export lists all local
     * objects that are moving off processor, or that are assigned
     * to a part on the local processor that is not the
     * default part.  This setting of Return_Lists requests
     * that all local objects be included in the export list.
     */

    if (*num_export_objs == 0){
      /* all local objects are remaining on processor */

      error= Zoltan_Get_Obj_List_Special_Malloc(zz, num_export_objs,
               export_global_ids, export_local_ids,
               wgt_dim, &fdummy, export_to_part);

      if (error == ZOLTAN_OK){
        ZOLTAN_FREE(&fdummy);
        if (*num_export_objs) {
          if (Zoltan_Special_Malloc(zz, (void **)export_procs, *num_export_objs,
                                    ZOLTAN_SPECIAL_MALLOC_INT)){
            for (i=0; i<*num_export_objs; i++)
              (*export_procs)[i] = zz->Proc;
          }
          else{
            error = ZOLTAN_MEMERR;
          }
        }
      }
      if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN)) goto End;
    }
    else{
      all_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &error);

      if (*num_export_objs < all_num_obj){
  
        /* Create a lookup table for exported IDs */
  
        ts = Zoltan_Recommended_Hash_Size(*num_export_objs);
        ht = create_hash_table(zz, *export_global_ids, *num_export_objs, ts);
  
        /* Create a list of all gids, lids and parts */
  
        error= Zoltan_Get_Obj_List_Special_Malloc(zz, &all_num_obj, 
                 &all_global_ids, &all_local_ids,
                 wgt_dim, &fdummy, &parts);

        if ((error == ZOLTAN_OK) || (error == ZOLTAN_WARN)){
          ZOLTAN_FREE(&fdummy);
          if ((Zoltan_Special_Malloc(zz, (void **)(void*)&export_all_procs, 
                 all_num_obj, ZOLTAN_SPECIAL_MALLOC_INT)==0) ||
              (Zoltan_Special_Malloc(zz, (void **)(void*)&export_all_to_part, 
                 all_num_obj, ZOLTAN_SPECIAL_MALLOC_INT)==0)){

            error = ZOLTAN_MEMERR;
          }
        }
  
        if ((error != ZOLTAN_OK) && (error != ZOLTAN_WARN)){
          sprintf(msg, "Error building complete export list; "
                       "%d returned by Zoltan_Get_Obj_List\n", error);
          ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
          goto End;
        }
  
        gid = all_global_ids;
  
        for (i=0; i < all_num_obj; i++, gid += zz->Num_GID){
  
          idIdx = search_hash_table(zz, gid, ht, ts);
  
          if (idIdx >= 0){

            export_all_procs[i] = (*export_procs)[idIdx];
            export_all_to_part[i] = (*export_to_part)[idIdx];
          }
          else{
            export_all_procs[i] = zz->Proc;
            export_all_to_part[i] = parts[i];
          }
        }
  
        free_hash_table(ht, ts);

        Zoltan_LB_Special_Free_Part(zz, export_global_ids, export_local_ids, 
                            export_procs, export_to_part);
        Zoltan_Special_Free(zz, (void **)(void*)&parts, 
                            ZOLTAN_SPECIAL_MALLOC_INT);
  
        *export_global_ids = all_global_ids;
        *export_local_ids = all_local_ids;
        *export_procs = export_all_procs;
        *export_to_part = export_all_to_part;
        *num_export_objs = all_num_obj;
      }
    }
  }

  ZOLTAN_TRACE_DETAIL(zz, yo, "Done building return arguments");

  end_time = Zoltan_Time(zz->Timer);
  lb_time[0] = end_time - start_time;

  if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
    int i;
    Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
    printf("ZOLTAN: Objects to be imported to Proc %d\n", zz->Proc);
    for (i = 0; i < *num_import_objs; i++) {
      printf("    Obj: ");
      ZOLTAN_PRINT_GID(zz, &((*import_global_ids)[i*zz->Num_GID]));
      printf("  To part: %4d", 
             (*import_to_part != NULL ? (*import_to_part)[i] 
                                      : zz->Proc));
      printf("  From processor: %4d\n", (*import_procs)[i]);
    }
    printf("\n");
    printf("ZOLTAN: Objects to be exported from Proc %d\n", zz->Proc);
    for (i = 0; i < *num_export_objs; i++) {
      printf("    Obj: ");
      ZOLTAN_PRINT_GID(zz, &((*export_global_ids)[i*zz->Num_GID]));
      printf("  To part: %4d",
             (*export_to_part != NULL ? (*export_to_part)[i] 
                                      : (*export_procs)[i]));
      printf("  To processor: %4d\n", (*export_procs)[i]);
    }
    Zoltan_Print_Sync_End(zz->Communicator, TRUE);
  }

  /*
   *  If the Help_Migrate flag is set, perform migration for the application.
   */

  if (zz->Migrate.Auto_Migrate) {
    ZOLTAN_TRACE_DETAIL(zz, yo, "Begin auto-migration");

    start_time = Zoltan_Time(zz->Timer);
    error = Zoltan_Migrate(zz,
                            *num_import_objs, *import_global_ids,
                            *import_local_ids, *import_procs, *import_to_part,
                            *num_export_objs, *export_global_ids,
                            *export_local_ids, *export_procs, *export_to_part);
    if (error != ZOLTAN_OK && error != ZOLTAN_WARN) {
      sprintf(msg, "Error in auto-migration; %d returned from "
                    "Zoltan_Help_Migrate\n", error);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
      goto End;
    }
    end_time = Zoltan_Time(zz->Timer);
    lb_time[1] = end_time - start_time;

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done auto-migration");
  }
  
  /* Print timing info */
  if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
    if (zz->Proc == zz->Debug_Proc) {
      printf("ZOLTAN Times:  \n");
    }
    Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, lb_time[0], 
                   "ZOLTAN     Partition:     ");
    if (zz->Migrate.Auto_Migrate)
      Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, lb_time[1], 
                      "ZOLTAN     Migrate: ");
  }

  *changes = 1;

End:
  ZOLTAN_TRACE_EXIT(zz, yo);
  return (error);
}