Example No. 1
/* This function needs a distribution (rows first, then columns) to work properly */
int
Zoltan_ZG_Build (ZZ* zz, ZG* graph, int local)
{
  static char *yo = "Zoltan_ZG_Build";
  int ierr = ZOLTAN_OK;
  int diag;
  int *diagarray=NULL;
  Zoltan_matrix_options opt;
  char symmetrization[MAX_PARAM_STRING_LEN+1];
  char bipartite_type[MAX_PARAM_STRING_LEN+1];
  char weight_type[MAX_PARAM_STRING_LEN+1];
  char matrix_build_type[MAX_PARAM_STRING_LEN+1];
  int bipartite = 0;
#ifdef CC_TIMERS
  double times[9]={0.,0.,0.,0.,0.,0.,0.,0.}; /* Used for timing measurements */
  double gtimes[9]={0.,0.,0.,0.,0.,0.,0.,0.}; /* Used for timing measurements */
  char *timenames[9]= {"", "setup", "matrix build", "diag", "symmetrize", "dist lin", "2D dist", "complete", "clean up"};

  MPI_Barrier(zz->Communicator);
  times[0] = Zoltan_Time(zz->Timer);
#endif /* CC_TIMERS */

  ZOLTAN_TRACE_ENTER(zz, yo);
  memset (graph, 0, sizeof(ZG));

  /* Read graph build parameters */
  Zoltan_Bind_Param(ZG_params, "GRAPH_SYMMETRIZE", (void *) &symmetrization);
  Zoltan_Bind_Param(ZG_params, "GRAPH_SYM_WEIGHT", (void *) &weigth_type);
  Zoltan_Bind_Param(ZG_params, "GRAPH_BIPARTITE_TYPE", (void *) &bipartite_type);
  Zoltan_Bind_Param(ZG_params, "GRAPH_BUILD_TYPE", (void*) &matrix_build_type);

  /* Set default values */
  strncpy(symmetrization, "NONE", MAX_PARAM_STRING_LEN);
  strncpy(bipartite_type, "OBJ", MAX_PARAM_STRING_LEN);
  strncpy(weight_type, "ADD", MAX_PARAM_STRING_LEN);
  strncpy(matrix_build_type, "NORMAL", MAX_PARAM_STRING_LEN);

  Zoltan_Assign_Param_Vals(zz->Params, ZG_params, zz->Debug_Level, zz->Proc,
			   zz->Debug_Proc);

  Zoltan_Matrix2d_Init(&graph->mtx);

  graph->mtx.comm = (PHGComm*)ZOLTAN_MALLOC (sizeof(PHGComm));
  if (graph->mtx.comm == NULL) MEMORY_ERROR;
  Zoltan_PHGComm_Init (graph->mtx.comm);

  memset(&opt, 0, sizeof(Zoltan_matrix_options));
  opt.enforceSquare = 1;      /* We want a graph: square matrix */
  if (!strcasecmp(weight_type, "ADD"))
    opt.pinwgtop = ADD_WEIGHT;
  else if (!strcasecmp(weight_type, "MAX"))
    opt.pinwgtop = MAX_WEIGHT;
  else if (!strcasecmp(weight_type, "CMP"))  /* CMP is currently handled the same as MAX */
    opt.pinwgtop = MAX_WEIGHT;
  opt.pinwgt = 1;
  opt.randomize = 0;
  opt.local = local;
  opt.keep_distribution = 1;
  if (strcasecmp(symmetrization, "NONE")) {
    opt.symmetrize = 1;
  }
  if (!strcasecmp(matrix_build_type, "FAST"))
    opt.speed = MATRIX_FAST;
  else if (!strcasecmp(matrix_build_type, "FAST_NO_DUP"))
    opt.speed = MATRIX_NO_REDIST;
  else
    opt.speed = MATRIX_FULL_DD;

#ifdef CC_TIMERS
  times[1] = Zoltan_Time(zz->Timer);
#endif

  ierr = Zoltan_Matrix_Build(zz, &opt, &graph->mtx.mtx);
  CHECK_IERR;

#ifdef CC_TIMERS
  times[2] = Zoltan_Time(zz->Timer);
#endif

  ierr = Zoltan_Matrix_Mark_Diag (zz, &graph->mtx.mtx, &diag, &diagarray);
  CHECK_IERR;
  if (diag) { /* Some Diagonal Terms have to be removed */
    ierr = Zoltan_Matrix_Delete_nnz(zz, &graph->mtx.mtx, diag, diagarray);
    ZOLTAN_FREE(&diagarray);
    CHECK_IERR;
  }

#ifdef CC_TIMERS
  times[3] = Zoltan_Time(zz->Timer);
#endif

  if (opt.symmetrize) {
    if (!strcasecmp(symmetrization, "BIPARTITE"))
      bipartite = 1;
    ierr = Zoltan_Matrix_Sym(zz, &graph->mtx.mtx, bipartite);
    CHECK_IERR;
  }

#ifdef CC_TIMERS
  times[4] = Zoltan_Time(zz->Timer);
#endif

  ierr = Zoltan_Distribute_LinearY(zz, graph->mtx.comm);
  CHECK_IERR;

#ifdef CC_TIMERS
  times[5] = Zoltan_Time(zz->Timer);
  MPI_Barrier(zz->Communicator);
#endif
  ierr = Zoltan_Matrix2d_Distribute (zz, graph->mtx.mtx, &graph->mtx, 0);
  CHECK_IERR;

#ifdef CC_TIMERS
  times[6] = Zoltan_Time(zz->Timer);
#endif
  ierr = Zoltan_Matrix_Complete(zz, &graph->mtx.mtx);
  CHECK_IERR;

#ifdef CC_TIMERS
  times[7] = Zoltan_Time(zz->Timer);
#endif

  if (bipartite) {
    graph->bipartite = 1;
    /* Fixed-vertex flags now come directly from the matrix build. */
    graph->fixed_vertices = graph->mtx.mtx.ybipart;
  }

#ifdef CC_TIMERS
  MPI_Barrier(zz->Communicator);
  times[8] = Zoltan_Time(zz->Timer);

  MPI_Reduce(times, gtimes, 9, MPI_DOUBLE, MPI_MAX, 0, zz->Communicator);
  if (!zz->Proc) {
      int i;
      printf("Total Build Time in Proc-0: %.2lf    Max: %.2lf\n", times[8]-times[0], gtimes[8]-times[0]);
      for (i=1; i<9; ++i)
          printf("%-13s in Proc-0: %8.2lf  Max: %8.2lf\n", timenames[i],  times[i]-times[i-1], gtimes[i]-gtimes[i-1]);
  }
#endif

 End:
  ZOLTAN_FREE(&diagarray);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return (ierr);
}
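
/* Usage note: Zoltan_ZG_Build reads its options from Zoltan parameters rather
 * than arguments.  A minimal sketch of the caller-side setup, assuming zz was
 * created with Zoltan_Create(); Zoltan_Set_Param is the public parameter API,
 * and the values shown are the strings recognized above (error checking
 * omitted): */

Zoltan_Set_Param(zz, "GRAPH_SYMMETRIZE", "BIPARTITE");  /* enable symmetrization */
Zoltan_Set_Param(zz, "GRAPH_SYM_WEIGHT", "MAX");        /* combine pin weights by max */
Zoltan_Set_Param(zz, "GRAPH_BIPARTITE_TYPE", "OBJ");    /* keep "object" vertices */
Zoltan_Set_Param(zz, "GRAPH_BUILD_TYPE", "NORMAL");     /* full distributed-directory build */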
Example No. 2
int Zoltan_PHG_CoarsePartition(
  ZZ *zz, 
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments.   */
  PHGPartParams *hgp   /* Input:  parameters to use.  */
)
{
/* 
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.  
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
char *yo = "Zoltan_PHG_CoarsePartition";
int ierr = ZOLTAN_OK;
int i, si, j;
static PHGComm scomm;          /* Serial communicator info */
static int first_time = 1;
HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
int *spart = NULL;             /* Partition vectors for shg. */
int *new_part = NULL;          /* Ptr to new partition vector. */
float *bestvals = NULL;        /* Best cut values found so far */
int worst, new_cand;
float bal, cut, worst_cut;
int fine_timing = (hgp->use_timers > 2);
struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
int local_coarse_part = hgp->LocalCoarsePartition;

/* Number of iterations to try coarse partitioning on each proc. */
/* 10 when p=1, and 1 when p is large. */
const int num_coarse_iter = 1 + 9/zz->Num_Proc; 


  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }


  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and map to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc=phg->comm;    

    Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
        for (i = 0; i < phg->nVtx; i++) {
            /* Impose fixed vertex/preferred part constraints. */
            if (phg->pref_part[i] < 0) { /* Free vertex in fixedvertex partitioning or repart */
                /* randomly assigned to a part */
                part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
            } else {
                if (phg->bisec_split < 0)
                    /* direct k-way, use part numbers directly */
                    part[i] = phg->pref_part[i];
                else
                    /* recursive bisection, map to 0-1 part numbers */
                    part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
            }            
        }
    } else {
        for (i = 0; i < phg->nVtx; i++) {
            if (part[i] >= numPart || part[i]<0) {
                if (first) {
                    ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
                    first = 0;
                    ierr = ZOLTAN_WARN;
                }
                part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart;
            }        
        }
    }
  }
  else if (numPart == 1) {            
    /* everything goes in the one partition */
    for (i =  0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) { 
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc % 
                                           NUM_COARSEPARTITION_FNS];
    }


    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /* 
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

    }

    /* 
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                    sizeof(int));
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1)*sizeof(float)); 
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
    
    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i=0; i< num_coarse_iter; i++){
      int savefmlooplimit=hgp->fm_loop_limit;
        
      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, 
               new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;
      
      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, 
                                       numPart, new_part); 
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);
      
      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }
      if (i<NUM_PART_KEEP)
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j=1; j<NUM_PART_KEEP+1; j++){
          if (worst_cut < bestvals[j]){
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart+new_cand*(shg->nVtx);
    }
    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart.                     */
    for (i=0; i<shg->nVtx; i++){
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart, 
              MIN(NUM_PART_KEEP, num_coarse_iter), spart,
              bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                        "Error returned from pick_best.");
      goto End;
    }
  
    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD  Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }

      Zoltan_HG_HGraph_Free(shg);
      ZOLTAN_FREE(&shg);
    } 
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
    ZOLTAN_FREE(&spart);
    ZOLTAN_FREE(&bestvals);
  }
  
End:
  if (fine_timing) 
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
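
/* The candidate loop above keeps the NUM_PART_KEEP best partitionings by
 * always overwriting the slot holding the current worst objective value.
 * A standalone sketch of that replacement rule (illustrative names; k plays
 * the role of NUM_PART_KEEP, and the buffer has k+1 slots): */

static int next_slot(const float *vals, int iter, int k)
{
  int j, worst = 0;
  float worst_val;

  if (iter < k)                   /* still filling the k+1 slot buffer */
    return iter + 1;
  worst_val = vals[0];            /* buffer full: locate the worst value */
  for (j = 1; j <= k; j++)
    if (vals[j] > worst_val) {
      worst_val = vals[j];
      worst = j;
    }
  return worst;                   /* overwrite the worst candidate next */
}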
Example No. 3
int Zoltan_PHG_Vertex_Visit_Order(
  ZZ *zz, 
  HGraph *hg, 
  PHGPartParams *hgp, 
  int *order)
{
  int i, j, edge;
  int *ldegree=NULL, *gdegree=NULL; /* local/global degree */
  int *lpins=NULL, *gpins=NULL; /* local/global sum of pins */
  char *yo= "Zoltan_PHG_Vertex_Visit_Order";

  /* Start with linear order. */
  for (i=0; i<hg->nVtx; i++)
    order[i] = i;

  /* Permute order array according to chosen strategy. */
  switch (hgp->visit_order){
    case 0: 
      /* random node visit order (recommended)  */
      /* Synchronize so each proc in column visits in same order */
      Zoltan_Srand_Sync(Zoltan_Rand(NULL), &(hg->comm->RNGState_col),
                        hg->comm->col_comm);
      Zoltan_Rand_Perm_Int (order, hg->nVtx, &(hg->comm->RNGState_col));
      break;

    case 1: 
      /* linear (natural) vertex visit order */
      break;

    case 2:
    {
      /* increasing vertex weight */
      float *tmpvwgt;

      if (hg->VtxWeightDim == 1)
        tmpvwgt = hg->vwgt;
      else {
        /* Sort based on first component of multidimensional weight */
        tmpvwgt = (float *) ZOLTAN_MALLOC(hg->nVtx * sizeof(float));
        for (i = 0; i < hg->nVtx; i++)
          tmpvwgt[i] = hg->vwgt[i*hg->VtxWeightDim];
      } 
      
      Zoltan_quicksort_pointer_inc_float (order, tmpvwgt, 0, hg->nVtx-1);
      if (tmpvwgt != hg->vwgt) ZOLTAN_FREE(&tmpvwgt);
      break;
    }

    case 3: 
      /* increasing vertex degree */
      /* intentionally fall through into next case */
    case 4: 
      /* increasing vertex degree, weighted by # pins */

      /* allocate 4 arrays of size hg->nVtx with a single malloc */
      if (!(ldegree = (int *) ZOLTAN_MALLOC (4*sizeof(int) * hg->nVtx))){
        ZOLTAN_PRINT_WARN(zz->Proc, yo, "Out of memory");
        ZOLTAN_FREE (&ldegree);
        return ZOLTAN_MEMERR;
      }
      /* first local data, then global data */
      lpins = ldegree + hg->nVtx;
      gdegree = lpins + hg->nVtx;
      gpins = gdegree + hg->nVtx;

      /* loop over vertices */
      for (i=0; i<hg->nVtx; i++){
         ldegree[i] = hg->vindex[i+1] - hg->vindex[i]; /* local degree */
         lpins[i] = 0;
         /* loop over edges, sum up #pins */
         for (j= hg->vindex[i]; j < hg->vindex[i+1]; j++) {
           edge = hg->vedge[j];
           lpins[i] += hg->hindex[edge+1] - hg->hindex[edge];
         }
      }

      /* sum up local degrees in each column to get global degrees */
      /* also sum up #pins in same communication */
      MPI_Allreduce(ldegree, gdegree, 2*hg->nVtx, MPI_INT, MPI_SUM, 
         hg->comm->col_comm);

      /* sort by global values. same on every processor. */
      if (hgp->visit_order == 3)
        Zoltan_quicksort_pointer_inc_int_int (order, gdegree, gpins,
          0, hg->nVtx-1);
      else /* hgp->visit_order == 4 */
        Zoltan_quicksort_pointer_inc_int_int (order, gpins, gdegree,
          0, hg->nVtx-1);

      ZOLTAN_FREE (&ldegree);
      break;

    /* add more cases here */
  }

  return ZOLTAN_OK;
}
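
/* Cases 3-4 above pack four arrays into a single allocation so that one
 * MPI_Allreduce sums both local arrays at once: the two inputs are laid out
 * contiguously, and so are the two outputs.  A standalone sketch of the
 * layout (assumes <mpi.h> and <stdlib.h>; the caller would fill ldeg[] and
 * lpin[] and consume gdeg[]/gpin[]): */

static void sum_degrees_and_pins(int n, MPI_Comm col_comm)
{
  int *buf  = (int *) malloc(4 * n * sizeof(int));
  int *ldeg = buf;          /* local degrees              */
  int *lpin = buf + n;      /* local pin counts           */
  int *gdeg = buf + 2*n;    /* global degrees (output)    */
  int *gpin = buf + 3*n;    /* global pin counts (output) */

  /* ... fill ldeg[0..n-1] and lpin[0..n-1] here ... */

  /* One reduction covers both arrays because ldeg/lpin are adjacent,
   * and the sums land in the adjacent gdeg/gpin arrays. */
  MPI_Allreduce(ldeg, gdeg, 2 * n, MPI_INT, MPI_SUM, col_comm);

  (void) gpin;  /* results would be consumed here */
  free(buf);
}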
Example No. 4
int Zoltan_LB_Get_Part_Sizes(ZZ *zz, 
    int num_global_parts, int part_dim, float *part_sizes)
{
/*
 *  Function to get the scaled part sizes.
 *
 *  Input:
 *    zz            --  The Zoltan structure to which this method
 *                      applies.
 *    num_global_parts -- Number of global parts.
 *                      (This usually equals lb->Num_Global_Parts)
 *    part_dim      --  The number of object weights per part.
 *                      (This usually equals lb->Obj_Wgt_Dim.)
 *
 *  Output:
 *    part_sizes    --  Array of floats giving the part sizes set by the
 *                      user, scaled so that they sum to one for each weight.
 */
  int i, j, nparts, fpart;
  float *temp_part_sizes=NULL, *sum=NULL;
  int error = ZOLTAN_OK;
  char msg[128];
  static char *yo = "Zoltan_LB_Get_Part_Sizes";

  ZOLTAN_TRACE_ENTER(zz, yo);
  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL)
    printf("[%1d] Debug: num_global_parts = %d\n", zz->Proc, num_global_parts);

  /* Barrier to make sure all procs have finished Zoltan_LB_Set_Part_Sizes */
  MPI_Barrier(zz->Communicator);

  /* For convenience, if no weights are used, set part_dim to 1 */
  if (part_dim==0) part_dim = 1;

  if (part_sizes == NULL){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input argument part_sizes is NULL.");
    error = ZOLTAN_FATAL;
    goto End;
  }

  /* Find max Part_Info_Len over all procs to see if they are all zero. */
  MPI_Allreduce((void*) &(zz->LB.Part_Info_Len), (void*) &j, 
      1, MPI_INT, MPI_MAX, zz->Communicator);

  if (j == 0){
    /* Uniform part sizes. */
    zz->LB.Uniform_Parts = 1;
    for (i = 0; i < num_global_parts*part_dim; i++)
      part_sizes[i] = 1.0 / (float)num_global_parts;
  }
  else {
   /* Get the part sizes set by the user (application).
    * Each processor puts its data in a part_dim * num_global_parts
    * array. Then we gather all the data across processors.
    * Out-of-range part size data is ignored.
    */
    zz->LB.Uniform_Parts = 0;

    /* Pack LB.Part_Info into temp array */
    temp_part_sizes = (float *)ZOLTAN_MALLOC(num_global_parts*part_dim
      *sizeof(float));
    sum = (float *)ZOLTAN_MALLOC(part_dim*sizeof(float));
    if ((!temp_part_sizes) || (!sum)){
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      error = ZOLTAN_MEMERR;
      goto End;
    }
    for (i = 0; i < num_global_parts*part_dim; i++){
      temp_part_sizes[i] = -1.0;
    }
    for (i = 0; i < zz->LB.Part_Info_Len; i++){
      /* Only assemble part sizes for parts and weights
         in the requested range. */
      if (zz->LB.Part_Info[i].Idx < part_dim){
        j = zz->LB.Part_Info[i].Part_id;
        if (zz->LB.Part_Info[i].Global_num == 0) {
          Zoltan_LB_Proc_To_Part(zz, zz->Proc, &nparts, &fpart);
          j += fpart;
        }
        if (j >= num_global_parts){
          sprintf(msg, "Part number %d is >= num_global_parts %d.",
            j, num_global_parts);
          ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
          error = ZOLTAN_WARN;
        }
        else
          temp_part_sizes[j*part_dim + zz->LB.Part_Info[i].Idx] 
            = zz->LB.Part_Info[i].Size;
      }
    }

    /* Reduce over all procs */
    MPI_Allreduce((void*) temp_part_sizes, (void*) part_sizes, 
      num_global_parts*part_dim, MPI_FLOAT, MPI_MAX, zz->Communicator);
  
    /* Check for errors. Scale the sizes so they sum to one for each weight. */
    for (j = 0; j < part_dim; j++) 
      sum[j] = 0.0;

    for (i = 0; i < num_global_parts; i++){
      for (j = 0; j < part_dim; j++){
        if (part_sizes[i*part_dim+j]<0)
          part_sizes[i*part_dim+j] = 1.0; /* default value if not set */
        sum[j] += part_sizes[i*part_dim+j];
      }

      if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL){
        printf("[%1d] In %s: Part size %1d (before scaling) = ",  
            zz->Proc, yo, i);
        for (j = 0; j < part_dim; j++)
          printf("%f, ",  part_sizes[i*part_dim+j]);
        printf("\n");
      }
    }

    /* Check for sum[j] == 0 (error). */
    for (j = 0; j < part_dim; j++) {
      if (sum[j] == 0.0) {
        sprintf(msg, "Sum of weights (component %1d) is zero.", j);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        error = ZOLTAN_FATAL;
        goto End;
      }
    }

    /* Normalize part sizes */
    for (i = 0; i < num_global_parts; i++)
      for (j = 0; j < part_dim; j++)
        part_sizes[i*part_dim+j] /= sum[j];

  }
 
End:
  if (temp_part_sizes) ZOLTAN_FREE(&temp_part_sizes);
  if (sum)             ZOLTAN_FREE(&sum);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return error;
}
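
/* The scaling contract above: for each weight component j, part sizes are
 * divided by their column sum so they total one.  E.g., with one weight and
 * raw sizes {1, 2, 1}, the scaled sizes are {0.25, 0.5, 0.25}.  A standalone
 * sketch of just the normalization step: */

static void scale_part_sizes(float *sizes, int nparts, int dim)
{
  int i, j;
  for (j = 0; j < dim; j++) {
    float sum = 0.0f;
    for (i = 0; i < nparts; i++)
      sum += sizes[i*dim + j];
    if (sum > 0.0f)                 /* a zero sum is an error for the caller */
      for (i = 0; i < nparts; i++)
        sizes[i*dim + j] /= sum;
  }
}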
Example No. 5
int Zoltan_RIB_Build_Structure(ZZ *zz, int *num_obj, int *max_obj, int wgtflag,
                               double overalloc, int use_ids)
{
/* Function to build the geometry-based data structures for RIB method. */
char           *yo = "Zoltan_RIB_Build_Structure";
RIB_STRUCT     *rib;                  /* Data structure for RIB.             */
struct rib_tree *treeptr;
int            i, ierr = 0;

  /* Allocate an RIB data structure for this Zoltan structure.
     If the previous data structure is still there, free the Dots and IDs first;
     the other fields can be reused. */

  if (zz->LB.Data_Structure == NULL) {
    rib = (RIB_STRUCT *) ZOLTAN_MALLOC(sizeof(RIB_STRUCT));
    if (rib == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      return(ZOLTAN_MEMERR);
    }
    zz->LB.Data_Structure = (void *) rib;
    rib->Tree_Ptr = NULL;
    rib->Global_IDs = NULL;
    rib->Local_IDs = NULL;
    rib->Dots = NULL;

    Zoltan_Initialize_Transformation(&(rib->Tran));

    rib->Tree_Ptr = (struct rib_tree *)
              ZOLTAN_MALLOC(zz->LB.Num_Global_Parts* sizeof(struct rib_tree));
    if (rib->Tree_Ptr == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      Zoltan_RIB_Free_Structure(zz);
      return(ZOLTAN_MEMERR);
    }
    /* initialize Tree_Ptr */
    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      treeptr = &(rib->Tree_Ptr[i]);
      treeptr->cm[0] = treeptr->cm[1] = treeptr->cm[2] = 0.0;
      treeptr->ev[0] = treeptr->ev[1] = treeptr->ev[2] = 0.0;
      treeptr->cut = 0.0;
      treeptr->parent = treeptr->left_leaf = treeptr->right_leaf = 0;
    }
  }
  else {
    rib = (RIB_STRUCT *) zz->LB.Data_Structure;
    ZOLTAN_FREE(&(rib->Global_IDs));
    ZOLTAN_FREE(&(rib->Local_IDs));
    ZOLTAN_FREE(&(rib->Dots));
  }

  ierr = Zoltan_RB_Build_Structure(zz, &(rib->Global_IDs), &(rib->Local_IDs),
                               &(rib->Dots), num_obj, max_obj, &(rib->Num_Geom),
                               wgtflag, overalloc, use_ids);
  if (ierr) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                       "Error returned from Zoltan_RB_Build_Structure.");
    Zoltan_RIB_Free_Structure(zz);
    return(ierr);
  }

  return(ZOLTAN_OK);
}
Example No. 6
int
Zoltan_PHG_2ways_hyperedge_partition (
  ZZ *zz,                            /* Input : Zoltan data structure */
  HGraph *hg,
  Partition parts,
  Zoltan_PHG_Tree *tree,
  struct Zoltan_DD_Struct * gnoToGID,
  struct Zoltan_DD_Struct **dd,
  int *numParts,
  int **sizeParts
)
{
  int ierr = ZOLTAN_OK;
  char *yo = "Zoltan_PHG_2ways_hyperedge_partition";
  int nEdge, hEdge;
  int *interval;
  int *partnumber = NULL;
  int tree_size;
  ZOLTAN_ID_TYPE *rowpart =NULL;  /* ZOLTAN_ID_TYPE because it's used in Zoltan_DD_* */
  ZOLTAN_GNO_TYPE *rowGNO = NULL;
  ZOLTAN_ID_PTR rowGID=NULL;
  int index;
  int offset;

  ZOLTAN_TRACE_ENTER(zz, yo);

  nEdge = hg->nEdge;
  fprintf (stderr, "HG (%d %d x %d) : %d %d\n", hg->comm->myProc, hg->comm->myProc_x, hg->comm->myProc_y,  hg->nVtx, nEdge);

  interval = (int*)ZOLTAN_MALLOC(nEdge*2*sizeof(int));
  if ((nEdge > 0 ) && (interval == NULL)) MEMORY_ERROR;

  tree_size = get_tree_size(tree) + 1;
  for (index = 0 ; index < nEdge ; ++index){
    SET_MIN_NODE(interval, index, tree_size);
    SET_MAX_NODE(interval, index, -1);
  }

  /* Update interval with the local knowledge */
  /* XXX: I loop over the hyperedges, as I think it's more cache friendly,
   * and it lets me decouple the computation if a non-blocking MPI_Reduce
   * is available.
   */
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge){
    int part;
    int max = -1;                     /* Trick: these initial values are the identities for MIN/MAX */
    int min = tree_size + 1;

    for (index = hg->hindex[hEdge] ; index < hg->hindex[hEdge+1] ; ++ index) {
      part = parts[hg->hvertex[index]];

      max = MAX(max, part);
      min = MIN(min, part);
    }
    SET_MIN_NODE(interval, hEdge, min);
    SET_MAX_NODE(interval, hEdge, max);
  }

  /* Update results to view the complete hyperedges */
  Zoltan_AllReduceInPlace (interval, 2*nEdge, MPI_INT, MPI_MAX, hg->comm->row_comm);

  /* Now I have to compute the partition of hyperedges according to the "interval"
   * and the tree */

  /* First, compute the partition number corresponding to the nodes in the tree */
  partnumber = compute_part_number(tree);
  if (partnumber == NULL) {
    ierr = ZOLTAN_FATAL;
    goto End;
  }

  (*numParts) = get_tree_size(tree);

  rowpart = (ZOLTAN_ID_TYPE*) ZOLTAN_MALLOC(nEdge*sizeof(ZOLTAN_ID_TYPE));
  if ((nEdge > 0) && (rowpart == NULL)) MEMORY_ERROR;

  rowGNO = (ZOLTAN_GNO_TYPE*) ZOLTAN_MALLOC(nEdge*sizeof(ZOLTAN_GNO_TYPE));
  if ((nEdge > 0) && (rowGNO == NULL)) MEMORY_ERROR;

  (*sizeParts) = (int*)ZOLTAN_CALLOC((*numParts), sizeof(int));
  if (*numParts && (*sizeParts) == NULL) MEMORY_ERROR;

  offset = hg->dist_y[hg->comm->myProc_y];
  (void) offset; /* forestall warning for "variable set but unused" */
  /* Then we search where each hyperedge lies in the tree */
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge) {
    int node;
    node = find_interval_in_tree(tree, interval+2*hEdge);
    rowpart[hEdge] = partnumber[node];
    (*sizeParts)[rowpart[hEdge]] ++;
    rowGNO[hEdge] = EDGE_LNO_TO_GNO(hg, hEdge);
#if 0
    fprintf (stderr, "%zd : " ZOLTAN_ID_SPEC " (%d : %d - %d)\n", rowGNO[hEdge], rowpart[hEdge], node, -interval[2*hEdge], interval[2*hEdge+1]);
#endif
  }

  partnumber += 1; /* compute_part_number() returned an offset pointer; restore it before freeing */
  ZOLTAN_FREE(&partnumber);
  ZOLTAN_FREE(&interval);

  /* Compute number of elements per parts */
  /* TODO: support processor which are not part of the distribution */

  /* Update results to view the complete hyperedges */
  Zoltan_AllReduceInPlace ((*sizeParts), (*numParts), MPI_INT, MPI_SUM, hg->comm->col_comm);


  /* Export results to data directory */
  /* First, get the GIDs of our edges */
  rowGID = ZOLTAN_MALLOC_GID_ARRAY(zz, nEdge);
  if (nEdge && rowGID == NULL) MEMORY_ERROR;
  ierr = Zoltan_DD_Find (gnoToGID , (ZOLTAN_ID_PTR)rowGNO, rowGID, NULL, NULL,
                         nEdge, NULL);
  CHECK_IERR;
  ZOLTAN_FREE(&rowGNO);

  ierr = Zoltan_DD_Create (dd, zz->Communicator, zz->Num_GID, 1, 0, nEdge, 0);
  CHECK_IERR;

  /* Make our new numbering public */
  Zoltan_DD_Update (*dd, (ZOLTAN_ID_PTR)rowGID, rowpart, NULL,  NULL, nEdge);

#ifdef CEDRIC_PRINT
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge) {
    fprintf (stderr, "%d : %d\n", rowGID[hEdge], rowpart[hEdge]);
  }
#endif


 End:
  ZOLTAN_FREE(&rowGID);
  ZOLTAN_FREE(&rowGNO);
  ZOLTAN_FREE(&rowpart);

  if (partnumber != NULL)
    partnumber += 1;
  ZOLTAN_FREE(&partnumber);
  ZOLTAN_FREE(&interval);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
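
/* The interval array above apparently stores each hyperedge's minimum negated
 * (note the -interval[2*hEdge] in the disabled debug print), so a single
 * MPI_MAX reduction yields both bounds at once: max(-x) == -(min x).  A
 * standalone sketch of that trick, without the SET_MIN_NODE/SET_MAX_NODE
 * wrappers: */

static void global_min_max(int local_min, int local_max,
                           int *gmin, int *gmax, MPI_Comm comm)
{
  int bounds[2];
  bounds[0] = -local_min;   /* negated minimum */
  bounds[1] =  local_max;
  MPI_Allreduce(MPI_IN_PLACE, bounds, 2, MPI_INT, MPI_MAX, comm);
  *gmin = -bounds[0];       /* max of negated minima == negated global minimum */
  *gmax =  bounds[1];
}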
Example No. 7
static int gather_and_build_remap(
  ZZ *zz, 
  int *new_map,               /* Upon return, flag indicating whether parts
                                 assignments were changed due to remap. */
  int HEcnt,                  /* # of HEs allocated. */
  int *HEinfo                 /* Array of HE info; for each HE, two pins and 
                                 one edge weight. Stored as a single vector
                                 to minimize communication calls.  */
)
{
char *yo = "gather_and_remap";
int ierr = ZOLTAN_OK;
int i, uidx, tmp;
int *each_size = NULL;        /* sizes (# HEs * HEINFO_ENTRIES) for each proc */
int *recvbuf = NULL;          /* Receive buffer for gatherv */
int *displs = NULL;           /* Displacement buffer for gatherv */
int send_size;                /* Local # HEs * HEINFO_ENTRIES */
int total_size;               /* Total # ints in gatherv */
int total_HEcnt;              /* Total (across all procs) number of HEs. */
int max0, max1;               /* Max values of pin 0 and pin 1 for each HE. */
int *match = NULL;            /* Vector describing the matching. 
                                 match[i] = j ==> match[j] = i ==> 
                                 vertices i and j are matched. */
int *used = NULL;             /* Vector indicating which partitions are used
                                 in the matching. */
int limit;                    /* Maximum number of matches that are allowed */
HGraph hg;                    /* Hypergraph for matching */
float before_remap = 0,       /* Amount of data that overlaps between old and */
      after_remap = 0;        /* new decomposition before and after remapping, 
                                 respectively. */
float with_oldremap = 0;      /* Amount of data that overlaps between old and
                                 new decomposition using the OldRemap vector
                                 (remapping from the previous decomposition). */


  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  displs = recvbuf + total_size;

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT, 
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;
  
  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty partitions, allowing
   * max1 to be less than LB.Num_Global_Parts. 
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  if (max1 > zz->LB.Num_Global_Parts) 
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG */

  Zoltan_HG_HGraph_Init(&hg);
  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;  
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i; 
      hg.hvertex[i+i] = recvbuf[tmp];
      hg.hvertex[i+i+1] = (int)recvbuf[tmp+1]+max0;
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts, sizeof(int));
  used = match + hg.nVtx;
  if (hg.nVtx && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts); 
  do_match(zz, &hg, match, limit);

      
  /* Build remapping vector, if non-trivial matching was returned. */

  *new_map = 0;
  for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
    if (match[i+max0] != i+max0) {
      *new_map = 1;
      break;
    }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }


    /* First, process all parts that were matched. Mark matched parts as used.*/

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1; 
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }
  
    /* Third, process remaining unmatched parts; assign them to 
       unused partitions.*/
  
    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused partition */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map) 
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap) 
    for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
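
/* The gather above is the classic variable-size MPI_Allgatherv pattern:
 * learn every rank's contribution size, prefix-sum the displacements, then
 * gather the concatenated data everywhere.  A self-contained sketch with a
 * hypothetical helper name (assumes <mpi.h> and <stdlib.h>; error checking
 * omitted): */

static int *gather_all_ints(const int *sendbuf, int send_size,
                            MPI_Comm comm, int *total_out)
{
  int i, nproc, total;
  int *sizes, *displs, *recvbuf;

  MPI_Comm_size(comm, &nproc);
  sizes  = (int *) malloc(nproc * sizeof(int));
  displs = (int *) malloc(nproc * sizeof(int));
  MPI_Allgather(&send_size, 1, MPI_INT, sizes, 1, MPI_INT, comm);
  displs[0] = 0;
  for (i = 1; i < nproc; i++)
    displs[i] = displs[i-1] + sizes[i-1];
  total = displs[nproc-1] + sizes[nproc-1];
  recvbuf = (int *) malloc(total * sizeof(int));
  MPI_Allgatherv(sendbuf, send_size, MPI_INT,
                 recvbuf, sizes, displs, MPI_INT, comm);
  free(sizes);
  free(displs);
  *total_out = total;
  return recvbuf;              /* caller frees */
}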
Example No. 8
int Zoltan_Oct_migrate_octants(ZZ *zz, int *newpids, pOctant *octs, int nocts, int *nrecocts) {
  int i,j = 0;
  int nsends = 0;
  int nreceives = 0;
  int *despid = NULL;
  OCTNEW_msg *snd_reply = NULL;
  OCTNEW_msg *rcv_reply = NULL;
  int ierr = ZOLTAN_OK;
  ZOLTAN_COMM_OBJ *comm_plan;        /* Object returned by communication routines */
  char *yo = "Zoltan_Oct_migrate_octants";
  pOctant *newocts = NULL;                          /* New foreign octant pointers */

  if((newocts = (pOctant *) ZOLTAN_MALLOC(sizeof(pOctant)*(nocts+10))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  for (i=0; i<nocts; i++)
    newocts[i]=NULL;


  /* count number of sends */

  nsends=0;
  for (i=0; i<nocts; i++)            
    if (newpids[i]!=zz->Proc) 
      nsends++;

  /* build message array */

  if((snd_reply = (OCTNEW_msg *) ZOLTAN_MALLOC((nsends + 1) * sizeof(OCTNEW_msg))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }
  if((despid = (int *) ZOLTAN_MALLOC((nsends+10) * sizeof(int))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    return ZOLTAN_MEMERR;
  }

  j = 0;
  for (i=0; i<nocts; i++)                    
    if (newpids[i]!=zz->Proc) {  
      snd_reply[j].num = i;
      despid[j++] = newpids[i];
    }
   
  /* send messages */

  ierr = Zoltan_Comm_Create(&comm_plan, nsends, despid, zz->Communicator, MigOctCommCreate, &nreceives);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    ZOLTAN_FREE(&despid);
    return (ierr);
  }

  if((rcv_reply = (OCTNEW_msg *) ZOLTAN_MALLOC((nreceives + 1) * sizeof(OCTNEW_msg))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&rcv_reply);
    return ZOLTAN_MEMERR;
  }

  ierr = Zoltan_Comm_Do(comm_plan, MigOctCommDo, (char *) snd_reply,
		    sizeof(OCTNEW_msg), (char *) rcv_reply);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&rcv_reply);
    return (ierr);
  }
  /* Reply to malloc requests and receive malloc replies */

  for (i=0; i< nreceives; i++) {
    rcv_reply[i].ptr = Zoltan_Oct_POct_new((OCT_Global_Info *) (zz->LB.Data_Structure));
  }

  ierr = Zoltan_Comm_Do_Reverse(comm_plan, MigOctCommReverse, (char *) rcv_reply,
			    sizeof(OCTNEW_msg), NULL, (char *) snd_reply);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&rcv_reply);
    return (ierr);
  }
  


  /* store remote pointers locally for future migration */
  for (i=0; i<nsends; i++) {                  
    newocts[snd_reply[i].num] = snd_reply[i].ptr;
  }
    
  ierr = Zoltan_Comm_Destroy(&comm_plan);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&snd_reply);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&rcv_reply);
    return (ierr);
  }


  ZOLTAN_FREE(&snd_reply);
  ZOLTAN_FREE(&despid);
  ZOLTAN_FREE(&rcv_reply);

  /* set return value */

  *nrecocts = nreceives;
  ierr = Zoltan_Oct_Update_Connections(zz, octs, newpids, newocts, nocts);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    abort();
    return (ierr);
  }
  ierr = Zoltan_Oct_Final_Migration(zz, octs,newpids,newocts,nocts, *nrecocts);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    abort();
    return (ierr);
  }
  
  Zoltan_Oct_Update_Map(zz);

  ZOLTAN_FREE(&newocts);
  return ierr;
}
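
/* Zoltan_Comm_Create returns the number of incoming messages (nreceives)
 * even though only senders know the communication pattern.  A minimal sketch
 * of one standard way to derive that count with plain MPI (hypothetical
 * helper, not Zoltan's actual implementation): each rank tallies its
 * destinations, and a reduce-scatter sums, per rank, how many others are
 * targeting it. */

static int count_incoming(const int *destprocs, int nsends, MPI_Comm comm)
{
  int i, nproc, nrecv;
  int *marks;

  MPI_Comm_size(comm, &nproc);
  marks = (int *) calloc(nproc, sizeof(int));
  for (i = 0; i < nsends; i++)
    marks[destprocs[i]]++;          /* messages I send to each rank */
  MPI_Reduce_scatter_block(marks, &nrecv, 1, MPI_INT, MPI_SUM, comm);
  free(marks);
  return nrecv;                     /* messages destined for me */
}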
Example No. 9
int Zoltan_ParMetis_Order(
  ZZ *zz,               /* Zoltan structure */
  int num_obj,          /* Number of (local) objects to order. */
  ZOLTAN_ID_PTR gids,   /* List of global ids (local to this proc) */
                        /* The application must allocate enough space */
  ZOLTAN_ID_PTR lids,   /* List of local ids (local to this proc) */
                        /* The application must allocate enough space */
  ZOLTAN_ID_PTR rank,   /* rank[i] is the rank of gids[i] */
  int *iperm,
  ZOOS *order_opt       /* Ordering options, parsed by Zoltan_Order */
)
{
  static char *yo = "Zoltan_ParMetis_Order";
  int i, n, ierr;
  ZOLTAN_Output_Order ord;
  ZOLTAN_Third_Graph gr;

#ifdef ZOLTAN_PARMETIS
  MPI_Comm comm = zz->Communicator;/* don't want to risk letting external
                                      packages change our communicator */
#endif
  indextype numflag = 0;

  int timer_p = 0;
  int get_times = 0;
  int use_timers = 0;
  double times[5];

  ZOLTAN_ID_PTR       l_gids = NULL;
  ZOLTAN_ID_PTR       l_lids = NULL;

  indextype options[MAX_OPTIONS];
  char alg[MAX_PARAM_STRING_LEN+1];

  ZOLTAN_TRACE_ENTER(zz, yo);

#ifdef ZOLTAN_PARMETIS
#if TPL_USE_DATATYPE != TPL_METIS_DATATYPES

#ifdef TPL_FLOAT_WEIGHT
  i = 1;
#else
  i = 0;
#endif

  if ((sizeof(indextype) != sizeof(idxtype)) ||
      (sizeof(weighttype) != sizeof(idxtype)) || i){

    ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL,
          "Not supported: Multiple 3rd party libraries with incompatible "
          "data types.");
    return ZOLTAN_FATAL;
  }
#endif
#endif

  memset(&gr, 0, sizeof(ZOLTAN_Third_Graph));
  memset(&ord, 0, sizeof(ZOLTAN_Output_Order));
  memset(times, 0, sizeof(times));

  ord.order_opt = order_opt;

  if (!order_opt){
    /* If for some reason order_opt is NULL, allocate a new ZOOS here. */
    /* This should really never happen. */
    order_opt = (ZOOS *) ZOLTAN_MALLOC(sizeof(ZOOS));
    strcpy(order_opt->method,"PARMETIS");
  }

  ierr = Zoltan_Parmetis_Parse(zz, options, alg, NULL, NULL, &ord);
  /* ParMetis only computes the rank vector */
  order_opt->return_args = RETURN_RANK;

  /* Check that num_obj equals the number of objects on this proc. */
  /* This constraint may be removed in the future. */
  n = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
  if ((ierr!= ZOLTAN_OK) && (ierr!= ZOLTAN_WARN)){
    /* Return error code */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Get_Num_Obj returned error.");
    return(ZOLTAN_FATAL);
  }
  if (n != num_obj){
    /* Currently this is a fatal error. */
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input num_obj does not equal the "
                                     "number of objects.");
    return(ZOLTAN_FATAL);
  }

  /* Do not use weights for ordering */
  gr.obj_wgt_dim = -1;
  gr.edge_wgt_dim = -1;
  gr.num_obj = num_obj;

  /* Check what ordering type is requested */
  if (order_opt){
      SET_GLOBAL_GRAPH(&gr.graph_type); /* GLOBAL by default */

#ifdef ZOLTAN_PARMETIS
      if ((strcmp(order_opt->method, "METIS") == 0))
#endif /* ZOLTAN_PARMETIS */
      SET_LOCAL_GRAPH(&gr.graph_type);
  }
  gr.get_data = 1;

  if (IS_LOCAL_GRAPH(gr.graph_type) && zz->Num_Proc > 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Serial ordering on more than 1 process: "
                                     "set ParMetis instead.");
    return(ZOLTAN_FATAL);
  }

  timer_p = Zoltan_Preprocess_Timer(zz, &use_timers);

    /* Start timer */
  get_times = (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME);
  if (get_times){
    MPI_Barrier(zz->Communicator);
    times[0] = Zoltan_Time(zz->Timer);
  }

  ierr = Zoltan_Preprocess_Graph(zz, &l_gids, &l_lids,  &gr, NULL, NULL, NULL);
  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)) {
    Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, NULL);
    return (ierr);
  }

  /* Allocate space for separator sizes */

  if (IS_GLOBAL_GRAPH(gr.graph_type)) {
    if (Zoltan_TPL_Order_Init_Tree(&zz->TPL_Order, 2*zz->Num_Proc, zz->Num_Proc) != ZOLTAN_OK) {
      /* Not enough memory */
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
    ord.sep_sizes = (indextype*)ZOLTAN_MALLOC((2*zz->Num_Proc+1)*sizeof(indextype));
    if (ord.sep_sizes == NULL) {
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
    memset(ord.sep_sizes, 0, (2*zz->Num_Proc+1)*sizeof(indextype)); /* ParMETIS does not seem to initialize this correctly */
  }

  /* Allocate space for direct perm */
  ord.rank = (indextype *) ZOLTAN_MALLOC(gr.num_obj*sizeof(indextype));
  if (!ord.rank){
    /* Not enough memory */
    Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
    ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
  }
  if (IS_LOCAL_GRAPH(gr.graph_type)){
  /* Allocate space for inverse perm */
    ord.iperm = (indextype *) ZOLTAN_MALLOC(gr.num_obj*sizeof(indextype));
    if (!ord.iperm){
      /* Not enough memory */
      Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, &ord);
      ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
    }
  }
  else
    ord.iperm = NULL;

  /* Get a time here */
  if (get_times) times[1] = Zoltan_Time(zz->Timer);

#ifdef ZOLTAN_PARMETIS
  if (IS_GLOBAL_GRAPH(gr.graph_type)){
    ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library");

    ParMETIS_V3_NodeND (gr.vtxdist, gr.xadj, gr.adjncy, 
                        &numflag, options, ord.rank, ord.sep_sizes, &comm);
    ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");

  }
  else
#endif /* ZOLTAN_PARMETIS */
#if defined(ZOLTAN_METIS) || defined(ZOLTAN_PARMETIS)
  if (IS_LOCAL_GRAPH(gr.graph_type)) { /* Be careful: permutation parameters are in the opposite order */
    indextype numobj = gr.num_obj;
    ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the METIS library");
    options[0] = 0;  /* Use default options for METIS. */
    order_opt->return_args = RETURN_RANK|RETURN_IPERM; /* We provide directly all the permutations */

    METIS_NodeND(&numobj, gr.xadj, gr.adjncy, &numflag, options, 
                 ord.iperm, ord.rank);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the METIS library");
  }
#endif /* ZOLTAN_METIS */

  /* Get a time here */
  if (get_times) times[2] = Zoltan_Time(zz->Timer);

  if (IS_GLOBAL_GRAPH(gr.graph_type)){ /* Update Elimination tree */
    int numbloc;
    int start;
    int leaf;
    int *converttab;
    int levelmax;

    levelmax = mylog2(zz->Num_Proc) + 1;
    converttab = (int*)ZOLTAN_MALLOC(zz->Num_Proc*2*sizeof(int));

    memset(converttab, 0, zz->Num_Proc*2*sizeof(int));
     /* Determine the first node in each separator, store it in zz->TPL_Order.start */
    for (numbloc = 0, start=0, leaf=0; numbloc < zz->Num_Proc /2; numbloc++) {
      int father;

      father = zz->Num_Proc + numbloc;
      converttab[start] = 2*numbloc;
      zz->TPL_Order.leaves[leaf++]=start;
      zz->TPL_Order.ancestor[start] = start + 2;
      converttab[start+1] = 2*numbloc+1;
      zz->TPL_Order.leaves[leaf++]=start+1;
      zz->TPL_Order.ancestor[start+1] = start + 2;
      start+=2;
      do {
        converttab[start] = father;
        if (father %2 == 0) {
          int nextoffset;
          int level;

          level = mylog2(2*zz->Num_Proc - 1 - father);
          nextoffset = (1<<(levelmax-level));
          zz->TPL_Order.ancestor[start] = start+nextoffset;
          start++;
          break;
        }
        else {
          zz->TPL_Order.ancestor[start] = start+1;
          start++;
          father = zz->Num_Proc + father/2;
        }
      } while (father < 2*zz->Num_Proc - 1);
    }

    zz->TPL_Order.start[0] = 0;
    zz->TPL_Order.ancestor [2*zz->Num_Proc - 2] = -1;
    for (numbloc = 1 ; numbloc < 2*zz->Num_Proc ; numbloc++) {
      int oldblock=converttab[numbloc-1];
      zz->TPL_Order.start[numbloc] = zz->TPL_Order.start[numbloc-1] + ord.sep_sizes[oldblock];
    }

    ZOLTAN_FREE(&converttab);
    ZOLTAN_FREE(&ord.sep_sizes);

    zz->TPL_Order.leaves[zz->Num_Proc] = -1;
    zz->TPL_Order.nbr_leaves = zz->Num_Proc;
    zz->TPL_Order.nbr_blocks = 2*zz->Num_Proc-1;
  }
  else { /* No tree */
    zz->TPL_Order.nbr_blocks = 0;
    zz->TPL_Order.start = NULL;
    zz->TPL_Order.ancestor = NULL;
    zz->TPL_Order.leaves = NULL;
  }

  /* Correct because no redistribution */
  memcpy(gids, l_gids, n*zz->Num_GID*sizeof(ZOLTAN_ID_TYPE));
  memcpy(lids, l_lids, n*zz->Num_LID*sizeof(ZOLTAN_ID_TYPE));

  ierr = Zoltan_Postprocess_Graph (zz, l_gids, l_lids, &gr, NULL, NULL, NULL, &ord, NULL);

  ZOLTAN_FREE(&l_gids); 
  ZOLTAN_FREE(&l_lids);

  /* Get a time here */
  if (get_times) times[3] = Zoltan_Time(zz->Timer);

  if (get_times) Zoltan_Third_DisplayTime(zz, times);

  if (use_timers)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_p, zz->Communicator);

  if (sizeof(indextype) == sizeof(ZOLTAN_ID_TYPE)){
    memcpy(rank, ord.rank, gr.num_obj*sizeof(indextype));
  }
  else{
    for (i=0; i < gr.num_obj; i++){
      rank[i] = (ZOLTAN_ID_TYPE)ord.rank[i];
    }
  }

  if ((ord.iperm != NULL) && (iperm != NULL)){
    if (sizeof(indextype) == sizeof(int)){
      memcpy(iperm, ord.iperm, gr.num_obj*sizeof(indextype));
    }
    else{
      for (i=0; i < gr.num_obj; i++){
        iperm[i] = (int)ord.iperm[i];
      }
    }
  }

  if (ord.iperm != NULL)  ZOLTAN_FREE(&ord.iperm);
  ZOLTAN_FREE(&ord.rank);

  /* Free all other "graph" stuff */
  Zoltan_Third_Exit(&gr, NULL, NULL, NULL, NULL, NULL);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ZOLTAN_OK);
}
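
/* The elimination-tree code above relies on a mylog2() helper that is not
 * shown in this example.  A minimal sketch consistent with its use here
 * (floor of the base-2 logarithm of a positive int): */

static int mylog2(int n)
{
  int log;
  for (log = 0; n > 1; n >>= 1)
    log++;
  return log;
}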
Example No. 10
static int Zoltan_Oct_Final_Migration(
ZZ *zz,
pOctant *octs,      /* octs[nocts]    */
int *newpids,       /* newpids[nocts] */
pOctant *newocts,   /* newocts[nocts] */
int nocts,          /* number of octants leaving this processor */
int nrecocts)       /* number of octants received in this processor */
{
  int i;
  Migrate_msg *msnd = NULL, *mrcv = NULL;
  int remotecount;
  int nsends;
  int nreceives;
  int *despid = NULL;
  int ierr = ZOLTAN_OK;
  ZOLTAN_COMM_OBJ *comm_plan;           /* Object returned by communication routines */
  char *yo = "Zoltan_Oct_Final_Migration";
  OCT_Global_Info *OCT_info = (OCT_Global_Info *) zz->LB.Data_Structure;

  /* count number of sends */
  nsends=0;
  for (i=0; i<nocts; i++)              
    if (newpids[i]!=zz->Proc)
      nsends++;

  if((despid = (int *) ZOLTAN_MALLOC((nocts+10) * sizeof(int))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ZOLTAN_MEMERR;
  }

  if((msnd   = (Migrate_msg *) ZOLTAN_MALLOC((nocts+10) * sizeof(Migrate_msg))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    return ZOLTAN_MEMERR;
  }

  remotecount = 0;
  for (i=0; i<nocts; i++)                          /* Send and free */
    if (newpids[i]!=zz->Proc)
      { 
	FILLMIGRATEMSG(octs[i], newocts[i], msnd[remotecount], zz->Proc); /* bug */
	despid[remotecount++] = newpids[i];
	Zoltan_Oct_clearRegions(octs[i]);
        /* KDDKDDFREE Change oct to &oct to allow NULL from ZOLTAN_FREE 
         * KDDKDDFREE to propagate back. */
	Zoltan_Oct_POct_free(OCT_info, &(octs[i]));
      }

  ierr = Zoltan_Comm_Create(&comm_plan, remotecount, despid, zz->Communicator,
			MigFinCommCreate, &nreceives);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&msnd);
    return (ierr);
  }

  if((mrcv = (Migrate_msg *) ZOLTAN_MALLOC((nreceives+10) * sizeof(Migrate_msg))) == NULL) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&msnd);
    return ZOLTAN_MEMERR;
  }

  ierr = Zoltan_Comm_Do(comm_plan, MigFinCommDo, (char *) msnd,
		    sizeof(Migrate_msg), (char *) mrcv);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&msnd);
    ZOLTAN_FREE(&mrcv);
    return (ierr);
  }

  for (i=0; i<nreceives; i++) {                     /* Receive new parocts */
    /* SETOCTFROMMIGRATEMSG replaces the individual setters formerly called
     * here: Zoltan_Oct_setID, Zoltan_Oct_POct_setparent,
     * Zoltan_Oct_setchildnum, Zoltan_Oct_setchildren, Zoltan_Oct_setbounds,
     * Zoltan_Oct_setDir, and Zoltan_Oct_setMapIdx. */
    SETOCTFROMMIGRATEMSG(OCT_info, mrcv[i]);
  }

  ierr = Zoltan_Comm_Destroy(&comm_plan);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&msnd);
    ZOLTAN_FREE(&mrcv);
    return (ierr);
  }

  ZOLTAN_FREE(&despid);
  ZOLTAN_FREE(&msnd);
  ZOLTAN_FREE(&mrcv);

  return ierr;
}
Example No. 11
static int Zoltan_Oct_build_global_rootlist(ZZ *zz,Migrate_msg  **ret_rmsg, int *size) {
  int j, k = 0;
  int *despid = NULL;
  int nroots, nreceives;
  pRList  RootList;                  /* list of the local roots */
  pOctant RootOct;
  Migrate_msg *snd_rmsg = NULL;
  Migrate_msg *rcv_rmsg = NULL;
  OCT_Global_Info *OCT_info = (OCT_Global_Info *)(zz->LB.Data_Structure);
  ZOLTAN_COMM_OBJ *comm_plan;                /* Object returned by communication routines */

  int ierr = ZOLTAN_OK;
  char *yo = "Zoltan_Oct_build_global_rootlist";
 
  nroots = RL_numRootOctants(Zoltan_Oct_POct_localroots(OCT_info));


  if (nroots > 0) {  /* KDDKDD -- Added test to prevent departure before comm */
    if((despid = (int *) ZOLTAN_MALLOC((zz->Num_Proc)*nroots * sizeof(int))) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      return ZOLTAN_MEMERR;
    }

    if((snd_rmsg = (Migrate_msg *) ZOLTAN_MALLOC((zz->Num_Proc)*nroots * sizeof(Migrate_msg))) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&despid);
      return ZOLTAN_MEMERR;
    }

    k = 0;
    for (j=0; j<zz->Num_Proc; j++) {
      RootList = Zoltan_Oct_POct_localroots(OCT_info);
      while((RootOct = RL_nextRootOctant(&RootList))) {
        FILLMIGRATEMSG(RootOct, RootOct, snd_rmsg[k], zz->Proc);
        despid[k] = j;
        k++;
      }
    }
  }  /* KDDKDD */
  
  ierr = Zoltan_Comm_Create(&comm_plan, k, despid, zz->Communicator,
			RootListCommCreate, &nreceives);

  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&snd_rmsg);
    return (ierr);
  }

  if (nreceives > 0) {
    if((rcv_rmsg = (Migrate_msg *) ZOLTAN_MALLOC(nreceives * sizeof(Migrate_msg))) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&despid);
      ZOLTAN_FREE(&snd_rmsg);
      return ZOLTAN_MEMERR;
    }
  }

  

  ierr = Zoltan_Comm_Do(comm_plan, RootListCommDo, (char *) snd_rmsg, sizeof(Migrate_msg), (char *) rcv_rmsg);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&snd_rmsg);
    ZOLTAN_FREE(&rcv_rmsg);
    return (ierr);
  }

  ZOLTAN_FREE(&despid);
  ZOLTAN_FREE(&snd_rmsg);


  ierr = Zoltan_Comm_Destroy(&comm_plan);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
  }

  *ret_rmsg = rcv_rmsg;
  *size = nreceives;

  return ierr;
}
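
A note on ownership: the gathered roots come back through ret_rmsg and size. Presumably the (in-file) caller walks the received list and then frees the buffer itself; a hedged sketch of that use, with a placeholder processing loop:

Migrate_msg *groots = NULL;
int ngroots = 0;
if (Zoltan_Oct_build_global_rootlist(zz, &groots, &ngroots) == ZOLTAN_OK) {
  /* ... process groots[0 .. ngroots-1] ... */
  ZOLTAN_FREE(&groots);   /* assumption: the caller owns the received buffer */
}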
Ejemplo n.º 12
0
static int Zoltan_Oct_Update_Connections(
ZZ *zz,
pOctant *octs,      /* octs[nocts]    */
int *newpids,       /* newpids[nocts] */
pOctant *newocts,   /* newocts[nocts] */
int nocts)          /* number of octants leaving this processor */
{
  int i, j;
  int nsends;
  int nreceives;
  pOctant parent;
  pOctant child;
  int ppid;
  int cpid;
  int childnum;
  int *despid = NULL;
  Update_msg umsg;
  Update_msg *localumsg = NULL;
  Update_msg *remoteumsg = NULL;
  Update_msg *rcv_umsg = NULL;
  int localcount;
  int remotecount;
  int ierr = ZOLTAN_OK;


  ZOLTAN_COMM_OBJ *comm_plan;           /* Object returned by communication routines */
  char *yo = "Zoltan_Oct_Update_Connections";
  OCT_Global_Info *OCT_info = (OCT_Global_Info *) zz->LB.Data_Structure;
  localcount=0;
  remotecount=0;

  /* count number of sends */
  nsends = 0;
  for (i=0; i<nocts; i++)              
    if (newpids[i]!=zz->Proc)
      nsends++;

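  /* Each migrating octant can generate one update for its parent plus up to
     eight for its children, which appears to be why the allocations below
     reserve nine Update_msg slots per octant. */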
  if(nocts > 0) {
    if((remoteumsg = (Update_msg *) ZOLTAN_MALLOC((nocts+1) * sizeof(Update_msg)*9)) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      return ZOLTAN_MEMERR;
    }
    
    if((localumsg  = (Update_msg *) ZOLTAN_MALLOC((nocts+1) * sizeof(Update_msg)*9)) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&remoteumsg);
      return ZOLTAN_MEMERR;
    }
    
    if((despid = (int *) ZOLTAN_MALLOC((nocts+1) * sizeof(int)*9)) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&remoteumsg);
      ZOLTAN_FREE(&localumsg);
      return ZOLTAN_MEMERR;
    }
  }
  else {
    remoteumsg = NULL;
    localumsg = NULL;
    despid = NULL;
  }
  localcount = 0;
  remotecount = 0;

  for (i=0; i<nocts; i++)                       /* Send connection updates */
    if (newpids[i]!=zz->Proc) {
	parent = Zoltan_Oct_parent(octs[i]); 
        ppid   = Zoltan_Oct_Ppid(octs[i]); 
        childnum = Zoltan_Oct_childnum(octs[i]);
	if (parent) {      /* Let parent of oct[i] know that it's moving   */
	  if (ppid==zz->Proc) {
	    FILLUPDATEMSG(localumsg[localcount], parent, childnum, newocts[i], newpids[i]);
	    localcount++;
	  }
	  else {
	    FILLUPDATEMSG(remoteumsg[remotecount], parent, childnum, newocts[i], newpids[i]);
	    despid[remotecount++] = ppid;
	  }
	}
	for (j=0; j<8; j++) {
	  child = Zoltan_Oct_child(octs[i],j);
	  cpid = octs[i]->cpid[j];
	  /* Tell child of oct[i] that it is moving */
	  if (child) {
	    if (cpid==zz->Proc) {
	      /* NOTE: -1 signals PARENT   */
	      FILLUPDATEMSG(localumsg[localcount], child, -1, newocts[i], newpids[i]);
	      localcount++;
	    }
	    else {
	      /* NOTE: -1 signals PARENT   */
	      FILLUPDATEMSG(remoteumsg[remotecount], child, -1, newocts[i], newpids[i]); 
	      despid[remotecount++] = cpid;
	    }
	  }
	}
    }

  ierr = Zoltan_Comm_Create(&comm_plan, remotecount, despid, zz->Communicator,
			MigUpdCommCreate, &nreceives);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&remoteumsg);
    ZOLTAN_FREE(&localumsg);
    ZOLTAN_FREE(&despid);
    return (ierr);
  }


/*   if(nreceives > 0) { */
    if((rcv_umsg = (Update_msg *) ZOLTAN_MALLOC((nreceives +1) * sizeof(Update_msg)*9)) == NULL) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&remoteumsg);
      ZOLTAN_FREE(&localumsg);
      ZOLTAN_FREE(&despid);
      return ZOLTAN_MEMERR;
    }

    
    ierr = Zoltan_Comm_Do(comm_plan, MigUpdCommDo, (char *) remoteumsg,
		      sizeof(Update_msg), (char *) rcv_umsg);
    if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&remoteumsg);
      ZOLTAN_FREE(&localumsg);
      ZOLTAN_FREE(&despid);
      ZOLTAN_FREE(&rcv_umsg);
      return (ierr);
    }

/*   } */
/*   else { */
/*     rcv_umsg = NULL; */
/*   } */
  /* update new octants */
  for (i=0; i< (localcount+nreceives); i++)  {   
    if (i<localcount) 
      umsg=localumsg[i];
    else 
      umsg=rcv_umsg[i-localcount];
    if (umsg.childnum>=0) {
      Zoltan_Oct_setchild(umsg.oct,umsg.childnum,umsg.newptr);
      Zoltan_Oct_setCpid(umsg.oct,umsg.childnum,umsg.newpid);
    }
    else {
      if((Zoltan_Oct_data_newpid(umsg.oct) ==  OCT_info->OCT_localpid) ||
	 ((Zoltan_Oct_data_newpid(umsg.oct) !=  OCT_info->OCT_localpid) && (umsg.newpid == OCT_info->OCT_localpid)))
	Zoltan_Oct_POct_setparent(OCT_info, umsg.oct,umsg.newptr,umsg.newpid);
      else {
	umsg.oct->ppid = umsg.newpid;
	umsg.oct->parent = umsg.newptr;
      }
    }
  }

  ierr = Zoltan_Comm_Destroy(&comm_plan);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&remoteumsg);
    ZOLTAN_FREE(&localumsg);
    ZOLTAN_FREE(&despid);
    ZOLTAN_FREE(&rcv_umsg);
    return (ierr);
  }

  ZOLTAN_FREE(&remoteumsg);
  ZOLTAN_FREE(&localumsg);
  ZOLTAN_FREE(&rcv_umsg);
  ZOLTAN_FREE(&despid);
  return ierr;
}
Ejemplo n.º 13
0
static int Zoltan_Oct_migreg_migrate_regions(ZZ *zz, Region *regions, 
                              ZOLTAN_ID_PTR gids, ZOLTAN_ID_PTR lids,
                              int *npids, int nregions, int *c2) 
{
  char *yo = "Zoltan_Oct_migreg_migrate_regions";
  int i;                         /* index counter */
  int ierr = ZOLTAN_OK;
  int n_import;
  ZOLTAN_COMM_OBJ *comm_plan;           /* Object returned by communication routines */
  Region *import_objs = NULL;    /* Array of import objects used to request 
				    the objs from other processors. */
  ZOLTAN_ID_PTR import_gids = NULL;  /* Array of global IDs of import_objs. */
  ZOLTAN_ID_PTR import_lids = NULL;  /* Array of local IDs of import_objs. */
  int num_gid_entries = zz->Num_GID;
  int num_lid_entries = zz->Num_LID;

  ierr = Zoltan_Comm_Create(&comm_plan, nregions, npids, zz->Communicator, 
                        MIGMIGREGCommCreate, &n_import);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Create");
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
  }
  *c2 = n_import;
  if (n_import > 0) {
    import_objs = (Region *) ZOLTAN_MALLOC(n_import * sizeof(Region));
    import_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, n_import);
    import_lids = ZOLTAN_MALLOC_LID_ARRAY(zz, n_import);

    if (!import_objs || !import_gids || (num_lid_entries && !import_lids)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&import_objs);
      ZOLTAN_FREE(&import_gids);
      ZOLTAN_FREE(&import_lids);
      return ZOLTAN_MEMERR;
    }
  }
  ierr = Zoltan_Comm_Do(comm_plan, MIGMIGREGCommDo, (char *) regions, sizeof(Region), 
                   (char *) import_objs);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&import_objs);
    ZOLTAN_FREE(&import_gids);
    ZOLTAN_FREE(&import_lids);
    return (ierr);
  }

  ierr = Zoltan_Comm_Do(comm_plan, MIGMIGREGCommDo-1, (char *) gids, 
                    sizeof(ZOLTAN_ID_TYPE)*num_gid_entries, (char *) import_gids);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
    ZOLTAN_TRACE_EXIT(zz, yo);
    ZOLTAN_FREE(&import_objs);
    ZOLTAN_FREE(&import_gids);
    ZOLTAN_FREE(&import_lids);
    return (ierr);
  }

  if (num_lid_entries > 0) {
    ierr = Zoltan_Comm_Do(comm_plan, MIGMIGREGCommDo-2, (char *) lids, 
                      sizeof(ZOLTAN_ID_TYPE)*num_lid_entries, (char *) import_lids);
    if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&import_objs);
      ZOLTAN_FREE(&import_gids);
      ZOLTAN_FREE(&import_lids);
      return (ierr);
    }
  }
  for (i=0; i<n_import; i++) {
    import_objs[i].Global_ID = &(import_gids[i*num_gid_entries]);
    import_objs[i].Local_ID = (num_lid_entries 
                                 ? &(import_lids[i*num_lid_entries]) 
                                 : NULL);
    Zoltan_Oct_insert_orphan(zz, import_objs[i]);
  }
  
  ZOLTAN_FREE(&import_objs);
  ZOLTAN_FREE(&import_gids);
  ZOLTAN_FREE(&import_lids);

  ierr = Zoltan_Comm_Destroy(&comm_plan);
  if(ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
  }
  return ierr;
}
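
The routines above all follow Zoltan's three-step unstructured-communication pattern: Zoltan_Comm_Create builds a plan from per-item destinations and reports how many items will arrive, one or more Zoltan_Comm_Do calls (each with its own message tag) move the buffers, and Zoltan_Comm_Destroy releases the plan. A minimal hedged sketch of that lifecycle; the helper name, the integer payload, and the tag values are made up for illustration:

/* Sketch: send one int per destination entry and collect what arrives. */
static int sketch_send_ints(ZZ *zz, int nsend, int *dest, int *payload)
{
  ZOLTAN_COMM_OBJ *plan = NULL;
  int *recvbuf = NULL;
  int nrecv = 0, ierr;

  /* 1. Build the plan; nrecv returns the number of incoming items. */
  ierr = Zoltan_Comm_Create(&plan, nsend, dest, zz->Communicator,
                            12345 /* arbitrary tag */, &nrecv);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
    return ierr;

  if (nrecv > 0 &&
      (recvbuf = (int *) ZOLTAN_MALLOC(nrecv * sizeof(int))) == NULL) {
    Zoltan_Comm_Destroy(&plan);
    return ZOLTAN_MEMERR;
  }

  /* 2. Execute the plan; a distinct tag allows reusing the same plan. */
  ierr = Zoltan_Comm_Do(plan, 12346, (char *) payload, sizeof(int),
                        (char *) recvbuf);

  /* 3. Release the plan and the receive buffer. */
  Zoltan_Comm_Destroy(&plan);
  ZOLTAN_FREE(&recvbuf);
  return ierr;
}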
Ejemplo n.º 14
0
int Zoltan_Oct_migreg_migrate_orphans(ZZ *zz, pRegion RegionList, int nregions,
                               int level, OMap *array, int *c1, int *c2) {
  int     i, j, k;                    /* index counters */
  pRegion ptr;                        /* region in the mesh */
  COORD   origin;                     /* centroid coordinate information */
  pRegion *regions = NULL;            /* an array of regions */
  int     *npids = NULL;
  Region  *regions2 = NULL;           /* an array of regions */
  int     *npids2 = NULL;
  int     nreg;                       /* number of regions */
  COORD   min,                        /* minimum bounds of an octant */
          max;                        /* maximum bounds of an octant */
  COORD   cmin,                       /* minimum bounds of a child octant */
          cmax;                       /* maximum bounds of a child octant */
  COORD   rmin,                       /* minimum bounds of a remote octant */
          rmax;                       /* maximum bounds of a remote octant */
  int     new_num;
  int     n;
  int     dir = 0;
  pRList  RootList;              
  pOctant RootOct;
  OCT_Global_Info *OCT_info = (OCT_Global_Info *)(zz->LB.Data_Structure);
  char *yo = "Zoltan_Oct_migreg_migrate_orphans_static";
  int ierr = ZOLTAN_OK;
  ZOLTAN_ID_PTR gids2, lids2;
  int num_gid_entries = zz->Num_GID;
  int num_lid_entries = zz->Num_LID;

  if(nregions > 0) {
    /* create the array of messages to be sent to other processors */
    /* Array = (Message *) ZOLTAN_MALLOC(nregions * sizeof(Message)); */
    
    if((regions = (pRegion *) ZOLTAN_MALLOC(nregions * sizeof(pRegion))) == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      ZOLTAN_TRACE_EXIT(zz, yo);
      return ZOLTAN_MEMERR;
    }
    if((npids = (int *) ZOLTAN_MALLOC(nregions * sizeof(int))) == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      ZOLTAN_TRACE_EXIT(zz, yo);
      ZOLTAN_FREE(&regions);
      return ZOLTAN_MEMERR;
    }
  }
  ptr = RegionList;
  n = nreg = 0;
  while((ptr != NULL) && (nregions > 0)) {
    if(ptr->attached == 1) {
      /* if region already attached to an octant, then skip to next region */
      ptr = ptr->next;
      continue;
    }

    /* region not attached, have to find which processor to send to */
    j=0;
    dir = 0;
    vector_set(min, OCT_info->OCT_gmin);
    vector_set(max, OCT_info->OCT_gmax);
    /*
     * for each level of refinement, find which child the region belongs to
     * and translate that child into the corresponding entry in the map array.
     */
    for(i=0; i<level; i++) {
      Zoltan_Oct_bounds_to_origin(min, max, origin);
      if(OCT_info->OCT_dimension == 2)
	j = j * 4;
      else
	j = j * 8;
      k = Zoltan_Oct_child_which(OCT_info,origin, ptr->Coord);
      new_num = Zoltan_Oct_convert_idx_from_map(OCT_info, dir, k);
      dir = Zoltan_Oct_get_child_dir(OCT_info, dir, new_num);
      j += new_num;
      Zoltan_Oct_child_bounds(min, max, origin, k, cmin, cmax);
      vector_set(min, cmin);
      vector_set(max, cmax);
    }
    /* inform message which processor to send to */
    npids[n] = array[j].npid;
    RootList = array[j].list;
    while((RootOct = RL_nextRootOctant(&RootList))) {
      Zoltan_Oct_bounds(RootOct,rmin,rmax);
      if (Zoltan_Oct_in_box_closure(OCT_info, ptr->Coord ,rmin, rmax)) {
	npids[n] = RootOct->npid;
	break;
      }
    }
    if((npids[n] != -1) && (npids[n] != zz->Proc)) {
      Zoltan_Oct_copy_info(zz, ptr, &(regions[n++]));
    }
    else {
      Zoltan_Oct_insert_orphan(zz, *ptr);
    }
    nreg++;                                      /* increment region counter */
    ptr = ptr->next;                                  /* look at next region */
  }

  /*
   * if the number of regions examined != number of regions in the
   * region list, then there is an error
   */
  if (nreg!=nregions) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Number of regions found != expected number of regions");
    return ZOLTAN_FATAL;
  }

  regions2 = (Region *) ZOLTAN_MALLOC(n * sizeof(Region));
  gids2 = ZOLTAN_MALLOC_GID_ARRAY(zz, n);
  lids2 = ZOLTAN_MALLOC_LID_ARRAY(zz, n);
  npids2 = (int *) ZOLTAN_MALLOC(n * sizeof(int));
  if (n && (!regions2 || !gids2 || !lids2 || !npids2)) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    return ZOLTAN_MEMERR;
  }
  
  for(i=0; i<n; i++) {
    npids2[i] = npids[i];
    vector_set(regions2[i].Coord, regions[i]->Coord);
    regions2[i].Weight = regions[i]->Weight;
    regions2[i].Global_ID = &(gids2[i*num_gid_entries]);
    regions2[i].Local_ID = (num_lid_entries 
                              ? &(lids2[i*num_lid_entries]) 
                              : NULL);
    ZOLTAN_SET_GID(zz, &(gids2[i*num_gid_entries]), regions[i]->Global_ID);
    ZOLTAN_SET_LID(zz, &(lids2[i*num_lid_entries]), regions[i]->Local_ID);
    regions2[i].Proc = regions[i]->Proc;
    regions2[i].attached = 0;
  }

  *c1 = n;
  /* migrate the orphan regions according to the message array */
  ierr = Zoltan_Oct_migreg_migrate_regions(zz, regions2, gids2, lids2, npids2, n, c2);
  
  for (i=0; i < n; i++) {
    ZOLTAN_FREE(&(regions[i]->Global_ID));
    ZOLTAN_FREE(&(regions[i]->Local_ID));
    ZOLTAN_FREE(&(regions[i]));
  }
  ZOLTAN_FREE(&regions);
  ZOLTAN_FREE(&npids);
  ZOLTAN_FREE(&regions2);
  ZOLTAN_FREE(&gids2);
  ZOLTAN_FREE(&lids2);
  ZOLTAN_FREE(&npids2);

  return ierr;
}
Ejemplo n.º 15
0
int Zoltan_RB_Create_Proc_List(
     ZZ       *zz,            /* Load-balancing structure. */
     int       set,           /* set that processor is in */
     int       dotnum,        /* number of dots that my processor has */
     int       outgoing,      /* number of dots that my processor is sending */
     int      *proclist,      /* processor list for my outgoing dots */
     MPI_Comm  comm,          /* communicator for partition */
     int       proclower,     /* smallest processor for Tflops_Special */
     int       numprocs       /* number of processors for Tflops_Special */
)
{
/* This routine calculates a communication pattern for the situation where
   there are two groups of processors and each processor has a number of
   items which need to be communicated to the other group.  This routine
   calculates a communication pattern which seeks to minimize the number
   of items that any one processor has after communication. */
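/* Illustrative (made-up) numbers: suppose my group sends sum_send = 5 dots
   and the other group has two procs holding rem = {2, 0}, so sum_rem = 2 and
   np_other = 2.  The search below starts at a = (5+2)/2 = 3, giving
   s = (3-2)+(3-0) = 4 < 5, so a is bumped to 4; then s = 6 > 5, so a drops
   back to 3 and the loop stops.  The receive counts become send = {1, 3},
   and the remainder loop hands the leftover dot to the first receiver,
   yielding {2, 3}. */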

     int  nprocs;             /* number of processors in partition */
     int  rank;               /* my processor number in partition */
     int  np_other = 0;       /* number of processors in other group */
     int  i, k;            /* loop indexes */
     int  err = ZOLTAN_OK;    /* error code */
     MPI_Datatype zoltan_gno_mpi_type;

     ZOLTAN_GNO_TYPE *send;         /* array of number of dots outgoing */
     ZOLTAN_GNO_TYPE *rem;          /* array of number of dots that remain */
     ZOLTAN_GNO_TYPE *sets;         /* set for each of the processors */
     ZOLTAN_GNO_TYPE  a;            /* number of dots that will be on each proc */
     ZOLTAN_GNO_TYPE  sum_send;     /* total number sent from my group */
     ZOLTAN_GNO_TYPE  sum_rem;      /* total number remaining in other group */
     ZOLTAN_GNO_TYPE  s, sp;        /* temporary sums */
     ZOLTAN_GNO_TYPE  num_to;       /* number of dots to send to a processor */
     ZOLTAN_GNO_TYPE *tmp_send;     /* Work vector */

     zoltan_gno_mpi_type = Zoltan_mpi_gno_type();


     /* allocate memory for arrays */
     MPI_Comm_rank(comm, &rank);
     if (zz->Tflops_Special) {
        nprocs = numprocs;
        rank -= proclower;
     }
     else
        MPI_Comm_size(comm, &nprocs);

     if ((send = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(3*nprocs*sizeof(ZOLTAN_GNO_TYPE))) == NULL)
        return ZOLTAN_MEMERR;
     if ((tmp_send = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(3*nprocs*sizeof(ZOLTAN_GNO_TYPE))) == NULL) {
        ZOLTAN_FREE(&send);
        return ZOLTAN_MEMERR;
     }
     rem = &send[nprocs];
     sets = &send[2*nprocs];

     /* gather number of outgoing and remaining dots across processors */

     if (zz->Tflops_Special) {
        for (i = 3*nprocs-1; i >= 0; i--)
           send[i] = 0;
        send[rank] = outgoing;
        send[rank+nprocs] = dotnum - outgoing;
        send[rank+2*nprocs] = set;
        Zoltan_RB_Gather(send, tmp_send, proclower, rank, nprocs, comm);
     }
     else {
        for (i = 3*nprocs-1; i >= 0; i--)
           tmp_send[i] = 0;
        tmp_send[rank] = outgoing;
        tmp_send[rank+nprocs] = dotnum - outgoing;
        tmp_send[rank+2*nprocs] = set;
        i = 3*nprocs;
        MPI_Allreduce(tmp_send, send, i, zoltan_gno_mpi_type, MPI_SUM, comm);
     }

     ZOLTAN_FREE(&tmp_send);

     /* Convert processor numbers to local (for this communicator) numbering
        and determine which subset of processors a processor is in. */

     /* to determine the number of dots (a) that will be on each of the
        processors in the other group, start with the average */
     sum_send = sum_rem = 0;
     for (i = 0; i < nprocs; i++)
        if (sets[i] == set)
           sum_send += send[i];
        else {
           sum_rem += rem[i];
           np_other++;
        }

     /* Compute a, the number of dots that each processor in the other group
        will have after communication (assuming it keeps no more than a).
        Choose a so that a non-negative number of dots is left to send. */
     if (sum_send) {
        if (np_other == 0){
          /* This should never happen. Prevent divide-by-zero to be safe. */
          err = ZOLTAN_FATAL;
          goto End;
        }
        a = (sum_send + sum_rem)/np_other;
        sp = -1;
        k = 0;
        while (!k) {
           s = 0;
           for (i = 0; i < nprocs; i++)
              if (sets[i] != set && rem[i] < a)
                 s += a - rem[i];
           if (s == sum_send)
              k = 1;
           else if (s < sum_send) {
              if (sp > sum_send)
                 k = 1;
              else
                 a++;
           }
           else {
              a--;
              if (sp < sum_send && sp > 0)
                 k = 1;
           }
           sp = s;
        }
     } else
        a = 0;

     /* Allocate who receives how much and, if necessary, give out the
        remainder.  The array send now holds the number that will be received
        by each processor in the other group only. */
     s = 0;
     for (i = 0; i < nprocs; i++)
        if (sets[i] != set && rem[i] < a)
           s += send[i] = a - rem[i];
     while (s < sum_send)
        for (i = 0; i < nprocs && s < sum_send; i++)
           if (sets[i] != set && (send[i] || !s)) {
              send[i]++;
              s++;
           }

     /* Create list of which processor to send dots to.  Only the other half
        of send has been overwritten above.  s is the number of dots that will
        be sent by processors in my part of the partition which are less than
        my processor.  sp is the sum of what processors in the other part are
        receiving.  The processor which causes sp > s is the first processor
        that we send dots to.  We continue to send dots to processors beyond
        that one until we run out of dots to send. */
     if (outgoing) {
        if (zz->Tflops_Special) a = (ZOLTAN_GNO_TYPE)outgoing;    /* keep outgoing around in a */
        s = sp = 0;
        for (i = 0; i < rank; i++)
           if (sets[i] == set)                   /* only overwrote other half */
              s += send[i];
        for (i = 0; sp <= s; i++)
           if (sets[i] != set)
              sp += send[i];
        i--;
        num_to = (outgoing < (sp - s)) ? outgoing : (sp - s);
        for (sp = 0; sp < num_to; sp++)
           proclist[sp] = i;
        outgoing -= num_to;
        while (outgoing > 0) {
           i++;
           while (sets[i] == set)
              i++;
           num_to = (outgoing < send[i]) ? outgoing : send[i];
           outgoing -= num_to;
           for (s = 0; s < num_to; s++, sp++)
              proclist[sp] = i;
        }
        if (zz->Tflops_Special)
           for (s = 0; s < a; s++)
              proclist[s] += proclower;
     }

End:
     /* free memory and return */
     ZOLTAN_FREE(&send);


     return err;
}
Ejemplo n.º 16
0
int Zoltan_ParMetis(
  ZZ *zz,               /* Zoltan structure */
  float *part_sizes,    /* Input:  Array of size zz->Num_Global_Parts
                           containing the percentage of work to be
                           assigned to each partition.               */
  int *num_imp,         /* number of objects to be imported */
  ZOLTAN_ID_PTR *imp_gids,  /* global ids of objects to be imported */
  ZOLTAN_ID_PTR *imp_lids,  /* local  ids of objects to be imported */
  int **imp_procs,      /* list of processors to import from */
  int **imp_to_part,    /* list of partitions to which imported objects are
                           assigned.  */
  int *num_exp,         /* number of objects to be exported */
  ZOLTAN_ID_PTR *exp_gids,  /* global ids of objects to be exported */
  ZOLTAN_ID_PTR *exp_lids,  /* local  ids of objects to be exported */
  int **exp_procs,      /* list of processors to export to */
  int **exp_to_part     /* list of partitions to which exported objects are
                           assigned. */
)
{
  char *yo = "Zoltan_ParMetis";
  int ierr;
  ZOLTAN_Third_Graph gr;
  ZOLTAN_Third_Geom  *geo = NULL;
  ZOLTAN_Third_Vsize vsp;
  ZOLTAN_Third_Part  prt;
  ZOLTAN_Output_Part part;

  ZOLTAN_ID_PTR global_ids = NULL;
  ZOLTAN_ID_PTR local_ids = NULL;

  int use_timers = 0;
  int timer_p = -1;
  int get_times = 0;
  double times[5];

  double pmv3_itr = 0.0;
  realtype itr = 0.0;
  indextype options[MAX_OPTIONS];
  char alg[MAX_PARAM_STRING_LEN+1];

#ifdef ZOLTAN_PARMETIS
  MPI_Comm comm = zz->Communicator;/* don't risk letting external packages */
                                   /* change our zz struct.                  */
#endif

  indextype i;
  realtype *imb_tols;
  indextype ncon;
  indextype edgecut;
  indextype wgtflag;
  indextype numflag = 0;
  indextype num_part = zz->LB.Num_Global_Parts; /* passed to ParMETIS. */

  ZOLTAN_TRACE_ENTER(zz, yo);

  Zoltan_Third_Init(&gr, &prt, &vsp, &part,
                    imp_gids, imp_lids, imp_procs, imp_to_part,
                    exp_gids, exp_lids, exp_procs, exp_to_part);

  if (sizeof(realtype) != sizeof(float)) {
    int tmp = zz->LB.Num_Global_Parts * MAX(zz->Obj_Weight_Dim, 1);
    realtype sum = 0.;

    prt.input_part_sizes = (realtype *)
                   ZOLTAN_MALLOC(tmp * sizeof(realtype));
    if (!prt.input_part_sizes)
      return (ZOLTAN_MEMERR);    /* out of memory */
    for (i = 0; i < tmp; i++) {
      prt.input_part_sizes[i] = (realtype) part_sizes[i];
      sum += prt.input_part_sizes[i];
    }
    if (sum != (realtype) 1.0) {
      /* rescale part sizes in case of roundoff in conversion; 
       * ParMETIS requires sum of part sizes to be 1.0
       */
      for (i = 0; i < tmp; i++) {
        prt.input_part_sizes[i] /= sum;
      }
    }
    prt.part_sizes = prt.input_part_sizes;
  }
  else
    prt.input_part_sizes = prt.part_sizes = (realtype *) part_sizes;


  ierr = Zoltan_Parmetis_Parse(zz, options, alg, &itr, &pmv3_itr, NULL);
  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)) {
    Zoltan_Third_Exit(&gr, geo, &prt, &vsp, &part, NULL);
    return (ierr);
  }

  gr.graph_type = 0;

#ifdef ZOLTAN_PARMETIS
  SET_GLOBAL_GRAPH(&gr.graph_type);
  /* Select the type of graph; negative values mean the type is imposed */
  /* TODO: add a parameter to select the type, shared with Scotch */
/*   if (strcmp (graph_type, "GLOBAL") != 0) { */
/*     gr.graph_type = - LOCAL_GRAPH; */
/*     if (zz->Num_Proc > 1) { */
/*       ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Distributed graph: cannot call METIS, switching to ParMetis"); */
/*       gr.graph_type = - GLOBAL_GRAPH; */
/*       retval = ZOLTAN_WARN; */
/*     } */
/*   } */
#else /* graph is local */
  SET_LOCAL_GRAPH(&gr.graph_type);
#endif /* ZOLTAN_PARMETIS */


  /* Some algorithms use geometry data */
  if (strncmp(alg, "PARTGEOM", 8) == 0){          /* PARTGEOM & PARTGEOMKWAY */
    geo = (ZOLTAN_Third_Geom*) ZOLTAN_MALLOC(sizeof(ZOLTAN_Third_Geom));
    if (geo == NULL) {
      ZOLTAN_PRINT_ERROR (zz->Proc, yo, "Out of memory.");
      return (ZOLTAN_MEMERR);
    }
    memset (geo, 0, sizeof(ZOLTAN_Third_Geom));
    /* ParMETIS will crash if a geometric method runs while some procs have
       no nodes; avoid the fatal crash by setting scatter to level 2 or
       higher. */
    gr.scatter_min = 2;
    if (strcmp(alg, "PARTGEOM") == 0) {
      gr.get_data = 0;
    }
  }

  timer_p = Zoltan_Preprocess_Timer(zz, &use_timers);

  /* Start timer */
  get_times = (zz->Debug_Level >= ZOLTAN_DEBUG_ATIME);
  if (get_times){
    MPI_Barrier(zz->Communicator);
    times[0] = Zoltan_Time(zz->Timer);
  }

  vsp.vsize_malloc = 0;
#ifdef PARMETIS31_ALWAYS_FREES_VSIZE
  if (!strcmp(alg, "ADAPTIVEREPART") && (zz->Num_Proc > 1)) {
    /* ParMETIS will free this memory; use malloc to allocate so
       ZOLTAN_MALLOC counters don't show an error. */
    vsp.vsize_malloc = 1 ;
  }
#endif /* PARMETIS31_ALWAYS_FREES_VSIZE */


  ierr = Zoltan_Preprocess_Graph(zz, &global_ids, &local_ids,  &gr, 
                                 geo, &prt, &vsp);
  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)) {
    Zoltan_Third_Exit(&gr, geo, &prt, &vsp, &part, NULL);
    return (ierr);
  }

  /* Get object sizes if requested */
  if (options[PMV3_OPT_USE_OBJ_SIZE] &&
      (zz->Get_Obj_Size || zz->Get_Obj_Size_Multi) &&
      (!strcmp(alg, "ADAPTIVEREPART") || gr.final_output))
    gr.showMoveVol = 1;


  /* Get a time here */
  if (get_times) times[1] = Zoltan_Time(zz->Timer);

  /* Get ready to call ParMETIS */
  edgecut = -1;
  wgtflag = 2*(gr.obj_wgt_dim>0) + (gr.edge_wgt_dim>0); /* 0=none, 1=edge, 2=vtx, 3=both */
  numflag = 0;
  ncon = (gr.obj_wgt_dim > 0 ? gr.obj_wgt_dim : 1);

  if (!prt.part_sizes){
    ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL,"Input parameter part_sizes is NULL.");
  }
  if ((zz->Proc == 0) && (zz->Debug_Level >= ZOLTAN_DEBUG_ALL)) {
    for (i=0; i<num_part; i++){
      indextype j;

      printf("Debug: Size(s) for part " TPL_IDX_SPEC " = ", i);
      for (j=0; j<ncon; j++)
        printf("%f ", prt.part_sizes[i*ncon+j]);
      printf("\n");
    }
  }

  /* if (strcmp(alg, "ADAPTIVEREPART") == 0) */
  for (i = 0; i < num_part*ncon; i++)
    if (prt.part_sizes[i] == 0) 
      ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL, "Zero-sized part(s) requested! "
                            "ParMETIS 3.x will likely fail. Please use a "
                            "different method, or remove the zero-sized "
                            "parts from the problem.");


  /* Set Imbalance Tolerance for each weight component. */
  imb_tols = (realtype *) ZOLTAN_MALLOC(ncon * sizeof(realtype));
  if (!imb_tols){
    /* Not enough memory */
    ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory.");
  }
  for (i=0; i<ncon; i++)
    imb_tols[i] = (realtype) (zz->LB.Imbalance_Tol[i]);

  /* Now we can call ParMetis */

  /* Zoltan_Third_Graph_Print(zz, &gr, "Before calling parmetis"); */


#ifdef ZOLTAN_PARMETIS
  if (!IS_LOCAL_GRAPH(gr.graph_type)) { /* May be GLOBAL or NO GRAPH */

    /* First check for ParMetis 3 routines */
    if (strcmp(alg, "PARTKWAY") == 0){
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library "
                                  "ParMETIS_V3_PartKway");
      ParMETIS_V3_PartKway(gr.vtxdist, gr.xadj, gr.adjncy, gr.vwgt, gr.ewgts,
                           &wgtflag, &numflag, &ncon, &num_part, prt.part_sizes,
                           imb_tols, options, &edgecut, prt.part, &comm);
      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
    }
    else if (strcmp(alg, "PARTGEOMKWAY") == 0){
      indextype ndims = geo->ndims;
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library "
                                  "ParMETIS_V3_PartGeomKway");
      ParMETIS_V3_PartGeomKway(gr.vtxdist, gr.xadj, gr.adjncy, gr.vwgt,gr.ewgts,
                               &wgtflag, &numflag, &ndims, geo->xyz, &ncon,
                               &num_part, prt.part_sizes,
                               imb_tols, options, &edgecut, prt.part, &comm);
      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
    }
    else if (strcmp(alg, "PARTGEOM") == 0){
      indextype ndims = geo->ndims;
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library "
                                  "ParMETIS_V3_PartGeom");
      ParMETIS_V3_PartGeom(gr.vtxdist, &ndims, geo->xyz, prt.part, &comm);
      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
    }
    else if (strcmp(alg, "ADAPTIVEREPART") == 0){
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library "
                                  "ParMETIS_V3_AdaptiveRepart");
      ParMETIS_V3_AdaptiveRepart(gr.vtxdist, gr.xadj, gr.adjncy, gr.vwgt,
                                 vsp.vsize, gr.ewgts, &wgtflag, &numflag, &ncon,
                                 &num_part, prt.part_sizes, imb_tols,
                                 &itr, options, &edgecut, prt.part, &comm);
      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
    }
    else if (strcmp(alg, "REFINEKWAY") == 0){
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the ParMETIS library "
                                  "ParMETIS_V3_RefineKway");
      ParMETIS_V3_RefineKway(gr.vtxdist, gr.xadj, gr.adjncy, gr.vwgt, gr.ewgts,
                             &wgtflag, &numflag, &ncon, &num_part,
                             prt.part_sizes, imb_tols,
                             options, &edgecut, prt.part, &comm);
      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the ParMETIS library");
    }
    else {
      /* Sanity check: This should never happen! */
      char msg[256];
      sprintf(msg, "Unknown ParMetis algorithm %s.", alg);
      ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL, msg);
    }
  }
#endif /* ZOLTAN_PARMETIS */
#ifdef ZOLTAN_METIS
  /* TODO: I don't know how to set balance ! */
  if (IS_LOCAL_GRAPH(gr.graph_type)) {
    /* Check for Metis routines */
    if (strcmp(alg, "PARTKWAY") == 0){
      ZOLTAN_TRACE_DETAIL(zz, yo, "Calling the METIS library ");
      /* Use default options for METIS */
      options[0] = 0;

#if !defined(METIS_VER_MAJOR) || METIS_VER_MAJOR < 5
      METIS_WPartGraphKway (gr.vtxdist+1, gr.xadj, gr.adjncy, 
                            gr.vwgt, gr.ewgts, &wgtflag,
                            &numflag, &num_part, prt.part_sizes, 
                            options, &edgecut, prt.part);
#else
      METIS_PartGraphKway (gr.vtxdist+1, &ncon, gr.xadj, gr.adjncy,
                           gr.vwgt, vsp.vsize, gr.ewgts, &num_part,
                           prt.part_sizes, imb_tols, options,
                           &edgecut, prt.part);
#endif

      ZOLTAN_TRACE_DETAIL(zz, yo, "Returned from the METIS library");
    }
    else {
      /* Sanity check: This should never happen! */
      char msg[256];
      sprintf(msg, "Unknown Metis algorithm %s.", alg);
      ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL, msg);
    }
  }
#endif /* ZOLTAN_METIS */


  /* Get a time here */
  if (get_times) times[2] = Zoltan_Time(zz->Timer);


  if (gr.final_output) { 
    /* Do final output now, because afterwards the data will no longer be
       coherent: unscattering restores only the part data, not the graph. */
    ierr = Zoltan_Postprocess_FinalOutput (zz, &gr, &prt, &vsp, use_timers, itr);
  }
  /* Ignore the timings of Final Output */
  if (get_times) times[3] = Zoltan_Time(zz->Timer);

  ierr = Zoltan_Postprocess_Graph(zz, global_ids, local_ids, &gr, 
                                  geo, &prt, &vsp, NULL, &part);

  Zoltan_Third_Export_User(&part, 
                           num_imp, imp_gids, imp_lids, imp_procs, imp_to_part,
                           num_exp, exp_gids, exp_lids, exp_procs, exp_to_part);

  /* Get a time here */
  if (get_times) times[4] = Zoltan_Time(zz->Timer);

  if (get_times) Zoltan_Third_DisplayTime(zz, times);

  if (use_timers && timer_p >= 0)
    ZOLTAN_TIMER_STOP(zz->ZTime, timer_p, zz->Communicator);

  Zoltan_Third_Exit(&gr, geo, &prt, &vsp, NULL, NULL);
  if (imb_tols != NULL) ZOLTAN_FREE(&imb_tols);
  if (geo != NULL) ZOLTAN_FREE(&geo);
  ZOLTAN_FREE(&global_ids);
  ZOLTAN_FREE(&local_ids);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
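
Applications normally reach Zoltan_ParMetis indirectly, through the generic load-balancing entry point. A hedged sketch of the usual driver sequence, assuming Zoltan's public API (Zoltan_Create, Zoltan_Set_Param, Zoltan_LB_Partition) with query callbacks registered elsewhere; the parameter names follow the Zoltan manual but should be treated as illustrative:

struct Zoltan_Struct *zz = Zoltan_Create(MPI_COMM_WORLD);
int changes, ngid, nlid, nimp, nexp;
ZOLTAN_ID_PTR imp_gids, imp_lids, exp_gids, exp_lids;
int *imp_procs, *imp_parts, *exp_procs, *exp_parts;

Zoltan_Set_Param(zz, "LB_METHOD", "PARMETIS");       /* dispatches to Zoltan_ParMetis */
Zoltan_Set_Param(zz, "PARMETIS_METHOD", "PARTKWAY"); /* selects alg = "PARTKWAY" above */
/* ... register ZOLTAN_NUM_OBJ_FN, ZOLTAN_OBJ_LIST_FN and graph callbacks ... */
Zoltan_LB_Partition(zz, &changes, &ngid, &nlid,
                    &nimp, &imp_gids, &imp_lids, &imp_procs, &imp_parts,
                    &nexp, &exp_gids, &exp_lids, &exp_procs, &exp_parts);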
Ejemplo n.º 17
0
/*
 * void Zoltan_Oct_gen_tree_from_input_data()
 *
 * This function will create a root node of on each processor which will
 * then be used to create an octree with regions associated with it. The
 * tree will then be balanced and the output used to balance "mesh regions"
 * on several processors.
 */
static void Zoltan_Oct_gen_tree_from_input_data(ZZ *zz, int oct_wgtflag,
						int *c1, int *c2, int *c3, 
						float *c0, int createpartree) 
{
  char *yo = "Zoltan_Oct_gen_tree_from_input_data";
  pRList  RootList;       /* list of all local roots */
  pOctant RootOct;        /* root octree octant */
  COORD min,              /* min coord bounds of objects */
        max;              /* max coord bounds of objects */
  int num_extra;          /* number of orphaned objects */
  int num_objs;           /* total number of local objects */
  pRegion ptr,            /* pointer to iterate through region list */
          ptr1;           /* pointer to iterate through region list */
  pOctant root;           /* root of the partition tree */
  int     i;              /* index counter */
  int     count,          /* count for leaf nodes in partition tree */
          proc,           /* proc leaf node of partition tree belongs to */
          extra,          /* extra leaf node flag, if not evenly divisible */
          remainder;      /* remainder of node, or processors to fill */
  pOctant cursor,         /* cursor to iterate through octant list */
          cursor2,        /* another cursor to iterate through octant list */
          parent;         /* parent of an octant */
  int level,              /* number of levels of refinement */
      n,                  /* index counter */
      part;               /* partition counter */
  Map *array;             /* map of which processors own which octants */
  int hold;               /* used for calculating partition divisions */
  int ierr = 0;

#ifdef KDDKDD_NEW_BOUNDS_GEOM_QUERY_FN
  double bounds[6] = {DBL_MAX,DBL_MAX,DBL_MAX,-DBL_MAX,-DBL_MAX,-DBL_MAX};
  COORD global_min, global_max;
#endif /* KDDKDD_NEW_BOUNDS_GEOM_QUERY_FN */
  int nroots = 0;
  /*test*/
  /* COORD gmin,gmax; */

  OCT_Global_Info *OCT_info = (OCT_Global_Info *) (zz->LB.Data_Structure);

  ZOLTAN_TRACE_ENTER(zz, yo);
  /*
   * If there are no objects on this processor, do not create a root octant.
   * The partitioner will probably assign objects to this processor.
   */
  if(zz->Get_Num_Obj == NULL) {
    fprintf(stderr, "OCT %s\n\t%s\n", "Error in octree load balance:",
	    "Must register ZOLTAN_NUM_OBJ_FN function");
    abort();
  }
  *c3 = num_objs = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
  if (ierr) {
    fprintf(stderr, "OCT [%d] %s: Error returned from user defined "
                    "Get_Num_Obj function.\n", zz->Proc, yo);
    exit (-1);
  }
  ptr1 = NULL;

  ZOLTAN_TRACE_DETAIL(zz, yo, "Calling Zoltan_Oct_get_bounds");
  /* Need A Function To Get The Bounds Of The Local Objects */
  Zoltan_Oct_get_bounds(zz, &ptr1, &num_objs, min, max, oct_wgtflag, c0);
  
#ifndef KDDKDD_NEW_BOUNDS_GEOM_QUERY_FN
  /* For now, don't want to add the new query function to Zoltan. */
  /* Zoltan_Oct_get_bounds appears to compute the global min and max from */
  /* the object input. */
  vector_set(OCT_info->OCT_gmin, min);
  vector_set(OCT_info->OCT_gmax, max);
#else
  /*test*/
  /*getMaxBounds(&gmin, &gmax);*/
  if(zz->Get_Bounds_Geom == NULL) {
    fprintf(stderr, "OCT %s\n\t%s\n", "Error in octree load balance:",
	    "Must register Get_Bounds_Geom function");
    abort();
  }
  zz->Get_Bounds_Geom(zz->Get_Bounds_Geom_Data, bounds, &ierr); 
  
  MPI_Allreduce(&(bounds[0]), &(global_min[0]), 3, 
		MPI_DOUBLE, MPI_MIN, zz->Communicator);
  MPI_Allreduce(&(bounds[3]), &(global_max[0]), 3,
		MPI_DOUBLE, MPI_MAX, zz->Communicator);
  vector_set(OCT_info->OCT_gmin, global_min);
  vector_set(OCT_info->OCT_gmax, global_max);
#endif
  /* 
   * the following code segment was added to create a pseudo global octree
   * needed for the partitioner. The basic idea is to regroup all the
   * regions into something close to an octree partitioning and build the
   * tree from that.
   * NOTE: This way of doing things is very costly, especially when calling
   * this for the first time on a mesh not partitioned in an octree style
   * partitioning.
   */

    level = 0;                                    /* initialize level count */

  /* 
   * if more than 1 processor, need to find what level of refinement needed
   * to initially partition bounding box among the processors 
   */

  
    if(zz->Num_Proc > 1) {
      n = zz->Num_Proc;
      if(OCT_info->OCT_dimension == 2)
	hold = 4;
      else
	hold = 8;
      remainder = hold;
      for(; remainder > 0; level++) {
	int pr = (int)POW(hold, level);
	remainder = n - pr;
      }
      level--;
    }
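  /* Example: with n = 10 processors and hold = 8 (3D), the loop tries
     8^0 = 1 and 8^1 = 8 (remainder still positive), stops after 8^2 = 64,
     and the decrement leaves level = 2: two refinements give 64 octants,
     enough for one subtree per processor. */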
  ZOLTAN_TRACE_DETAIL(zz, yo, "Before createpartree");

  if(createpartree) {
    /* create the global root octant */
    root = Zoltan_Oct_POct_new(OCT_info);
    Zoltan_Oct_setbounds(root, OCT_info->OCT_gmin, OCT_info->OCT_gmax);
    /* Zoltan_Oct_setOrientation(root, 0); */
  
    /* subdivide to as many levels as calculated */
    for(i=0; i<level; i++) {
      cursor = root;
      while(cursor != NULL) {
	if(Zoltan_Oct_isTerminal(cursor)) {
	  cursor2 = Zoltan_Oct_POct_nextDfs(OCT_info, cursor);
	  Zoltan_Oct_terminal_refine(zz, cursor, 0);
	  cursor = cursor2;
	}
	else 
	  cursor = Zoltan_Oct_POct_nextDfs(OCT_info, cursor);
      }
    }
    
#if 0
    if(zz->Proc == 0)
      for(i=0; i<8; i++)
	if(Zoltan_Oct_child(root, i) == NULL)
	  fprintf(stderr,"NULL child pointer\n");
	else
	  fprintf(stderr, "child %d exists\n", i);
#endif

  ZOLTAN_TRACE_DETAIL(zz, yo, "Before create map array");
    /* this part creates the map array */
    if(OCT_info->OCT_dimension == 2) {
      hold = (int)POW(4, level);                 /* ignoring the z+ octants */
      if(hold == 0)
	hold = 1;
    }
    else
      hold = (int)POW(8, level);

    part = hold / zz->Num_Proc;          /* how many octants per partition */
    remainder = hold % zz->Num_Proc; /* extra octants, not evenly divisible */
    extra = zz->Num_Proc - remainder;/* where to start adding extra octants */
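    /* Example: hold = 16 octants on 5 procs gives part = 3, remainder = 1,
       extra = 4; in the loop below procs 0..3 take 3 octants each and the
       last proc takes part+1 = 4, accounting for all 16. */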
    array = (Map *) ZOLTAN_MALLOC(hold * sizeof(Map));   /* alloc map array */
    if(array == NULL) {
      fprintf(stderr, "OCT ERROR on proc %d, could not allocate array map\n",
	      zz->Proc);
      abort();
    }
    /* initialize variables */
    proc = 0;
    count = 0;
    i = 0;
    cursor = root; 
    while(cursor != NULL) {
      cursor2 = Zoltan_Oct_POct_nextDfs(OCT_info, cursor);
      if((Zoltan_Oct_isTerminal(cursor)) && (i < hold)) {
	if(proc == extra) {
	  part++;
	  extra = -1;
	}
	if(count != part) {
	  array[i].npid = proc;
	  array[i].list = RL_initRootList();
	  Zoltan_Oct_bounds(cursor, min, max);
	  vector_set(array[i].min, min);
	  vector_set(array[i].max, max);
	  count++;
	}
	else {
	  count = 1;
	  proc++;
	  array[i].npid = proc;
	  array[i].list = RL_initRootList();
	  Zoltan_Oct_bounds(cursor, min, max);
	  vector_set(array[i].min, min);
	  vector_set(array[i].max, max);
	}
	if(proc == zz->Proc) {
	  array[i].npid = -1;
          /* KDDKDD Added RL_freeList below.  The 
           * KDDKDD implementation from RPI leaked memory because the 
           * KDDKDD test cases for setting array[i].list were not mutually 
           * KDDKDD exclusive.  Freeing the list produces the result we got
           * KDDKDD before, without the memory leak.
           */
	  /* LGG -- it seems to me that this array[i].list assignment is
	   * not really necessary. It looks as though it has already been
	   * assigned the same information in the previous if-else, which is
	   * why the RL_freeList() and RL_initRootList() calls below are
	   * commented out.
	   */
          /*RL_freeList(&(array[i].list));*/
          /* KDDKDD End addition */
	  /*array[i].list = RL_initRootList();*/
	  parent = Zoltan_Oct_parent(cursor);
	  if(parent != NULL)
	    Zoltan_Oct_setchild(parent, cursor->which, NULL);
	  /* octant into local root list */
 	  Zoltan_Oct_POct_setparent(OCT_info, cursor, NULL, -1);
	  Zoltan_Oct_setMapIdx(cursor, i);
	  nroots++;
	  /* Zoltan_Oct_POct_setparent(OCT_info, cursor, NULL, zz->Proc);     
             octant into local root list */
	}
	i++;
      }
      cursor = cursor2;
    } 
    RootList = Zoltan_Oct_POct_localroots(OCT_info); 
    RootOct = RL_nextRootOctant(&RootList);
    if(RootOct != root) {
      /* KDDKDDFREE changed root to &root to allow root to be reset to NULL */
      Zoltan_Oct_POct_delTree(OCT_info,&root);
    }
    
    OCT_info->map = array;
    OCT_info->mapsize = hold;
  }

  /* 
   * attach the regions to the root... Zoltan_Oct_fix will create the octree
   * starting with the root and subdividing as needed 
   */    
  num_extra = Zoltan_Oct_fix(zz, ptr1, num_objs);
 
  ZOLTAN_TRACE_DETAIL(zz, yo, "Calling Zoltan_Oct_migreg_migrate_orphans");
  Zoltan_Oct_migreg_migrate_orphans(zz, ptr1, num_extra, level, OCT_info->map,
				    c1, c2);

  /* ZOLTAN_FREE(&array); */
  while(ptr1 != NULL) {
    ptr = ptr1->next;
    ZOLTAN_FREE(&(ptr1->Global_ID));
    ZOLTAN_FREE(&(ptr1->Local_ID));
    ZOLTAN_FREE(&ptr1);
    ptr1 = ptr;
  }
  ZOLTAN_TRACE_EXIT(zz, yo);
}
Ejemplo n.º 18
0
int Zoltan_Block(
  ZZ *zz,                       /* The Zoltan structure.                     */
  float *part_sizes,            /* Input:  Array of size zz->LB.Num_Global_Parts
                                   containing the percentage of work to be
                                   assigned to each partition.               */
  int *num_import,              /* Return -1. We use only export lists. */
  ZOLTAN_ID_PTR *import_global_ids, /* Not used. */
  ZOLTAN_ID_PTR *import_local_ids,  /* Not used. */
  int **import_procs,           /* Not used. */
  int **import_to_part,         /* Not used. */
  int *num_export,              /* Output: Number of objects to export. */
  ZOLTAN_ID_PTR *export_global_ids, /* Output: GIDs to export. */
  ZOLTAN_ID_PTR *export_local_ids,  /* Output: LIDs to export. */
  int **export_procs,           /* Output: Processors to export to. */
  int **export_to_part          /* Output: Partitions to export to. */
)
{
  int ierr = ZOLTAN_OK;
  int i, count, num_obj;
  int wtflag;
  ZOLTAN_ID_PTR global_ids = NULL;
  ZOLTAN_ID_PTR local_ids = NULL; 
  int *parts = NULL;
  int *newparts = NULL;
  float *wgts = NULL;
  static char *yo = "Zoltan_Block";

  ZOLTAN_TRACE_ENTER(zz, yo);

  /* No import lists computed. */
  *num_import = -1;
  *export_global_ids = *export_local_ids = NULL;
  *export_procs = *export_to_part = NULL;

  /* Get list of local objects. */
  if (zz->Obj_Weight_Dim > 1) {
    ierr = ZOLTAN_FATAL;
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                      "OBJ_WEIGHT_DIM > 1 not supported by LB_METHOD BLOCK.");
    goto End;
  }
  wtflag = (zz->Obj_Weight_Dim>0 ? 1 : 0);
  ierr = Zoltan_Get_Obj_List(zz, &num_obj, &global_ids, &local_ids, wtflag,
                             &wgts, &parts);
  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
    goto End;  /* error already reported by Zoltan_Get_Obj_List */

  /* Compute the new partition numbers. */
  newparts = (int *) ZOLTAN_MALLOC(num_obj * sizeof(int));
  if (num_obj && (!newparts)){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  block_part(zz, num_obj, wtflag, wgts, part_sizes, newparts);

  /* Check how many partition numbers changed. */
  count=0;
  for (i=0; i<num_obj; i++){
    if (newparts[i] != parts[i])
      ++count;
  }
  (*num_export) = count;

  /* Allocate export lists. */
  if ((*num_export) > 0) {
    if (!Zoltan_Special_Malloc(zz, (void **)export_global_ids, (*num_export),
                               ZOLTAN_SPECIAL_MALLOC_GID)
     || !Zoltan_Special_Malloc(zz, (void **)export_local_ids, (*num_export),
                               ZOLTAN_SPECIAL_MALLOC_LID)
     || !Zoltan_Special_Malloc(zz, (void **)export_procs, (*num_export),
                               ZOLTAN_SPECIAL_MALLOC_INT)
     || !Zoltan_Special_Malloc(zz, (void **)export_to_part, (*num_export),
                               ZOLTAN_SPECIAL_MALLOC_INT)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
  }

  /* Loop over objects and fill export lists. */
  count=0;
  for (i=0; i<num_obj; i++){
    if (newparts[i] != parts[i]){
      /* export_global_ids[count] = global_ids[i]; */
      ZOLTAN_SET_GID(zz, &((*export_global_ids)[count*zz->Num_GID]),
                     &global_ids[i*zz->Num_GID]);
      if (local_ids)
        /* export_local_ids[count] = local_ids[i]; */
        ZOLTAN_SET_LID(zz, &((*export_local_ids)[count*zz->Num_LID]),
                       &local_ids[i*zz->Num_LID]);
      /* Set new partition number. */
      (*export_to_part)[count] = newparts[i];
      /* Processor is derived from partition number. */
      (*export_procs)[count] = Zoltan_LB_Part_To_Proc(zz, 
                     (*export_to_part)[count], &global_ids[i*zz->Num_GID]);

      ++count;
    }
  }

End:
  /* Free local memory, but not export lists. */
  ZOLTAN_FREE(&global_ids);
  ZOLTAN_FREE(&local_ids);
  ZOLTAN_FREE(&parts);
  ZOLTAN_FREE(&newparts);
  if (wtflag) ZOLTAN_FREE(&wgts);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
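
block_part is defined elsewhere in this file and not shown here. Conceptually, a block method hands out contiguous ranges of global object numbers to parts; a minimal hedged sketch of an unweighted variant (the helper name and the global-numbering inputs are hypothetical):

/* Hypothetical unweighted block assignment: global object numbers are split
 * into num_parts contiguous, near-equal ranges. */
static void sketch_block_part(int num_obj, long first_gno, long total_obj,
                              int num_parts, int *newparts)
{
  int i;
  for (i = 0; i < num_obj; i++) {
    long gno = first_gno + i;            /* this object's global number */
    newparts[i] = (int)((gno * (long) num_parts) / total_obj);
  }
}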
Ejemplo n.º 19
0
static int local_HEs_from_export_lists(
  ZZ *zz,
  int remap_type,      /* type of remapping to do:  parts, procs, or none. */
  int nobj,            /* # objs the processor knows about (keep + exports) */
  int *new_proc,       /* On input, new processor assignment for each obj; 
                          Upon return, remapped new proc assignment for
                          each obj. */
  int *old_part,       /* old partition assignments for each objs */
  int *new_part,       /* On input, new partition assignments for each objs.
                          Upon return, remapped new partition assignments */
  int *HEcnt,          /* # of HEs allocated. */
  int **HEinfo         /* Array of HE info; for each HE, two pins and 
                          one edge weight. Stored as a single vector
                          to minimize communication calls.  */
) 
{
/*  Routine to remap partitions (to new processors or new partition numbers)
 *  to reduce data movement.
 *  This routine assumes the load-balancing algorithm built export lists.
 *  Objects described are those that STARTED on zz->Proc due to load balancing.
 *  For all these objects, old_proc == zz->Proc.
 */
char *yo = "local_HEs_from_export_lists";
int ierr = ZOLTAN_OK;
int i, cnt, tmp;
int *tmp_HEinfo;
int my_proc = zz->Proc;       /* This processor's rank. */

int nimp = 0;
int *imp_proc = NULL,         /* Temporary arrays if inversion of export to */
    *imp_old_part = NULL,     /* import lists is needed. */
    *imp_new_part = NULL;

int HEwgt_size;               /* # of HE weights allocated. */
int *HEwgt = NULL;            /* Array of HE weights.  Initially includes
                                 zero weights; later zero-weights are removed.*/

  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {
    /* Build HEs based on processor assignment.
     * We know the old processor for all objects we are keeping and all
     * export objects -- it is my_proc!
     * We also know the new processor number for all objects initially on
     * my_proc (since we built export lists.)
     * This case is a special case of partition remapping; it is easy to 
     * build the hyperedges in this special case.
     */
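    /* Illustrative (made-up) numbers: on my_proc = 1 with 4 procs and
       new_proc = {0,0,2,2,2,3} for nobj = 6, the counting below gives
       HEwgt = {2,0,3,1}, so HEcnt = 3 and HEinfo packs the triples
       (1,0,2), (1,2,3), (1,3,1): old proc, new proc, HE weight. */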

    HEwgt_size = zz->Num_Proc;
    HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int));
    if (!HEwgt) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < nobj; i++) 
      HEwgt[new_proc[i]]++;

    *HEcnt = 0;
    for (i = 0; i < HEwgt_size; i++)
      if (HEwgt[i] != 0) (*HEcnt)++;
   
    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0) 
      goto End;
    tmp_HEinfo = *HEinfo;

    cnt = 0;
    for (i = 0; i < HEwgt_size; i++) {
      if (HEwgt[i] != 0) {
        tmp = cnt * HEINFO_ENTRIES;
        tmp_HEinfo[tmp] = my_proc;    /* Old processor number */
        tmp_HEinfo[tmp+1] = i;        /* New processor number */
        tmp_HEinfo[tmp+2] = HEwgt[i]; /* HE weight; zero-weight HEs are skipped */
        cnt++;
      }
    }
  }

  else {  /* ZOLTAN_LB_REMAP_PARTS */
    /* Cannot renumber partitions given export lists without summing HE weights
     * across processors.  This summation is not straightforward.  Also, a 
     * potentially large number of HEs may exist 
     * (max_old_partition_number * zz->Num_Global_Parts).  Rather than build
     * this large matrix, just compute import lists from the export lists
     * and run the import-list algorithm.
     */
    ZOLTAN_COMM_OBJ *plan;
    int msg_tag = 22345;

    ierr = Zoltan_Comm_Create(&plan, nobj, new_proc, zz->Communicator,
                              msg_tag, &nimp);

    if (nimp > 0) {
      imp_proc = (int *) ZOLTAN_MALLOC(3 * nimp * sizeof(int));
      if (!imp_proc) {
        ierr = ZOLTAN_MEMERR;
        ZOLTAN_PRINT_ERROR(my_proc, yo, "Memory error.");
        goto End;
      }
      imp_old_part = imp_proc + nimp;
      imp_new_part = imp_old_part + nimp;
    }

    ierr = Zoltan_Comm_Info(plan, NULL, NULL, NULL, NULL, NULL, NULL, 
                            NULL, NULL, NULL, NULL, NULL, imp_proc, NULL);

    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) old_part, sizeof(int),
                          (char *) imp_old_part);

    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) new_part, sizeof(int),
                          (char *) imp_new_part);

    Zoltan_Comm_Destroy(&plan);

    ierr = local_HEs_from_import_lists(zz, remap_type, nimp, imp_proc,
                                       imp_old_part, imp_new_part,
                                       HEcnt, HEinfo);
  }

End:

  if (HEwgt) ZOLTAN_FREE(&HEwgt);
  if (imp_proc) ZOLTAN_FREE(&imp_proc);

  return ierr;
}
Ejemplo n.º 20
0
int Zoltan_Order (
    struct Zoltan_Struct *zz,
    int num_gid_entries,
    int num_obj,
    ZOLTAN_ID_PTR gids,
    ZOLTAN_ID_PTR permuted_global_ids
)
{
    /*
     * Main user-call for ordering.
     * Input:
     *   zz, a Zoltan structure with appropriate function pointers set.
     *   gids, a list of global ids.
     *   num_gid_entries
     * Output:
     *   permuted_global_ids
     * Return values:
     *   Zoltan error code.
     */

    char *yo = "Zoltan_Order";
    int ierr;
    double start_time, end_time;
    double order_time[2] = {0.0,0.0};
    char msg[256];
    int comm[2],gcomm[2];
    ZOLTAN_ORDER_FN *Order_fn;
    struct Zoltan_Order_Options opt;
    ZOLTAN_ID_PTR local_gids=NULL, lids=NULL;
    int local_num_obj;
    int *local_rank = NULL;
    struct Zoltan_DD_Struct *dd = NULL;


    ZOLTAN_TRACE_ENTER(zz, yo);

    if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
        Zoltan_Print_Key_Params(zz);

    start_time = Zoltan_Time(zz->Timer);

    /*
     * Compute Max number of array entries per ID over all processors.
     * This is a sanity-maintaining step; we don't want different
     * processors to have different values for these numbers.
     */
    comm[0] = zz->Num_GID;
    comm[1] = zz->Num_LID;
    MPI_Allreduce(comm, gcomm, 2, MPI_INT, MPI_MAX, zz->Communicator);
    zz->Num_GID = gcomm[0];
    zz->Num_LID = gcomm[1];

    if (num_gid_entries != zz->Num_GID) {
        sprintf(msg, "num_gid_entries=%d is not equal to parameter setting "
                "NUM_GID_ENTRIES=%d\n", num_gid_entries, zz->Num_GID);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
        return (ZOLTAN_FATAL);
    }


    zz->Order.nbr_objects = num_obj;
    zz->Order.start = NULL;
    zz->Order.ancestor = NULL;
    zz->Order.leaves = NULL;
    zz->Order.nbr_leaves = 0;
    zz->Order.nbr_blocks = 0;

    /*
     *  Return if this processor is not in the Zoltan structure's
     *  communicator.
     */

    if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_OK);
    }

    /*
     *  Get ordering options from parameter list.
     */

    /* Set default parameter values */
#ifdef HAVE_MPI
    strncpy(opt.method, "PARMETIS", MAX_PARAM_STRING_LEN);
    strcpy(zz->Order.order_type, "GLOBAL");
#else
    strncpy(opt.method, "METIS", MAX_PARAM_STRING_LEN);
    strcpy(zz->Order.order_type, "LOCAL");
#endif /* HAVE_MPI */

    opt.use_order_info = 0;
    opt.start_index = 0;

    Zoltan_Bind_Param(Order_params, "ORDER_METHOD", (void *) opt.method);
    Zoltan_Bind_Param(Order_params, "USE_ORDER_INFO", (void *) &opt.use_order_info);

    Zoltan_Assign_Param_Vals(zz->Params, Order_params, zz->Debug_Level,
                             zz->Proc, zz->Debug_Proc);

    /*
     *  Check that the user has allocated space for the return args.
     */
    if (num_obj && !(gids && permuted_global_ids)) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Input argument is NULL. Please allocate all required arrays before calling this routine.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    /*
     *  Find the selected method.
     */

    if (!strcmp(opt.method, "NONE")) {
        if (zz->Proc == zz->Debug_Proc && zz->Debug_Level >= ZOLTAN_DEBUG_PARAMS)
            ZOLTAN_PRINT_WARN(zz->Proc, yo, "Ordering method selected == NONE; no ordering performed\n");

        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_WARN);
    }
    else if (!strcmp(opt.method, "LOCAL_HSFC"))
    {
        Order_fn = Zoltan_LocalHSFC_Order;
        strcpy(zz->Order.order_type, "LOCAL"); /*MMW, not sure about this*/
    }
#ifdef ZOLTAN_PARMETIS
    else if (!strcmp(opt.method, "METIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        strcpy(zz->Order.order_type, "LOCAL");
    }
    else if (!strcmp(opt.method, "PARMETIS")) {
        Order_fn = Zoltan_ParMetis_Order;
        strcpy(zz->Order.order_type, "GLOBAL");
    }
#endif /* ZOLTAN_PARMETIS */
#ifdef ZOLTAN_SCOTCH
    else if (!strcmp(opt.method, "SCOTCH")) {
        Order_fn = Zoltan_Scotch_Order;
        strcpy(zz->Order.order_type, "LOCAL");
    }
    else if (!strcmp(opt.method, "PTSCOTCH")) {
        Order_fn = Zoltan_Scotch_Order;
        strcpy(zz->Order.order_type, "GLOBAL");
    }
#endif /* ZOLTAN_SCOTCH */
#ifdef ZOLTAN_HUND
    else if (!strcasecmp(opt.method, "HUND")) {
        ierr = Zoltan_HUND(zz, num_gid_entries, num_obj, gids, permuted_global_ids, NULL);
        goto End;
    }
#endif /* ZOLTAN_HUND */
    else {
        fprintf(stderr, "%s\n", opt.method);
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unknown ordering method");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ZOLTAN_FATAL);
    }

    /* TODO: Ask why this is useful! */
    /*
     *  Construct the heterogeneous machine description.
     */
    ierr = Zoltan_Build_Machine_Desc(zz);
    if (ierr == ZOLTAN_FATAL) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return (ierr);
    }
    ZOLTAN_TRACE_DETAIL(zz, yo, "Done machine description");


    /************************************
     *  Check for required query function
     ************************************/
    if (zz->Get_Num_Obj != NULL) {
        local_num_obj = zz->Get_Num_Obj(zz->Get_Num_Obj_Data, &ierr);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Get_Num_Obj.");
            return (ierr);
        }
    }
    else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register ZOLTAN_NUM_OBJ_FN.");
        return (ZOLTAN_FATAL);
    }


    /* TODO allocate all this stuff with the graph */
    local_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, local_num_obj);
    local_rank = (int*) ZOLTAN_MALLOC(local_num_obj*sizeof(int));

    lids = ZOLTAN_MALLOC_LID_ARRAY(zz, local_num_obj);

    /*
     * Call the actual ordering function.
     * Compute gid according to the local graph.
     */

    ierr = (*Order_fn)(zz, local_num_obj, local_gids, lids, local_rank, NULL, &opt);
    ZOLTAN_FREE(&lids);

    if (ierr) {
        sprintf(msg, "Ordering routine returned error code %d.", ierr);
        if (ierr == ZOLTAN_WARN) {
            ZOLTAN_PRINT_WARN(zz->Proc, yo, msg);
        } else {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, msg);
            Zoltan_Multifree(__FILE__, __LINE__, 2,
                             &local_gids, &local_rank);
            ZOLTAN_TRACE_EXIT(zz, yo);
            return (ierr);
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done ordering");

    /* TODO: Use the "graph" structure directly to avoid duplicating data. */

    /* TODO: For now, rank is treated as identical to permuted_global_ids. */

    /* Store: GNO, rank, permuted GID */
    /* MMW: perhaps don't ever use graph here since we need to support geometric orderings, otherwise need if/else */
    ierr = Zoltan_DD_Create (&dd, zz->Communicator, zz->Num_GID, (local_rank==NULL)?0:1, 0, local_num_obj, 0);
    /* Hope a linear assignment will help a little */
    if (local_num_obj)
        Zoltan_DD_Set_Neighbor_Hash_Fn1(dd, local_num_obj);
    /* Associate all the data with our xGNO */

    Zoltan_DD_Update (dd, local_gids, (ZOLTAN_ID_PTR)local_rank, NULL, NULL, local_num_obj);


    ZOLTAN_FREE(&local_gids);
    ZOLTAN_FREE(&local_rank);

    Zoltan_DD_Find (dd, gids, (ZOLTAN_ID_PTR)permuted_global_ids, NULL, NULL,
                    num_obj, NULL);
    Zoltan_DD_Destroy(&dd);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done Registering results");


    end_time = Zoltan_Time(zz->Timer);
    order_time[0] = end_time - start_time;

    if (zz->Debug_Level >= ZOLTAN_DEBUG_LIST) {
        int i;
        Zoltan_Print_Sync_Start(zz->Communicator, TRUE);
        printf("ZOLTAN: rank for ordering on Proc %d\n", zz->Proc);
        for (i = 0; i < num_obj; i++) {
            printf("GID = ");
            ZOLTAN_PRINT_GID(zz, &(gids[i*(num_gid_entries)]));
            printf(", rank = %3d\n", permuted_global_ids[i]);
        }
        printf("\n");
        Zoltan_Print_Sync_End(zz->Communicator, TRUE);
    }

    /* Print timing info */
    if (zz->Debug_Level >= ZOLTAN_DEBUG_ZTIME) {
        if (zz->Proc == zz->Debug_Proc) {
            printf("ZOLTAN Times:  \n");
        }
        Zoltan_Print_Stats (zz->Communicator, zz->Debug_Proc, order_time[0],
                            "ZOLTAN     Balance:     ");
    }

#ifdef ZOLTAN_HUND
End:
#endif /*ZOLTAN_HUND*/
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
}
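A minimal usage sketch for the dispatch above (an assumption for illustration, not part of the original listing): the ordering method is chosen through Zoltan's standard parameter interface, and the string must match one of the strcmp branches in the routine.

/* Hedged sketch: select the ordering method that the dispatch above will
 * resolve.  Assumes zz was created with Zoltan_Create(); the helper name
 * select_ordering_method is ours, not Zoltan's. */
static int select_ordering_method(struct Zoltan_Struct *zz)
{
  /* "PARMETIS" -> Zoltan_ParMetis_Order with a GLOBAL ordering;
   * "LOCAL_HSFC" -> Zoltan_LocalHSFC_Order with a LOCAL ordering. */
  int ierr = Zoltan_Set_Param(zz, "ORDER_METHOD", "PARMETIS");
  return (ierr == ZOLTAN_OK || ierr == ZOLTAN_WARN) ? ZOLTAN_OK : ierr;
}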
Example No. 21
/* path growing matching, hypergraph version */
static int matching_pgm (ZZ *zz, HGraph *hg, int *match, int *limit)
{
int i, j, k, side = 0, edge, vertex, *Match[2] = {NULL, NULL};
int limits[2], neighbor, next_vertex, pins;
double w[2]={0.0,0.0}, weight, max_weight, *sims = NULL;
char  *yo = "matching_pgm";

  limits[0] = limits[1] = *limit;
  Match[0] = match;
  if (hg->nVtx) {
    if (!(Match[1] = (int*)   ZOLTAN_MALLOC (hg->nVtx * sizeof(int)))
     || !(sims     = (double*) ZOLTAN_CALLOC (hg->nVtx,  sizeof(double))) ) {
      Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      return ZOLTAN_MEMERR;
    }
  }

  for (i = 0; i < hg->nVtx; i++)
    Match[1][i] = i;

  for (i = 0; i < hg->nVtx  &&  limits[side] > 0; i++) {
    if (Match[0][i] == i && Match[1][i] == i) {
      vertex = i;
      while (vertex > 0 && limits[side] > 0) {
        max_weight = 0.0;
        next_vertex = -1;

        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          pins = hg->hindex[edge+1] - hg->hindex[edge];
          weight = 2.0 / ((pins-1)*pins);
          if (hg->ewgt)
            weight *= hg->ewgt[edge];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (neighbor != vertex && Match[0][neighbor] == neighbor && 
                Match[1][neighbor]==neighbor)
               sims[neighbor] += weight;
          }
        }
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (sims[neighbor] > 0.0) {
              if (sims[neighbor] > max_weight) {
                max_weight = sims[neighbor];
                next_vertex = neighbor;
              }
              sims[neighbor] = 0.0;
            }
          }
        }

        if (next_vertex >= 0) {
          Match[side][vertex] = next_vertex;
          Match[side][next_vertex] = vertex;
          limits[side]--;
          w[side] += max_weight;
          side = 1-side;
        }
        vertex = next_vertex;
      }
    }
  }

  if (w[0] < w[1]) {
    for (i = 0; i < hg->nVtx; i++)
      match[i] = Match[1][i];
    *limit = limits[1];
  }
  else
    *limit = limits[0];

  Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
  return ZOLTAN_OK;
}
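The inner loops above spread each hyperedge's weight uniformly over its pins*(pins-1)/2 vertex pairs. A small sketch of that per-pair contribution (a hypothetical helper, assuming pins >= 2 as matching_pgm does):

/* Sketch: the similarity contribution a single hyperedge adds between any
 * two of its vertices, mirroring the weight computed in matching_pgm above.
 * An edge with `pins` vertices has pins*(pins-1)/2 pairs, so each pair
 * gets ewgt * 2/((pins-1)*pins); summing over all pairs recovers ewgt. */
static double pairwise_weight(int pins, double ewgt)
{
  /* caller must guarantee pins >= 2, as matching_pgm assumes */
  return ewgt * 2.0 / ((double)(pins - 1) * (double)pins);
}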
Example No. 22
void Zoltan_PHG_Plot(
  int proc,         /* Processor calling the routine */
  int nvtx,         /* Number of vertices */
  int nparts,       /* Number of partitions; ignored if part == NULL */
  int *vindex,      /* Starting index in vedges of hyperedges for each vertex */
  int *vedge,       /* Hyperedges for each vertex */
  int *part,        /* Partition to which vertices are assigned; if NULL,
                       partition information is not plotted. */
  char *str         /* String included as comment in output files. */
)
{
/* Routine that produces gnuplot output of hypergraph in form of matrix.
 * One column for each vertex.
 * One row for each hyperedge.
 * A nonzero entry a[j,i] indicates that vertex i is in hyperedge j.
 * If part == NULL, a single file is produced with all information.
 * (Currently, this capability is serial only.)
 * If part != NULL, a separate file is produced for each partition.
 * (Currently, this capability works in parallel for k >= p.)
 */
static int cnt = 0;
char filename[32];
int i, j, v;
FILE *fp = NULL;
int prev_part = -1;
int *idx = NULL;
int *vtx = NULL;

  idx = (int *) ZOLTAN_MALLOC(2 * nvtx * sizeof(int));
  vtx = idx + nvtx;
  for (i = 0; i < nvtx; i++) vtx[i] = idx[i] = i;
  if (part != NULL) {
    /* sort vertices by partition */
    Zoltan_quicksort_pointer_inc_int_int(idx, part, vtx, 0, nvtx-1);
  }
  else {
    /* write all results to one file */
    sprintf(filename, "hgplot%02d", cnt);
    fp = fopen(filename, "w");
    fprintf(fp, "#%s\n", str);
  }

  for (i = 0; i < nvtx; i++) {
    v = idx[i];
    if (part != NULL) {
      if (part[v] != prev_part) {
        /* open new file for this partition's data */
        if (fp != NULL) fclose(fp);
        sprintf(filename, "hgplot%02d.%02d", cnt, part[v]);
        fp = fopen(filename, "w");
        fprintf(fp, "#%s\n", str);
        prev_part = part[v];
      }
    }
    for (j = vindex[v]; j < vindex[v+1]; j++)
      fprintf(fp, "%d %d\n", v, -vedge[j]);
  }

  fclose(fp);
  ZOLTAN_FREE(&idx);
  if (proc == 0) {
    sprintf(filename, "hgplot%02d.gnuload", cnt);
    fp = fopen(filename, "w");
    fprintf(fp, "set data style points\n");
    fprintf(fp, "set pointsize 5\n");
    fprintf(fp, "set nokey\n");
    fprintf(fp, "set title \"%s\"\n", str);
    fprintf(fp, "set xlabel \"vertices\"\n");
    fprintf(fp, "set ylabel \"-hyperedges\"\n");
    fprintf(fp, "plot ");
    if (part != NULL) {
      for (i = 0; i < nparts; i++) {
        fprintf(fp, "\"hgplot%02d.%02d\"", cnt, i);
        if (i != nparts-1)
          fprintf(fp, ", ");
        else
          fprintf(fp, "\n");
      }
    }
    else {
      fprintf(fp, "\"hgplot%02d\"\n", cnt);
    }
    fclose(fp);
  }
  cnt++;
}
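A toy call of the plotting routine above, showing the CSR (vindex/vedge) layout it expects; the values are made up for illustration.

/* Hypothetical sketch: plot a 3-vertex, 2-hyperedge hypergraph.
 * Edge 0 = {v0, v1}, edge 1 = {v1, v2}; vertex i's edges are
 * vedge[vindex[i] .. vindex[i+1]-1]. */
int vindex[] = {0, 1, 3, 4};
int vedge[]  = {0, 0, 1, 1};
int part[]   = {0, 0, 1};   /* one output file per partition */

Zoltan_PHG_Plot(0 /* proc */, 3 /* nvtx */, 2 /* nparts */,
                vindex, vedge, part, "toy hypergraph");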
Example No. 23
int Zoltan_LB_Set_Part_Sizes(ZZ *zz, int global_num,
    int len, int *part_ids, int *wgt_idx, float *part_sizes)
{
/*
 *  Function to set the desired part sizes. This function
 *  only sets values locally. Later, Zoltan_LB_Get_Part_Sizes
 *  collects all the information across processors.
 *
 *  Input:
 *    zz            --  The Zoltan structure to which this method
 *                      applies.
 *    global_num    --  Nonzero if part numbers are global (0 for local numbers)
 *    len           --  Length of arrays wgt_idx, part_ids, part_sizes
 *    part_ids      --  Array of part ids (local or global)
 *    wgt_idx       --  Array of indices between 0 and Obj_Wgt_Dim-1
 *    part_sizes    --  Array of floats that gives the desired part 
 *                      size for each weight and each part, i.e., 
 *                      part_sizes[i] corresponds to wgt_idx[i] and part_id[i]
 *
 *  Output:
 *    zz->LB.*      --  Appropriate fields set to designated values.
 *    Return value  --  Error code.
 */

  char *yo = "Zoltan_LB_Set_Part_Sizes";
  int i, j, maxlen=0;
  int error = ZOLTAN_OK;
  const int INIT_NUM_PART = 64; /* Initial allocation for Part_Info array. */

  ZOLTAN_TRACE_ENTER(zz, yo);

  /* len = -1 will nullify all part sizes set on this proc */
  if (len == -1){
    zz->LB.Part_Info_Len = 0;
    goto End;
  }

  /* Verify input. */
  if ((part_ids==NULL) || (part_sizes==NULL)){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Invalid input argument NULL.");
    error = ZOLTAN_FATAL;
    goto End;
  }

  /* Do we need more space? */
  if ((!zz->LB.Part_Info) || (zz->LB.Part_Info_Max_Len==0)){
    maxlen = INIT_NUM_PART;          /* Start with space for INIT_NUM_PART */
    zz->LB.Part_Info = (struct Zoltan_part_info *) ZOLTAN_MALLOC(maxlen *
      sizeof(struct Zoltan_part_info));
  }
  if (zz->LB.Part_Info_Len + len > zz->LB.Part_Info_Max_Len){
    maxlen = 2*(zz->LB.Part_Info_Len + len);  /* Double the length */
    zz->LB.Part_Info = (struct Zoltan_part_info *) ZOLTAN_REALLOC( zz->LB.Part_Info, 
                         maxlen * sizeof(struct Zoltan_part_info));
  }

  if (zz->LB.Part_Info == NULL){
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      error = ZOLTAN_MEMERR;
      goto End;
  }

  /* Add new data to part info array. */
  for (i=0,j=zz->LB.Part_Info_Len; i<len; i++,j++){
    zz->LB.Part_Info[j].Size = part_sizes[i];
    zz->LB.Part_Info[j].Part_id = part_ids[i]; 
    zz->LB.Part_Info[j].Idx = (wgt_idx ? wgt_idx[i] : 0); 
    zz->LB.Part_Info[j].Global_num = global_num;
  }

  /* Update values in LB. */
  zz->LB.Part_Info_Len += len;
  if (maxlen > zz->LB.Part_Info_Max_Len)
    zz->LB.Part_Info_Max_Len = maxlen;

End:
  ZOLTAN_TRACE_EXIT(zz, yo);
  return error;
}
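A usage sketch for the routine above (the values are assumptions for illustration): requesting a 25%/75% split between two global parts with a single weight dimension. The sizes are relative; Zoltan_LB_Get_Part_Sizes later combines them across processors.

/* Hedged sketch: ask for parts 0 and 1 to receive 25% and 75% of the
 * work, one weight dimension (wgt_idx all zero). */
int   part_ids[2]   = {0, 1};
int   wgt_idx[2]    = {0, 0};
float part_sizes[2] = {0.25f, 0.75f};

int ierr = Zoltan_LB_Set_Part_Sizes(zz, 1 /* global part numbers */,
                                    2, part_ids, wgt_idx, part_sizes);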
Example No. 24
int       Zoltan_Comm_Do_Post(
ZOLTAN_COMM_OBJ * plan,		/* communication data structure */
int tag,			/* message tag for communicating */
char *send_data,		/* array of data I currently own */
int nbytes,			/* multiplier for sizes */
char *recv_data)		/* array of data I'll own after comm */
{
    char     *send_buff;	/* space to buffer outgoing data */
    int       my_proc;		/* processor ID */
    int       self_recv_address;/* where in recv_data self info starts */
    int       self_num;		/* where in send list my_proc appears */
    int       offset;		/* offset into array I'm copying into */
    int       self_index;	/* send offset for data I'm keeping */
    int       out_of_mem;	/* am I out of memory? */
    int       nblocks;		/* number of procs who need my data */
    int       proc_index;	/* loop counter over procs to send to */
    int       i, j, k, jj;	/* loop counters */

    static char *yo = "Zoltan_Comm_Do_Post";


    /* Check input parameters */
    if (!plan) {
        MPI_Comm_rank(MPI_COMM_WORLD, &my_proc);
	ZOLTAN_COMM_ERROR("Communication plan = NULL", yo, my_proc);
	return ZOLTAN_FATAL;
    }

    MPI_Comm_rank(plan->comm, &my_proc);

    if ((plan->nsends + plan->self_msg) && !send_data) {
        int sum = 0;
        if (plan->sizes_to)   /* Not an error if all sizes_to == 0 */
            for (i = 0; i < (plan->nsends + plan->self_msg); i++) 
                sum += plan->sizes_to[i];
        if (!plan->sizes_to || (plan->sizes_to && sum)) {
            ZOLTAN_COMM_ERROR("nsends not zero, but send_data = NULL", 
                              yo, my_proc);
            return ZOLTAN_FATAL;
        }
    }
    if ((plan->nrecvs + plan->self_msg) && !recv_data) {
        int sum = 0;
        if (plan->sizes_from)   /* Not an error if all sizes_from == 0 */
            for (i = 0; i < (plan->nrecvs + plan->self_msg); i++) 
                sum += plan->sizes_from[i];
        if (!plan->sizes_from || (plan->sizes_from && sum)) {
            ZOLTAN_COMM_ERROR("nrecvs not zero, but recv_data = NULL", 
                              yo, my_proc);
            return ZOLTAN_FATAL;
        }
    }
    if (nbytes < 0) {
	ZOLTAN_COMM_ERROR("Scale factor nbytes is negative", yo, my_proc);
	return ZOLTAN_FATAL;
    }


    /* Post irecvs */

    out_of_mem = 0;

    if (plan->indices_from == NULL) {
	/* Data can go directly into user space. */
	plan->recv_buff = recv_data;
    }
    else {			/* Need to buffer receive to reorder */
	plan->recv_buff = (char *) ZOLTAN_MALLOC(plan->total_recv_size * nbytes);
	if (plan->recv_buff == NULL && plan->total_recv_size * nbytes != 0)
	    out_of_mem = 1;
    }

    if (!out_of_mem) {
	if (plan->sizes == NULL) {	/* All data the same size */
	    k = 0;
	    for (i = 0; i < plan->nrecvs + plan->self_msg; i++) {
		if (plan->procs_from[i] != my_proc) {
		    MPI_Irecv((void *) & plan->recv_buff[plan->starts_from[i] * nbytes],
			      plan->lengths_from[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_from[i], tag,
			      plan->comm, &plan->request[k]);
		    k++;
		}
		else {
		    self_recv_address = plan->starts_from[i] * nbytes;
		}
	    }
	}

	else {			/* Data of varying sizes */
	    k = 0;
	    for (i = 0; i < plan->nrecvs + plan->self_msg; i++) {
		if (plan->procs_from[i] != my_proc) {
                    if (plan->sizes_from[i]) 
		        MPI_Irecv((void *)
                &plan->recv_buff[plan->starts_from_ptr[i] * nbytes],
			          plan->sizes_from[i] * nbytes,
			          (MPI_Datatype) MPI_BYTE, plan->procs_from[i], 
			          tag, plan->comm, &plan->request[k]);
                    else
                        plan->request[k] = MPI_REQUEST_NULL;
	            k++;
		}
		else {
		    self_recv_address = plan->starts_from_ptr[i] * nbytes;
		}
	    }
	}
    }


    /* Do remaining allocation to check for any mem problems. */
    if (plan->indices_to != NULL) {	/* can't send straight from input */
	send_buff = (char *) ZOLTAN_MALLOC(plan->max_send_size * nbytes);
	if (send_buff == 0 && plan->max_send_size * nbytes != 0)
	    out_of_mem = 1;
    }
    else
	send_buff = NULL;

    /* Barrier to ensure irecvs are posted before doing any sends. */
    /* Simultaneously see if anyone out of memory */

    MPI_Allreduce(&out_of_mem, &j, 1, MPI_INT, MPI_SUM, plan->comm);

    if (j > 0) {		/* Some proc is out of memory -> Punt */
	ZOLTAN_FREE(&send_buff);
	if (plan->indices_from != NULL)
	    ZOLTAN_FREE(&plan->recv_buff);
	return (ZOLTAN_MEMERR);
    }

    /* Send out data */

    /* Scan through procs_to list to start w/ higher numbered procs */
    /* This should balance message traffic. */

    nblocks = plan->nsends + plan->self_msg;
    proc_index = 0;
    while (proc_index < nblocks && plan->procs_to[proc_index] < my_proc)
	proc_index++;
    if (proc_index == nblocks)
	proc_index = 0;

    if (plan->sizes == NULL) {	/* Data all of same size */
	if (plan->indices_to == NULL) {	/* data already blocked by processor. */
	    for (i = proc_index, j = 0; j < nblocks; j++) {
		if (plan->procs_to[i] != my_proc) {
		    MPI_Rsend((void *) &send_data[plan->starts_to[i] * nbytes],
			      plan->lengths_to[i] * nbytes,
			      (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag,
			      plan->comm);
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }

	    if (plan->self_msg) {	/* Copy data to self. */
		memcpy(&plan->recv_buff[self_recv_address],
		       &send_data[plan->starts_to[self_num] * nbytes],
		       plan->lengths_to[self_num] * nbytes);
	    }
	}

	else {			/* Not blocked by processor.  Need to buffer. */
	    for (i = proc_index, jj = 0; jj < nblocks; jj++) {
		if (plan->procs_to[i] != my_proc) {
		    /* Need to pack message first. */
		    offset = 0;
		    j = plan->starts_to[i];
		    for (k = 0; k < plan->lengths_to[i]; k++) {
			memcpy(&send_buff[offset],
			       &send_data[plan->indices_to[j++] * nbytes], nbytes);
			offset += nbytes;
		    }
		    MPI_Rsend((void *) send_buff, plan->lengths_to[i] * nbytes,
		      (MPI_Datatype) MPI_BYTE, plan->procs_to[i], tag, plan->comm);
		}
		else {
		    self_num = i;
		    self_index = plan->starts_to[i];
		}
		if (++i == nblocks)
		    i = 0;
	    }
	    if (plan->self_msg) {	/* Copy data to self. */
		for (k = 0; k < plan->lengths_to[self_num]; k++) {
		    memcpy(&plan->recv_buff[self_recv_address],
		      &send_data[plan->indices_to[self_index++] * nbytes], nbytes);
		    self_recv_address += nbytes;
		}
	    }

	    ZOLTAN_FREE(&send_buff);
	}
    }
    else {			/* Data of differing sizes */
	if (plan->indices_to == NULL) {	/* data already blocked by processor. */
	    for (i = proc_index, j = 0; j < nblocks; j++) {

		if (plan->procs_to[i] != my_proc) {
                    if (plan->sizes_to[i]) {
		        MPI_Rsend((void *)
                                  &send_data[plan->starts_to_ptr[i] * nbytes],
			          plan->sizes_to[i] * nbytes,
			          (MPI_Datatype) MPI_BYTE, plan->procs_to[i],
			          tag, plan->comm);
                    }
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }

	    if (plan->self_msg) {	/* Copy data to self. */
                if (plan->sizes_to[self_num])
		    memcpy(&plan->recv_buff[self_recv_address],
		           &send_data[plan->starts_to_ptr[self_num] * nbytes],
		           plan->sizes_to[self_num] * nbytes);
	    }
	}

	else {			/* Not blocked by processor.  Need to buffer. */
	    for (i = proc_index, jj = 0; jj < nblocks; jj++) {
		if (plan->procs_to[i] != my_proc) {
		    /* Need to pack message first. */
		    offset = 0;
		    j = plan->starts_to[i];
		    for (k = 0; k < plan->lengths_to[i]; k++) {
                        if (plan->sizes[plan->indices_to[j]]) {
			    memcpy(&send_buff[offset],
			       &send_data[plan->indices_to_ptr[j] * nbytes],
			       plan->sizes[plan->indices_to[j]] * nbytes);
			    offset += plan->sizes[plan->indices_to[j]] * nbytes;
                        }
			j++;
		    }
                    if (plan->sizes_to[i]) {
		        MPI_Rsend((void *) send_buff, 
                                  plan->sizes_to[i] * nbytes,
		                  (MPI_Datatype) MPI_BYTE, plan->procs_to[i],
                                  tag, plan->comm);
                    }
		}
		else
		    self_num = i;
		if (++i == nblocks)
		    i = 0;
	    }
	    if (plan->self_msg) {	/* Copy data to self. */
                if (plan->sizes_to[self_num]) {
		    j = plan->starts_to[self_num];
		    for (k = 0; k < plan->lengths_to[self_num]; k++) {
		        jj = plan->indices_to_ptr[j];
		        memcpy(&plan->recv_buff[self_recv_address],
			       &send_data[jj * nbytes],
			       plan->sizes[plan->indices_to[j]] * nbytes);
		        self_recv_address += plan->sizes[plan->indices_to[j]] 
                                           * nbytes;
		        j++;
		    }
		}
	    }

	    ZOLTAN_FREE(&send_buff);
	}
    }
    return (ZOLTAN_OK);
}
Example No. 25
int Zoltan_Divide_Machine(
   ZZ *zz,             /* The Zoltan structure (not used now; will be
                          used for pointer to machine details) */
   int obj_wgt_dim,    /* Number of different weights (loads). */
   float *part_sizes,  /* Array of partition sizes, containing percentage of 
                          work per partition. (length= obj_wgt_dim*num_parts) */
   int proc,           /* my processor number in global sense */
   MPI_Comm comm,      /* communicator for part of machine to be divided */
   int *set,           /* set that proc is in after divide (lowest global
                          numbered processor in set 0) */
   int *proclower,     /* lowest numbered processor in first set */
   int *procmid,       /* lowest numbered processor in second set */
   int *num_procs,     /* on input, # of procs to be divided
                          on exit, # of procs in the set that proc is in */
   int *partlower,     /* lowest numbered partition in first set */
   int *partmid,       /* lowest numbered partition in second set */
   int *num_parts,     /* on input, # of partitions to be divided
                          on exit, # of parts in the set that proc is in */
   double *fractionlo  /* actual division of machine: % of work to be assigned
                          to first set (array if obj_wgt_dim>1) */
)
{
int i, j, k;
int np = 0;     /* Number of partitions on procmid */
int fpartmid;   /* First partition on procmid */
int totalparts; /* Total number of partitions in input set. */
int totalprocs; /* Total number of processors in input set. */
int dim = obj_wgt_dim;
double *sum = NULL;

/* This routine divides the current machine (defined by the communicator)
 * into two pieces.
 * For now, it simply divides the machine in half.  In the future, it will
 * be a more complicated routine taking into account the architecture of
 * the machine and communication network. 
 * The two resulting sets contain contiguously numbered processors 
 * and partitions.
 */

  if (dim<1) dim = 1;   /* In case obj_wgt_dim==0. */

  /* The following statement assumes that proclower is being set correctly in
     the calling routine if Tflops_Special flag is set */
  if (!zz->Tflops_Special)
     MPI_Allreduce(&proc, proclower, 1, MPI_INT, MPI_MIN, comm);

  totalparts = *partlower + *num_parts;
  totalprocs = *proclower + *num_procs;

  /* Compute procmid as roughly half the number of processors. */
  /* Then partmid is the lowest-numbered partition on procmid. */

  *procmid = *proclower + (*num_procs - 1)/2 + 1;
  if (*procmid < totalprocs)
    Zoltan_LB_Proc_To_Part(zz, *procmid, &np, &fpartmid);
  if (np > 0)
    *partmid = fpartmid;
  else {
    /* No partitions on procmid; find next part number in procs > procmid */
    i = *procmid;
    while (np == 0 && (++i) < totalprocs) {
      Zoltan_LB_Proc_To_Part(zz, i, &np, &fpartmid);
    }
    if (np) 
      *partmid = fpartmid;
    else
      *partmid = totalparts;
  }

  /* Check special cases */

  if (!zz->LB.Single_Proc_Per_Part && *partmid != totalparts) {
    i = Zoltan_LB_Part_To_Proc(zz, *partmid, NULL);
    if (i != *procmid) {

      /* Partition is spread across several processors. 
         Don't allow mid to fall within a partition; reset procmid so that it
         falls at a partition boundary.  */

      if (i != *proclower) {
        /* set procmid to lowest processor containing partmid */
        *procmid = i;
      }
      else { /* i == *proclower */
        /* Move mid to next partition so that procmid != proclower */
        (*partmid)++;
        *procmid = Zoltan_LB_Part_To_Proc(zz, *partmid, NULL);
      }
    }
  }

  /* Sum up desired partition sizes. */
  sum = (double *)ZOLTAN_MALLOC(dim*sizeof(double));
  if (sum == NULL)
    return ZOLTAN_MEMERR;

  for (k=0; k<dim; k++){
    sum[k] = 0.0;
    fractionlo[k] = 0.0;
  }
  for (i = 0; i < *num_parts; i++) {
    j = *partlower + i;
    for (k=0; k<dim; k++){
      if (j < *partmid)
        fractionlo[k] += (double) part_sizes[j*dim+k];
      sum[k] += (double) part_sizes[j*dim+k];
    }
  }
  for (k=0; k<dim; k++)
    if (sum[k] != 0.0) fractionlo[k] /= sum[k];

  if (proc < *procmid) {
    *set = 0;
    *num_parts = *partmid - *partlower;
    *num_procs = *procmid - *proclower;
  } 
  else {
    *set = 1;
    *num_parts = totalparts - *partmid;
    *num_procs = totalprocs - *procmid;
  }

  ZOLTAN_FREE(&sum);
  return ZOLTAN_OK;
}
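A worked instance of the fractionlo arithmetic above for obj_wgt_dim == 1 (the numbers are made up):

/* Sketch: 4 parts with desired sizes {0.1, 0.2, 0.3, 0.4}, partlower = 0,
 * partmid = 2.  Work below the cut = 0.1 + 0.2 = 0.3 of a total 1.0, so
 * fractionlo = 0.3: the first set should receive 30% of the work. */
double sizes[4] = {0.1, 0.2, 0.3, 0.4};
double lo = 0.0, total = 0.0;
int j, partmid = 2;

for (j = 0; j < 4; j++) {
  if (j < partmid) lo += sizes[j];
  total += sizes[j];
}
if (total != 0.0) lo /= total;   /* lo == 0.3 */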
Example No. 26
static int scale_round_weights(
  float *fwgts, 
  int *iwgts, 
  int n, 
  int dim,
  int mode
)
{
/* Convert floating point weights to integer weights.
 * This routine is stolen from scale_round_weights in parmetis_jostle.c.
 * Because it needs to run only serially, and because it uses only 
 * integers (not idxtype), it has been largely duplicated here.
 */

  int i, j, tmp, ierr; 
  int max_wgt_sum = INT_MAX/8;
  int *nonint;
  float *scale, *sum_wgt, *max_wgt;
  char msg[256];
  static char *yo = "scale_round_weights";

  ierr = ZOLTAN_OK;

  if (mode == 0) {
    /* No scaling; just convert to int */
    for (i=0; i<n*dim; i++){
      iwgts[i] = (int) ceil((double) fwgts[i]);
    }
  }
  else{
    /* Allocate local arrays */
    nonint = (int *)ZOLTAN_MALLOC(dim*sizeof(int));
    scale = (float *)ZOLTAN_MALLOC(3*dim*sizeof(float));
    sum_wgt = scale + dim;
    max_wgt = sum_wgt + dim;
    if (!(nonint && scale)){
      ZOLTAN_PRINT_ERROR(0, yo, "Out of memory.");
      ZOLTAN_FREE(&nonint);
      ZOLTAN_FREE(&scale);
      return ZOLTAN_MEMERR;
    }

    /* Initialize */
    for (j=0; j<dim; j++){
      nonint[j] = 0;
      sum_wgt[j] = 0;
      max_wgt[j] = 0;
    }

    /* Compute local sums of the weights */
    /* Check if all weights are integers */
    for (i=0; i<n; i++){
      for (j=0; j<dim; j++){
        if (!nonint[j]){ 
          /* tmp = (int) roundf(fwgts[i*dim+j]);  EB: Valid C99, but not C89 */
          tmp = (int) floor((double) fwgts[i*dim+j] + .5); /* Nearest int */
          if (fabs((double)tmp-fwgts[i*dim+j]) > INT_EPSILON){
            nonint[j] = 1;
          }
        }
        sum_wgt[j] += fwgts[i*dim+j];
        if (fwgts[i*dim+j] > max_wgt[j])
          max_wgt[j] = fwgts[i*dim+j]; 
      }
    }

    /* Calculate scale factor */
    for (j=0; j<dim; j++){
      scale[j] = 1.;
      /* Scale unless all weights are integers (not all zero) */
      if (nonint[j] || (max_wgt[j] <= INT_EPSILON) 
                    || (sum_wgt[j] > max_wgt_sum)){
        if (sum_wgt[j] == 0){
          ierr = ZOLTAN_WARN;
          sprintf(msg, "All weights are zero in component %1d", j);
          ZOLTAN_PRINT_WARN(0, yo, msg);
        }
        else /* sum_wgt[j] != 0) */
          scale[j] = max_wgt_sum/sum_wgt[j];
      }
    }

    /* If mode==2, let the scale factor be the same for all weights */
    if (mode==2){
      for (j=1; j<dim; j++){
        if (scale[j]<scale[0])
          scale[0] = scale[j];
      }
      for (j=1; j<dim; j++){
        scale[j] = scale[0];
      }
    }

    /* Convert weights to positive integers using the computed scale factor */
    for (i=0; i<n; i++){
      for (j=0; j<dim; j++){
        iwgts[i*dim+j] = (int) ceil((double) fwgts[i*dim+j]*scale[j]);
      }
    }

    ZOLTAN_FREE(&nonint);
    ZOLTAN_FREE(&scale);
  }
  return ierr;
}
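A usage sketch for the conversion above (inputs are assumptions): mode 0 simply takes the ceiling, while modes 1 and 2 first rescale each weight component so that non-integral or oversized weights sum to roughly INT_MAX/8 before rounding.

/* Hedged sketch: convert three float weights, one weight per object. */
float fwgts[3] = {0.5f, 1.5f, 2.0f};
int   iwgts[3];

int ierr = scale_round_weights(fwgts, iwgts, 3 /* n */, 1 /* dim */,
                               0 /* mode: no scaling */);
/* mode 0 result: iwgts == {1, 2, 2} (ceil of each input) */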
Example No. 27
static int create_reverse_plan( 
ZOLTAN_COMM_OBJ *plan,		/* communication data structure */
int      tag,
int      *sizes)		/* variable size of objects (if not NULL) */
{
    int       total_send_length;/* total message length I send in plan */
    int       max_recv_length;	/* biggest message I recv in plan */
    int       sum_recv_sizes;	/* sum of the item sizes I receive */
    int       comm_flag;		/* status flag */
    int       i;		/* loop counter */
    static char *yo = "Zoltan_Comm_Reverse_Plan";

    /* Check input parameters */
    if (!plan){
      ZOLTAN_COMM_ERROR("Communication plan = NULL.", yo, -1);
      return ZOLTAN_FATAL;
    }

    /* Let Zoltan_Comm_Do check the remaining parameters. */

    total_send_length = 0;
    for (i = 0; i < plan->nsends + plan->self_msg; i++) {
        total_send_length += plan->lengths_to[i];
    }

    max_recv_length = 0;
    for (i = 0; i < plan->nrecvs; i++) {
	if (plan->lengths_from[i] > max_recv_length)
	    max_recv_length = plan->lengths_from[i];
    }

    plan->plan_reverse =(ZOLTAN_COMM_OBJ*)ZOLTAN_MALLOC(sizeof(ZOLTAN_COMM_OBJ));
    if (plan->plan_reverse == NULL)
	return(ZOLTAN_MEMERR);

    plan->plan_reverse->nvals = plan->nvals_recv;
    plan->plan_reverse->nvals_recv = plan->nvals;
    plan->plan_reverse->lengths_to = plan->lengths_from;
    plan->plan_reverse->procs_to = plan->procs_from;
    plan->plan_reverse->indices_to = plan->indices_from;
    plan->plan_reverse->starts_to = plan->starts_from;
    plan->plan_reverse->lengths_from = plan->lengths_to;
    plan->plan_reverse->procs_from = plan->procs_to;
    plan->plan_reverse->indices_from = plan->indices_to;
    plan->plan_reverse->starts_from = plan->starts_to;
    plan->plan_reverse->nrecvs = plan->nsends;
    plan->plan_reverse->nsends = plan->nrecvs;
    plan->plan_reverse->self_msg = plan->self_msg;
    plan->plan_reverse->max_send_size = max_recv_length;
    plan->plan_reverse->total_recv_size = total_send_length;
    plan->plan_reverse->comm = plan->comm;
    plan->plan_reverse->sizes = NULL;
    plan->plan_reverse->sizes_to = NULL;
    plan->plan_reverse->sizes_from = NULL;
    plan->plan_reverse->starts_to_ptr = NULL;
    plan->plan_reverse->starts_from_ptr = NULL;
    plan->plan_reverse->indices_to_ptr = NULL;
    plan->plan_reverse->indices_from_ptr = NULL;
    plan->plan_reverse->maxed_recvs = 0;

    if (MPI_RECV_LIMIT > 0){
      /* If we have a limit to the number of posted receives we are allowed,
      ** and our plan has exceeded that, then switch to an MPI_Alltoallv so
      ** that we will have fewer receives posted when we do the communication.
      */
      MPI_Allreduce(&plan->nsends, &i, 1, MPI_INT, MPI_MAX, plan->comm);
      if (i > MPI_RECV_LIMIT){
        plan->plan_reverse->maxed_recvs = 1;
      }
    }

    if (plan->plan_reverse->maxed_recvs){
      plan->plan_reverse->request = NULL;
      plan->plan_reverse->status = NULL;
    }
    else{
      plan->plan_reverse->request = (MPI_Request *)
  	ZOLTAN_MALLOC(plan->plan_reverse->nrecvs * sizeof(MPI_Request));
      if (plan->plan_reverse->request == NULL && plan->plan_reverse->nrecvs != 0) {
  	return(ZOLTAN_MEMERR);
      }
  
      plan->plan_reverse->status = (MPI_Status *)
  	ZOLTAN_MALLOC(plan->plan_reverse->nrecvs * sizeof(MPI_Status));
      if (plan->plan_reverse->status == NULL && plan->plan_reverse->nrecvs != 0) {
  	return(ZOLTAN_MEMERR);
      }
    }

    comm_flag =Zoltan_Comm_Resize(plan->plan_reverse,sizes,tag,&sum_recv_sizes);

    if (comm_flag != ZOLTAN_OK && comm_flag != ZOLTAN_WARN) {
	return(comm_flag);
    }

    if (sum_recv_sizes != plan->plan_reverse->total_recv_size){
       /* Sanity check */
       return(ZOLTAN_FATAL);
    }

    return ZOLTAN_OK;
}
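The heart of the routine above is the to/from swap: the reverse plan sends along every edge the forward plan received on, and vice versa. A minimal illustration with a hypothetical stripped-down plan type:

/* Sketch: reversing a communication plan by exchanging the roles of the
 * send ("to") and receive ("from") descriptions.  mini_plan is a made-up
 * type; the real ZOLTAN_COMM_OBJ swap above covers many more fields. */
struct mini_plan {
  int *procs_to,   *lengths_to;    /* whom I send to, and how much      */
  int *procs_from, *lengths_from;  /* whom I receive from, and how much */
  int  nsends, nrecvs;
};

static void reverse_mini_plan(const struct mini_plan *p, struct mini_plan *r)
{
  r->procs_to     = p->procs_from;  r->lengths_to   = p->lengths_from;
  r->procs_from   = p->procs_to;    r->lengths_from = p->lengths_to;
  r->nsends       = p->nrecvs;      r->nrecvs       = p->nsends;
}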
Example No. 28
int Zoltan_PHG_ParKway(
  ZZ        *zz,
  HGraph    *hg,
  int       nparts,           /* # of desired partitions */
  Partition partvec,          /* Output:  partition assignment vector */
  PHGPartParams *hgp          /* Input: hypergraph parameters */  
)
{
    char *yo = "Zoltan_HG_ParKway";

#ifndef ZOLTAN_PARKWAY
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "ParKway method selected but Zoltan is not"
                       "built and linked with ParKway.");
    return ZOLTAN_FATAL;
#else

    int ierr = ZOLTAN_OK;
    int options[29];                      /* ParKway options */
    int *ivwgts = NULL, *iewgts = NULL;   /* ParKway expects integer weights. */
    int *pvector=NULL;                    /* partvec for "local" vertices */
    int cut;                              /* Diagnostics from ParKway */
    double constraint;                    /* imbalance ratio */
    int i, anVtx, nVtx;                   /* counter and local vertex count (anVtx on the first nProc-1 procs) */
    PHGComm *hgc=hg->comm;
    int *disp=NULL, *recv_size=NULL;      /* for allgatherv */
    static int seed=1;
    
    /* ParKway expects integer weights; convert if weights are provided. */
    ivwgts = (int *) ZOLTAN_MALLOC(hg->nVtx  * sizeof(int));
    iewgts = (int *) ZOLTAN_MALLOC(hg->nEdge * sizeof(int));    
    if (!ivwgts || !iewgts)
        ZOLTAN_PARKWAY_ERROR("Memory error.", ZOLTAN_MEMERR);
    
    
    if (hg->VtxWeightDim > 1) { 
        ZOLTAN_PARKWAY_ERROR("ParKway supports Vtx_Weight_Dim == 0 or 1 only.",
                             ZOLTAN_FATAL);
    } else if (hg->VtxWeightDim == 1) 
        scale_round_weights(hg->vwgt, ivwgts, hg->nVtx, hg->VtxWeightDim, 0);
    else 
        for (i=0; i<hg->nVtx; ++i)
            ivwgts[i] = 1;

        
    if (hg->EdgeWeightDim > 1) {
        ZOLTAN_PARKWAY_ERROR("ParKway supports Edge_Weight_Dim == 0 or 1 only.",
                             ZOLTAN_FATAL);
    } else if (hg->EdgeWeightDim == 1) 
        scale_round_weights(hg->ewgt, iewgts, hg->nEdge, hg->EdgeWeightDim, 0);
    else
        for (i=0; i<hg->nEdge; ++i)
            iewgts[i] = 1;
    
    
    
    anVtx = hg->nVtx / hgc->nProc;
    nVtx = (hgc->myProc==hgc->nProc-1) ? hg->nVtx-(anVtx*(hgc->nProc-1)) : anVtx;

    pvector = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int));
    disp = (int *) ZOLTAN_MALLOC(hgc->nProc * sizeof(int));
    recv_size = (int *) ZOLTAN_MALLOC(hgc->nProc * sizeof(int));
    if ((nVtx && !pvector) || !disp || !recv_size)
        ZOLTAN_PARKWAY_ERROR("Memory error.", ZOLTAN_MEMERR);


    /* ----- Set ParKway's options --------------- */
    options[0] = 1;/*0 -> all options use default, else user define*/
    options[1] = seed++;/*0 -> seed chosen by sprng, else use options[1] as seed*/
    options[2] = 0;/*0 -> no disp info, 1 -> some, 2 -> lots*/
    options[3] = 1;/*0 -> do not write partition to disk, 1 -> do write*/
    options[4] = 1;/*number of parallel runs*/
    options[5] = 0;/*vertex to processor allocation: 0 -> as read in, 1 -> random 2 -> as prescribed in partition file */
    options[6] = 100;/*hyperedge length percentile for approx para coarsening and refinement*/
    options[7] = 1;/*increment in percentile options[6]*/
    options[8] = 200;/*numParts*options[5] -> min number of vertices in coarse hypergraph*/
    options[9] = 7;/*[9] and [10] specify reduction ratio in parallel coarsening*/
    options[10] = 4;/*r = [9]/[10]*/
    options[11] = 3;/*vertex visit order: 3 -> random, 1/2 inc/dec by vertex id, 4/5 inc/dec by vertex wt*/
    options[12] = 3;/*divide connectivity by cluster weight/hyperedge length: 0-neither, 1-only cluster, 2-only hedge len, 3-both   */
    options[13] = 3;/*matching request resolution order: 3 -> random, 2 -> as they arrive */
    options[14] = 1;/*number serial partitioning runs*/
    
    options[15] = 5;/*serial partitioning routine, 1-3 RB, 4 khmetis, 5 patoh, see manual*/
    
    if (!strcasecmp(hgp->parkway_serpart, "patoh"))
        options[15] = 5;
    else if (!strcasecmp(hgp->parkway_serpart, "hmetis"))
        options[15] = 4;
    else if (!strcasecmp(hgp->parkway_serpart, "generic"))
        options[15] = 1;
    else if (!strcasecmp(hgp->parkway_serpart, "genericv"))
        options[15] = 2;
    else if (!strcasecmp(hgp->parkway_serpart, "genericmv"))
        options[15] = 3;
    else {
        ZOLTAN_PARKWAY_ERROR("Invalid ParKway serial partitioner. It should be one of; generic, genericv, genericmv, hmetis, patoh.", ZOLTAN_FATAL);
    }
    
    /* uprintf(hgc, "ParKway serpart='%s'  options[13]=%d\n", hgp->parkway_serpart, options[13]); */
    
    options[16] = 2;/*serial coarsening algorithm (only if [15] = RB, see manual)*/
    options[17] = 2;/*num bisection runs in RB (only if [15] = RB, see manual)*/
    options[18] = 10;/*num initial partitioning runs in RB (only if [15] = RB, see manual)*/
    options[19] = 2;/*hmetis_PartKway coarsening option, vals 1-5, see manual (only if [15] = 4)   */
    options[20] = 2;/*hmetis_PartKway refinement option, vals 0-3, see manual (only if [15] = 4)*/
    options[21] = 3;/*patoh_partition parameter settings, vals 1-3, see manual (only if [15] = 5)*/
    options[22] = 1;/*parallel uncoarsening algorithm, 1 simple, 2 only final V-Cycle, 3 all V-Cycle*/
    options[23] = 5;/*limit on number of V-Cycle iterations (only if [22] = 2/3)*/
    options[24] = 0;/*min allowed gain for V-Cycle (percentage, see manual, only if [22] = 2/3)*/
    options[25] = 0;/*percentage threshold used to reject partitions from a number of runs (see manual)*/
    options[26] = 0;/*reduction in [23] as partitions propagate by factor [24]/100 (see manual)*/
    options[27] = 100;/*early exit criterion in parallel refinement, will exit if see ([25]*num vert)/100 consecutive -ve moves */
    options[28] = 0;/*parallel refinement 0->basic, 1->use approx 2->use early exit 3->use approx and early exit  */
    
    constraint = hgp->bal_tol-1.0;
    
    Zoltan_ParaPartKway(nVtx, hg->nEdge, &ivwgts[hgc->myProc*anVtx], iewgts,
                        hg->hindex, hg->hvertex, nparts,
                        constraint, &cut, options, pvector, NULL, hgc->Communicator);
    
/* KDDKDD
   uprintf(hgc, "ParaPartKway cut=%d\n", cut);
*/
    
    
    /* After partitioning, Zoltan needs partvec to exist on all procs when nProc_x = 1 */
    disp[0] = 0; 
    for (i = 1; i < hgc->nProc; ++i)
        disp[i] = disp[i-1] + anVtx;
    
    MPI_Allgather (&nVtx, 1, MPI_INT, recv_size, 1, MPI_INT, hgc->Communicator);    
    MPI_Allgatherv(pvector, nVtx, MPI_INT, 
                  partvec, recv_size, disp, MPI_INT, hgc->Communicator);

    
  /* HERE:  Check whether imbalance criteria were met. */

End:

    Zoltan_Multifree(__FILE__,__LINE__, 5, &ivwgts, &iewgts, &pvector, &disp, &recv_size);
    
    return ierr;
#endif
}
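The final Allgatherv above relies on a simple block layout: every rank owns anVtx vertices except the last, which takes the remainder. A small sketch of the counts and displacements (the numbers are assumptions):

/* Sketch: nProc = 4 ranks, 10 global vertices -> anVtx = 2; the last
 * rank holds 10 - 2*3 = 4 vertices.  disp[] marks where each rank's
 * block starts in the gathered partvec. */
int nProc = 4, nVtxGlobal = 10;
int anVtx = nVtxGlobal / nProc;                    /* 2 */
int disp[4], recv_size[4], i;

for (i = 0; i < nProc; i++) {
  disp[i]      = i * anVtx;                        /* 0, 2, 4, 6 */
  recv_size[i] = (i == nProc - 1)
               ? nVtxGlobal - anVtx * (nProc - 1)  /* 4 */
               : anVtx;                            /* 2 */
}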
Example No. 29
static int
Zoltan_Postprocess_Partition (ZZ *zz,
			      ZOLTAN_Third_Graph *gr,
			      ZOLTAN_Third_Part  *prt,
			      ZOLTAN_Output_Part *part,
			      ZOLTAN_ID_PTR      global_ids,
			      ZOLTAN_ID_PTR      local_ids)
{
  int ierr = ZOLTAN_OK;
  int i, j, nsend;
  int *newproc;

  int num_gid_entries = zz->Num_GID;
  int num_lid_entries = zz->Num_LID;

  /* Partitioning */
  /* Determine new processor and number of objects to export */
  newproc = (int *) ZOLTAN_MALLOC(gr->num_obj * sizeof(int));
  if (gr->num_obj && !newproc){
    /* Not enough memory */
    ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Out of memory. ");
  }
  for (i=0; i<gr->num_obj; i++){
    newproc[i] = Zoltan_LB_Part_To_Proc(zz, prt->part[i],
					&(global_ids[i*num_gid_entries]));
    if (newproc[i]<0){
      ZOLTAN_FREE(&newproc);
      ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL,
			    "Zoltan_LB_Part_To_Proc returned invalid processor number.");
    }
  }

  if (zz->LB.Remap_Flag) {
    int new_map;

    ierr = Zoltan_LB_Remap(zz, &new_map, gr->num_obj, newproc, prt->input_part,
			   prt->part, 1);
    if (ierr < 0) {
      ZOLTAN_FREE(&newproc);
      ZOLTAN_THIRD_ERROR(ZOLTAN_FATAL,
			    "Error returned from Zoltan_LB_Remap");
    }
  }

  nsend = 0;
  for (i=0; i<gr->num_obj; i++){
    if ((prt->part[i] != prt->input_part[i]) || ((!part->compute_only_part_changes) &&
						  (newproc[i] != zz->Proc)))
      nsend++;
    if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL)
      printf("[%1d] DEBUG: local object %1d: old part = %1d, new part = %1d\n",
	     zz->Proc, i, prt->input_part[i], prt->part[i]);
  }

  /* Create export lists */
  if (zz->LB.Return_Lists){
    part->num_exp = nsend;
    if (nsend > 0) {
      if (!Zoltan_Special_Malloc(zz,(void **)part->exp_gids,nsend,ZOLTAN_SPECIAL_MALLOC_GID)) {
	ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Not enough memory.");
      }
      if (!Zoltan_Special_Malloc(zz,(void **)part->exp_lids,nsend,ZOLTAN_SPECIAL_MALLOC_LID)) {
	Zoltan_Special_Free(zz,(void **)part->exp_gids,ZOLTAN_SPECIAL_MALLOC_GID);
	ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Not enough memory.");
      }
      if (!Zoltan_Special_Malloc(zz,(void **)part->exp_procs,nsend,ZOLTAN_SPECIAL_MALLOC_INT)) {
	Zoltan_Special_Free(zz,(void **)part->exp_lids,ZOLTAN_SPECIAL_MALLOC_LID);
	Zoltan_Special_Free(zz,(void **)part->exp_gids,ZOLTAN_SPECIAL_MALLOC_GID);
	ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Not enough memory.");
      }
      if (!Zoltan_Special_Malloc(zz,(void **)part->exp_part,nsend,ZOLTAN_SPECIAL_MALLOC_INT)) {
	Zoltan_Special_Free(zz,(void **)part->exp_lids,ZOLTAN_SPECIAL_MALLOC_LID);
	Zoltan_Special_Free(zz,(void **)part->exp_gids,ZOLTAN_SPECIAL_MALLOC_GID);
	Zoltan_Special_Free(zz,(void **)part->exp_procs,ZOLTAN_SPECIAL_MALLOC_INT);
	ZOLTAN_THIRD_ERROR(ZOLTAN_MEMERR, "Not enough memory.");
      }
      j = 0;
      for (i=0; i<gr->num_obj; i++){
	if ((prt->part[i] != prt->input_part[i]) || ((!part->compute_only_part_changes)
						      && (newproc[i] != zz->Proc))){
	  /* Object should move to new partition or processor */
	  ZOLTAN_SET_GID(zz, &((*(part->exp_gids))[j*num_gid_entries]),
			 &(global_ids[i*num_gid_entries]));
	  if (num_lid_entries)
	    ZOLTAN_SET_LID(zz, &((*(part->exp_lids))[j*num_lid_entries]),
			   &(local_ids[i*num_lid_entries]));
	  (*(part->exp_part))[j] = prt->part[i];
	  (*(part->exp_procs))[j] = newproc[i];
/*	  printf("[%1d] Debug: Move object %1d to part %1d, proc %1d\n", */
/*	     zz->Proc, i, prt->part[i], newproc[i]); */
	  j++;
	}
      }
    }
  }

  ZOLTAN_FREE(&newproc);

  return (ZOLTAN_OK);
}
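The export test above appears twice (once to count, once to fill the lists); a sketch of it as a hypothetical predicate:

/* Sketch: an object is exported if its part changed, or, unless the
 * caller asked for part changes only, if its owning processor changed. */
static int must_export(int new_part, int old_part, int new_proc,
                       int my_proc, int compute_only_part_changes)
{
  return (new_part != old_part) ||
         (!compute_only_part_changes && new_proc != my_proc);
}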
Example No. 30
int       Zoltan_Comm_Do_AlltoAll(
ZOLTAN_COMM_OBJ * plan,		/* communication data structure */
char *send_data,		/* array of data I currently own */
int nbytes,			/* multiplier for sizes */
char *recv_data)		/* array of data I'll own after comm */
{
  static char *yo = "Zoltan_Comm_Do_AlltoAll";
  char *outbuf=NULL, *inbuf=NULL, *buf=NULL;
  int *outbufCounts=NULL, *outbufOffsets=NULL; 
  int *inbufCounts=NULL, *inbufOffsets=NULL;
  int nprocs, me, rc, i, j, k, p, sorted;
  int nSendMsgs, nSendItems, nRecvMsgs, nRecvItems;
  int length, offset, itemSize, outbufLen;

  int sm = (plan->self_msg > 0) ? 1 : 0;

  nSendMsgs = plan->nsends + sm;
  nRecvMsgs = plan->nrecvs + sm;

  for (i=0, nSendItems=0; i <nSendMsgs; i++){
    nSendItems += plan->lengths_to[i];
  }
  for (i=0, nRecvItems=0; i <nRecvMsgs; i++){
    nRecvItems += plan->lengths_from[i];
  }

  MPI_Comm_size(plan->comm, &nprocs);
  MPI_Comm_rank(plan->comm, &me);

  outbufCounts = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  outbufOffsets = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  inbufCounts = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  inbufOffsets = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));

  if (!outbufCounts || !outbufOffsets || !inbufCounts || !inbufOffsets){
    ZOLTAN_COMM_ERROR("memory error", yo, me);
    return ZOLTAN_MEMERR;
  }

  /* The *_to fields of the plan refer to the items in the send_data buffer,
   * and how to pull out the correct items for each receiver.  The
   * *_from fields of the plan refer to the recv_data buffer.  Items 
   * arrive in process rank order, and these fields tell us where to
   * put them in the recv_data buffer.
   */

  /* CREATE SEND BUFFER */

  sorted = 0;
  if (plan->indices_to == NULL){
    sorted = 1;
    for (i=1; i< nSendMsgs; i++){
      if (plan->starts_to[i] < plan->starts_to[i-1]){
        sorted = 0;
        break;
      }
    }
  }

  if (plan->sizes_to){
    /*
     * Each message contains items for a process, and each item may be
     * a different size.
     */

    for (i=0, outbufLen=0; i < nSendMsgs; i++){
      outbufLen += plan->sizes_to[i];
    }

    if (plan->indices_to){
      /*
       * items are not grouped by message
       */

      buf = outbuf = (char *)ZOLTAN_MALLOC(outbufLen * nbytes);
      if (outbufLen && nbytes && !outbuf){
        ZOLTAN_COMM_ERROR("memory error", yo, me);
        return ZOLTAN_MEMERR;
      }

      for (p=0, i=0, k=0; p < nprocs; p++){

        length = 0;

        if (i < nSendMsgs){
          if (plan->procs_to[i] == p){   /* procs_to is sorted */
  
            for (j=0; j < plan->lengths_to[i]; j++,k++){
              itemSize = plan->sizes[plan->indices_to[k]] * nbytes;
              offset = plan->indices_to_ptr[k] * nbytes;
  
              memcpy(buf, send_data + offset, itemSize);
  
              buf += itemSize;
              length += itemSize;
            }
            i++;
          }
        }
  
        outbufCounts[p] = length;
        if (p){
          outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
        }
      }
    }
    else{
      /*
       * items are stored contiguously for each message
       */

      if (!sorted || (plan->nvals > nSendItems) ){

        buf = outbuf = (char *)ZOLTAN_MALLOC(outbufLen * nbytes);
        if (outbufLen && nbytes && !outbuf){
          ZOLTAN_COMM_ERROR("memory error", yo, me);
          return ZOLTAN_MEMERR;
        }
      }
      else{
        /* All items in send_data are being sent, and they are sorted
         * in process rank order.
         */
        outbuf = send_data;
      }

      for (p=0, i=0; p < nprocs; p++){

        length = 0;

        if (i < nSendMsgs){
          if (plan->procs_to[i] == p){   /* procs_to is sorted */
            length = plan->sizes_to[i] * nbytes;
            offset = plan->starts_to_ptr[i] * nbytes;
  
            if ((!sorted || (plan->nvals > nSendItems)) && length){
              memcpy(buf, send_data + offset, length);
              buf += length;
            }
            i++;
          }
        }
  
        outbufCounts[p] = length;
        if (p){
          outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
        }
      }
    }
  }
  else if (plan->indices_to){
    /*
     * item sizes are constant, however the items belonging in a given
     * message may not be contiguous in send_data
     */

    buf = outbuf = (char *)ZOLTAN_MALLOC(nSendItems * nbytes);
    if (nSendItems && nbytes && !outbuf){
      ZOLTAN_COMM_ERROR("memory error", yo, me);
      return ZOLTAN_MEMERR;
    }

    for (p=0, i=0, k=0; p < nprocs; p++){

      length = 0;
     
      if (i < nSendMsgs){
        if (plan->procs_to[i] == p){   /* procs_to is sorted */
          for (j=0; j < plan->lengths_to[i]; j++,k++){
            offset = plan->indices_to[k] * nbytes;
            memcpy(buf, send_data + offset, nbytes);
            buf += nbytes;
          }
          length = plan->lengths_to[i] * nbytes;
          i++;
        }
      }

      outbufCounts[p] = length;
      if (p){
        outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
      }
    }
  }
  else{                          

    /* item sizes are constant, and items belonging to a
     * given message are always stored contiguously in send_data
     */

    if (!sorted || (plan->nvals > nSendItems)){
      buf = outbuf = (char *)ZOLTAN_MALLOC(nSendItems * nbytes);
      if (nSendItems && nbytes && !outbuf){
        ZOLTAN_COMM_ERROR("memory error", yo, me);
        return ZOLTAN_MEMERR;
      }
    }
    else{
      /* send_data is sorted by process, and we don't skip
       * any of the data in the buffer, so we can use send_data 
       * in the alltoall call
       */
      outbuf = send_data;
    }

    for (p=0,i=0; p < nprocs; p++){

      length = 0;
     
      if (i < nSendMsgs){
        if (plan->procs_to[i] == p){    /* procs_to is sorted */
          offset = plan->starts_to[i] * nbytes;
          length = plan->lengths_to[i] * nbytes;
  
          if ((!sorted || (plan->nvals > nSendItems)) && length){
            memcpy(buf, send_data + offset, length);
            buf += length;
          }
          i++;
        }
      }

      outbufCounts[p] = length;
      if (p){
        outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
      }
    }
  }

  /* CREATE RECEIVE BUFFER */

  sorted = 0;
  if (plan->indices_from == NULL){
    sorted = 1;
    for (i=1; i< nRecvMsgs; i++){
      if (plan->starts_from[i] < plan->starts_from[i-1]){
        sorted = 0;
        break;
      }
    }
  }

  if (sorted){
    /* Caller already expects received data to be ordered by
     * the sending process rank.
     */
    inbuf = recv_data;
  }
  else{
    inbuf = (char *)ZOLTAN_MALLOC(plan->total_recv_size * nbytes);
    if (plan->total_recv_size && nbytes && !inbuf){
      ZOLTAN_COMM_ERROR("memory error", yo, me);
      return ZOLTAN_MEMERR;
    }
  }

  for (p=0, i=0; p < nprocs; p++){
    length = 0;

    if (i < nRecvMsgs){
      if (plan->procs_from[i] == p){
  
        if (plan->sizes == NULL){
          length = plan->lengths_from[i] * nbytes;
        }
        else{
          length = plan->sizes_from[i] * nbytes;
        }  
        i++;
      }
    }

    inbufCounts[p] = length;
    if (p){
      inbufOffsets[p] = inbufOffsets[p-1] + inbufCounts[p-1];
    }
  }

  /* EXCHANGE DATA */

  rc = MPI_Alltoallv(outbuf, outbufCounts, outbufOffsets, MPI_BYTE,
                     inbuf, inbufCounts, inbufOffsets, MPI_BYTE,
                     plan->comm);

  if (outbuf != send_data){
    ZOLTAN_FREE(&outbuf);
  }
  ZOLTAN_FREE(&outbufCounts);
  ZOLTAN_FREE(&outbufOffsets);


  /* WRITE RECEIVED DATA INTO USER'S BUFFER WHERE IT'S EXPECTED */

  if (!sorted){

    buf = inbuf;

    if (plan->sizes == NULL){

      /* each item in each message is nbytes long */

      if (plan->indices_from == NULL){
        for (i=0; i < nRecvMsgs; i++){
          offset = plan->starts_from[i] * nbytes;
          length = plan->lengths_from[i] * nbytes;
          memcpy(recv_data + offset, buf, length);
          buf += length;
        }
      }
      else{
        for (i=0,k=0; i < nRecvMsgs; i++){

          for (j=0; j < plan->lengths_from[i]; j++,k++){
            offset = plan->indices_from[k] * nbytes;
            memcpy(recv_data + offset, buf, nbytes);
            buf += nbytes;
          }
        }
      }
    }
    else{  /* (sizes!=NULL) && (indices_from!=NULL) not allowed by Zoltan_Comm_Resize */

      /* items can be different sizes */

      for (i=0; i < nRecvMsgs; i++){
        offset = plan->starts_from_ptr[i] * nbytes;
        length = plan->sizes_from[i] * nbytes;
        memcpy(recv_data + offset, buf, length);
        buf += length;
      }
    }

    ZOLTAN_FREE(&inbuf);
  }

  ZOLTAN_FREE(&inbufCounts);
  ZOLTAN_FREE(&inbufOffsets);

  return ZOLTAN_OK;
}
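A tiny worked example of the count/offset arrays handed to MPI_Alltoallv above (the values are made up): a rank sending 2 items to rank 0, none to rank 1, and 3 items to rank 2, with nbytes = 8.

/* Sketch: per-destination byte counts and the running-sum offsets that
 * Zoltan_Comm_Do_AlltoAll builds before calling MPI_Alltoallv. */
int outbufCounts[3] = {2 * 8, 0, 3 * 8};   /* 16, 0, 24 bytes */
int outbufOffsets[3];

outbufOffsets[0] = 0;
outbufOffsets[1] = outbufOffsets[0] + outbufCounts[0];   /* 16 */
outbufOffsets[2] = outbufOffsets[1] + outbufCounts[1];   /* 16 */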