Esempio n. 1
0
/*************************************************************************
* This function is the entry point of the initial partition algorithm
* that does recursive bissection.
* This algorithm assembles the graph to all the processors and preceeds
* by parallelizing the recursive bisection step.
**************************************************************************/
void InitPartition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, ncon, mype, npes, gnvtxs, ngroups;
  idx_t *xadj, *adjncy, *adjwgt, *vwgt;
  idx_t *part, *gwhere0, *gwhere1;
  idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
  graph_t *agraph;
  idx_t lnparts, fpart, fpe, lnpes; 
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum;
  MPI_Comm ipcomm;
  struct {
    double sum;
    int rank;
  } lpesum, gpesum;

  WCOREPUSH;

  ncon = graph->ncon;

  ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  lbvec = rwspacemalloc(ctrl, ncon);

  /* assemble the graph to all the processors */
  agraph = AssembleAdaptiveGraph(ctrl, graph);
  gnvtxs = agraph->nvtxs;

  /* make a copy of the graph's structure for later */
  xadj   = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1));
  vwgt   = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon));
  adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges));
  adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges));
  part   = iwspacemalloc(ctrl, gnvtxs);

  /* create different processor groups */
  gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);


  /* Go into the recursive bisection */
  METIS_SetDefaultOptions(moptions);
  moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1;

  tpwgts  = ctrl->tpwgts;
  tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

  lnparts = ctrl->nparts;
  fpart = fpe = 0;
  lnpes = npes;
  while (lnpes > 1 && lnparts > 1) {
    /* determine the weights of the two partitions as a function of the 
       weight of the target partition weights */
    for (j=(lnparts>>1), i=0; i<ncon; i++) {
      tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
      tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
      wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
      tpwgts2[i]      *= wsum;
      tpwgts2[ncon+i] *= wsum;
    }

    METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, 
          &edgecut, part);

    /* pick one of the branches */
    if (mype < fpe+lnpes/2) {
      KeepPart(ctrl, agraph, part, 0);
      lnpes   = lnpes/2;
      lnparts = lnparts/2;
    }
    else {
      KeepPart(ctrl, agraph, part, 1);
      fpart   = fpart + lnparts/2;
      fpe     = fpe + lnpes/2;
      lnpes   = lnpes - lnpes/2;
      lnparts = lnparts - lnparts/2;
    }
  }

  gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs));
  gwhere1 = iwspacemalloc(ctrl, gnvtxs);

  if (lnparts == 1) { /* Case npes is greater than or equal to nparts */
    /* Only the first process will assign labels (for the reduction to work) */
    if (mype == fpe) {
      for (i=0; i<agraph->nvtxs; i++) 
        gwhere0[agraph->label[i]] = fpart;
    }
  }
  else { /* Case in which npes is smaller than nparts */
    /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
    tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
    for (j=0; j<ncon; j++) {
      for (wsum=0.0, i=0; i<lnparts; i++) {
        tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
        wsum += tpwgts[i*ncon+j];
      }
      for (wsum=1.0/wsum, i=0; i<lnparts; i++) 
        tpwgts[i*ncon+j] *= wsum;
    }

    METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
          &edgecut, part);

    for (i=0; i<agraph->nvtxs; i++) 
      gwhere0[agraph->label[i]] = fpart + part[i];
  }

  gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm);

  if (ngroups > 1) {
    tmpxadj   = agraph->xadj;
    tmpadjncy = agraph->adjncy;
    tmpadjwgt = agraph->adjwgt;
    tmpvwgt   = agraph->vwgt;
    tmpwhere  = agraph->where;

    agraph->xadj   = xadj;
    agraph->adjncy = adjncy;
    agraph->adjwgt = adjwgt;
    agraph->vwgt   = vwgt;
    agraph->where  = gwhere1;
    agraph->vwgt   = vwgt;
    agraph->nvtxs  = gnvtxs;

    edgecut = ComputeSerialEdgeCut(agraph);
    ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
    lbsum = rsum(ncon, lbvec, 1);

    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut,   1, IDX_T,  MPI_MAX, ctrl->gcomm);
    gkMPI_Allreduce((void *)&lbsum,   (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm);

    lpesum.sum = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION*ncon) {
      if (lbsum < UNBALANCE_FRACTION*ncon)
        lpesum.sum = edgecut;
      else
        lpesum.sum = max_cut;
    } 
    lpesum.rank = ctrl->mype;
    
    gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ctrl->gcomm);
    gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm);

    agraph->xadj   = tmpxadj;
    agraph->adjncy = tmpadjncy;
    agraph->adjwgt = tmpadjwgt;
    agraph->vwgt   = tmpvwgt;
    agraph->where  = tmpwhere;
  }

  icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

  FreeGraph(agraph);
  gkMPI_Comm_free(&ipcomm);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

  WCOREPOP;
}
Esempio n. 2
0
int CheckParams(ctrl_t *ctrl)
{
  idx_t i, j;
  real_t sum;
  mdbglvl_et  dbglvl=METIS_DBG_INFO;

  switch (ctrl->optype) {
    case METIS_OP_PMETIS:
      if (ctrl->objtype != METIS_OBJTYPE_CUT) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
        return 0;
      }
      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
        return 0;
      }
      if (ctrl->iptype != METIS_IPTYPE_GROW && ctrl->iptype != METIS_IPTYPE_RANDOM) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
        return 0;
      }
      if (ctrl->rtype != METIS_RTYPE_FM) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
        return 0;
      }
      if (ctrl->ncuts <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
        return 0;
      }
      if (ctrl->niter <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
        return 0;
      }
      if (ctrl->ufactor <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
        return 0;
      }
      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
        return 0;
      }
      if (ctrl->nparts <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
        return 0;
      }
      if (ctrl->ncon <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
        return 0;
      }

      for (i=0; i<ctrl->ncon; i++) {
        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
        if (sum < 0.99 || sum > 1.01) {
          IFSET(dbglvl, METIS_DBG_INFO, 
              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
          return 0;
        }
      }
      for (i=0; i<ctrl->ncon; i++) {
        for (j=0; j<ctrl->nparts; j++) {
          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {
            IFSET(dbglvl, METIS_DBG_INFO, 
                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
            return 0;
          }
        }
      }

      for (i=0; i<ctrl->ncon; i++) {
        if (ctrl->ubfactors[i] <= 1.0) {
          IFSET(dbglvl, METIS_DBG_INFO, 
              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
          return 0;
        }
      }

      break;

    case METIS_OP_KMETIS:
      if (ctrl->objtype != METIS_OBJTYPE_CUT && ctrl->objtype != METIS_OBJTYPE_VOL) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
        return 0;
      }
      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
        return 0;
      }
      if (ctrl->iptype != METIS_IPTYPE_METISRB) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
        return 0;
      }
      if (ctrl->rtype != METIS_RTYPE_GREEDY) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
        return 0;
      }
      if (ctrl->ncuts <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncuts.\n"));
        return 0;
      }
      if (ctrl->niter <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
        return 0;
      }
      if (ctrl->ufactor <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
        return 0;
      }
      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
        return 0;
      }
      if (ctrl->nparts <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
        return 0;
      }
      if (ctrl->ncon <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
        return 0;
      }
      if (ctrl->contig != 0 && ctrl->contig != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect contig.\n"));
        return 0;
      }
      if (ctrl->minconn != 0 && ctrl->minconn != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect minconn.\n"));
        return 0;
      }

      for (i=0; i<ctrl->ncon; i++) {
        sum = rsum(ctrl->nparts, ctrl->tpwgts+i, ctrl->ncon);
        if (sum < 0.99 || sum > 1.01) {
          IFSET(dbglvl, METIS_DBG_INFO, 
              printf("Input Error: Incorrect sum of %"PRREAL" for tpwgts for constraint %"PRIDX".\n", sum, i));
          return 0;
        }
      }
      for (i=0; i<ctrl->ncon; i++) {
        for (j=0; j<ctrl->nparts; j++) {
          if (ctrl->tpwgts[j*ctrl->ncon+i] <= 0.0) {
            IFSET(dbglvl, METIS_DBG_INFO, 
                printf("Input Error: Incorrect tpwgts for partition %"PRIDX" and constraint %"PRIDX".\n", j, i));
            return 0;
          }
        }
      }

      for (i=0; i<ctrl->ncon; i++) {
        if (ctrl->ubfactors[i] <= 1.0) {
          IFSET(dbglvl, METIS_DBG_INFO, 
              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
          return 0;
        }
      }

      break;



    case METIS_OP_OMETIS:
      if (ctrl->objtype != METIS_OBJTYPE_NODE) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect objective type.\n"));
        return 0;
      }
      if (ctrl->ctype != METIS_CTYPE_RM && ctrl->ctype != METIS_CTYPE_SHEM) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect coarsening scheme.\n"));
        return 0;
      }
      if (ctrl->iptype != METIS_IPTYPE_EDGE && ctrl->iptype != METIS_IPTYPE_NODE) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect initial partitioning scheme.\n"));
        return 0;
      }
      if (ctrl->rtype != METIS_RTYPE_SEP1SIDED && ctrl->rtype != METIS_RTYPE_SEP2SIDED) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect refinement scheme.\n"));
        return 0;
      }
      if (ctrl->nseps <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nseps.\n"));
        return 0;
      }
      if (ctrl->niter <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect niter.\n"));
        return 0;
      }
      if (ctrl->ufactor <= 0) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ufactor.\n"));
        return 0;
      }
      if (ctrl->numflag != 0 && ctrl->numflag != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect numflag.\n"));
        return 0;
      }
      if (ctrl->nparts != 3) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect nparts.\n"));
        return 0;
      }
      if (ctrl->ncon != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ncon.\n"));
        return 0;
      }
      if (ctrl->compress != 0 && ctrl->compress != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect compress.\n"));
        return 0;
      }
      if (ctrl->ccorder != 0 && ctrl->ccorder != 1) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect ccorder.\n"));
        return 0;
      }
      if (ctrl->pfactor < 0.0 ) {
        IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect pfactor.\n"));
        return 0;
      }

      for (i=0; i<ctrl->ncon; i++) {
        if (ctrl->ubfactors[i] <= 1.0) {
          IFSET(dbglvl, METIS_DBG_INFO, 
              printf("Input Error: Incorrect ubfactor for constraint %"PRIDX".\n", i));
          return 0;
        }
      }

      break;

    default:
      IFSET(dbglvl, METIS_DBG_INFO, printf("Input Error: Incorrect optype\n"));
      return 0;
  }

  return 1;
}
Esempio n. 3
0
/*************************************************************************
* This function performs a k-way directed diffusion
**************************************************************************/
real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home)
{
  idx_t ii, i, j, k, l, nvtxs, nedges, nparts;
  idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize;
  idx_t npasses, first, second, third, mind, maxd;
  idx_t *xadj, *adjncy, *adjwgt, *where, *perm;
  idx_t *rowptr, *colind, *ed, *psize;
  real_t *transfer, *tmpvec;
  real_t balance = -1.0, *load, *solution, *workspace;
  real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor;
  matrix_t matrix;
  ikv_t *cand;
  idx_t ndirty, nclean, dptr, clean;

  nvtxs        = graph->nvtxs;
  nedges       = graph->nedges;
  xadj         = graph->xadj;
  nvwgt        = graph->nvwgt;
  adjncy       = graph->adjncy;
  adjwgt       = graph->adjwgt;
  where        = graph->where;
  nparts       = ctrl->nparts;
  ubfactor     = ctrl->ubvec[0];
  matrix.nrows = nparts;

  flowFactor = 0.35;
  flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
  flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
  flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;

  /* allocate memory */
  solution                   = rmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution");
  tmpvec                     = solution + nparts;
  npwgts                     = solution + 2*nparts;
  load                       = solution + 3*nparts;
  matrix.values              = solution + 4*nparts;
  transfer = matrix.transfer = solution + 4*nparts + nedges;

  perm                   = imalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm");
  ed                     = perm + nvtxs;
  psize                  = perm + 2*nvtxs;
  rowptr = matrix.rowptr = perm + 2*nvtxs + nparts;
  colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1;

  /*GKTODO - Potential problem with this malloc */
  wsize     = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace");
  cand      = ikvmalloc(nvtxs, "WavefrontDiffusion: cand");


  /*****************************/
  /* Populate empty subdomains */
  /*****************************/
  iset(nparts, 0, psize);
  for (i=0; i<nvtxs; i++) 
    psize[where[i]]++;

  mind = iargmin(nparts, psize);
  maxd = iargmax(nparts, psize);
  if (psize[mind] == 0) {
    for (i=0; i<nvtxs; i++) {
      k = (RandomInRange(nvtxs)+i)%nvtxs; 
      if (where[k] == maxd) {
        where[k] = mind;
        psize[mind]++;
        psize[maxd]--;
        break;
      }
    }
  }

  iset(nvtxs, 0, ed);
  rset(nparts, 0.0, npwgts);
  for (i=0; i<nvtxs; i++) {
    npwgts[where[i]] += nvwgt[i];
    for (j=xadj[i]; j<xadj[i+1]; j++)
      ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);
  }

  ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
  done = 0;


  /* zero out the tmpvec array */
  rset(nparts, 0.0, tmpvec);

  npasses = gk_min(nparts/2, NGD_PASSES);
  for (l=0; l<npasses; l++) {
    /* Set-up and solve the diffusion equation */
    nswaps = 0;

    /************************/
    /* Solve flow equations */
    /************************/
    SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);

    /* check for disconnected subdomains */
    for(i=0; i<matrix.nrows; i++) {
      if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
        cost = (real_t)(ctrl->mype); 
	goto CleanUpAndExit;
      }
    }

    ConjGrad2(&matrix, load, solution, 0.001, workspace);
    ComputeTransferVector(1, &matrix, solution, transfer, 0);

    GetThreeMax(nparts, load, &first, &second, &third);

    if (l%3 == 0) {
      FastRandomPermute(nvtxs, perm, 1);
    }
    else {
      /*****************************/
      /* move dirty vertices first */
      /*****************************/
      ndirty = 0;
      for (i=0; i<nvtxs; i++) {
        if (where[i] != home[i])
          ndirty++;
      }

      dptr = 0;
      for (i=0; i<nvtxs; i++) {
        if (where[i] != home[i])
          perm[dptr++] = i;
        else
          perm[ndirty++] = i;
      }

      PASSERT(ctrl, ndirty == nvtxs);
      ndirty = dptr;
      nclean = nvtxs-dptr;
      FastRandomPermute(ndirty, perm, 0);
      FastRandomPermute(nclean, perm+ndirty, 0);
    }

    if (ctrl->mype == 0) {
      for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
        i = perm[ii];
        if (ed[i] != 0) {
          cand[k].key = -ed[i];
          cand[k++].val = i;
        }
        else {
          cand[--j].key = 0;
          cand[j].val = i;
        }
      }
      ikvsorti(k, cand);
    }


    for (ii=0; ii<nvtxs/3; ii++) {
      i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
      from = where[i];

      /* don't move out the last vertex in a subdomain */
      if (psize[from] == 1)
        continue;

      clean = (from == home[i]) ? 1 : 0;

      /* only move from top three or dirty vertices */
      if (from != first && from != second && from != third && clean)
        continue;

      /* Scatter the sparse transfer row into the dense tmpvec row */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
        tmpvec[colind[j]] = transfer[j];

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        to = where[adjncy[j]];
        if (from != to) {
          if (tmpvec[to] > (flowFactor * nvwgt[i])) {
            tmpvec[to] -= nvwgt[i];
            INC_DEC(psize[to], psize[from], 1);
            INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
            INC_DEC(load[to], load[from], nvwgt[i]);
            where[i] = to;
            nswaps++;

            /* Update external degrees */
            ed[i] = 0;
            for (k=xadj[i]; k<xadj[i+1]; k++) {
              edge = adjncy[k];
              ed[i] += (to != where[edge] ? adjwgt[k] : 0);

              if (where[edge] == from)
                ed[edge] += adjwgt[k];
              if (where[edge] == to)
                ed[edge] -= adjwgt[k];
            }
            break;
          }
        }
      }

      /* Gather the dense tmpvec row into the sparse transfer row */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
        transfer[j] = tmpvec[colind[j]];
        tmpvec[colind[j]] = 0.0;
      }
      ASSERT(fabs(rsum(nparts, tmpvec, 1)) < .0001)
    }

    if (l % 2 == 1) {
      balance = rmax(nparts, npwgts)*nparts;
      if (balance < ubfactor + 0.035)
        done = 1;

      if (GlobalSESum(ctrl, done) > 0)
        break;

      noswaps = (nswaps > 0) ? 0 : 1;
      if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)
        break;

    }
  }

  graph->mincut = ComputeSerialEdgeCut(graph);
  totalv        = Mc_ComputeSerialTotalV(graph, home);
  cost          = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv;


CleanUpAndExit:
  gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);

  return cost;
}
Esempio n. 4
0
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph to all the processors and preceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, nvtxs, nedges, ncon;
  idx_t mype, npes, srnpes, srmype; 
  idx_t *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
  idx_t *part, *lwhere, *home;
  idx_t lnparts, fpart, fpe, lnpes, ngroups;
  idx_t *rcounts, *rdispls;
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  idx_t sr_pe, gd_pe, sr, gd, who_wins;
  real_t my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
  real_t rating, max_rating, your_cost = -1.0, your_balance = -1.0;
  real_t lbsum, min_lbsum, *lbvec, *tpwgts, *tpwgts2, buffer[2];
  graph_t *agraph, cgraph;
  ctrl_t *myctrl;
  MPI_Status status;
  MPI_Comm ipcomm, srcomm;
  struct {
    double cost;
    int rank;
  } lpecost, gpecost;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  WCOREPUSH;

  vtxdist = graph->vtxdist;
  agraph  = AssembleAdaptiveGraph(ctrl, graph);
  nvtxs   = cgraph.nvtxs  = agraph->nvtxs;
  nedges  = cgraph.nedges = agraph->nedges;
  ncon    = cgraph.ncon   = agraph->ncon;
  xadj    = cgraph.xadj   = icopy(nvtxs+1, agraph->xadj, iwspacemalloc(ctrl, nvtxs+1));
  vwgt    = cgraph.vwgt   = icopy(nvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, nvtxs*ncon));
  vsize   = cgraph.vsize  = icopy(nvtxs, agraph->vsize, iwspacemalloc(ctrl, nvtxs));
  adjncy  = cgraph.adjncy = icopy(nedges, agraph->adjncy, iwspacemalloc(ctrl, nedges));
  adjwgt  = cgraph.adjwgt = icopy(nedges, agraph->adjwgt, iwspacemalloc(ctrl, nedges));
  part    = cgraph.where  = agraph->where = iwspacemalloc(ctrl, nvtxs);

  lwhere = iwspacemalloc(ctrl, nvtxs);
  home   = iwspacemalloc(ctrl, nvtxs);
  lbvec  = rwspacemalloc(ctrl, graph->ncon);


  /****************************************/
  /****************************************/
  if (ctrl->ps_relation == PARMETIS_PSR_UNCOUPLED) {
    WCOREPUSH;
    rcounts = iwspacemalloc(ctrl, ctrl->npes);
    rdispls = iwspacemalloc(ctrl, ctrl->npes+1);

    for (i=0; i<ctrl->npes; i++) 
      rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
    MAKECSR(i, ctrl->npes, rdispls);

    gkMPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_T,
        (void *)part, rcounts, rdispls, IDX_T, ctrl->comm);

    for (i=0; i<agraph->nvtxs; i++)
      home[i] = part[i];

    WCOREPOP;  /* local frees */
  }
  else {
    for (i=0; i<ctrl->npes; i++) {
      for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
        part[j] = home[j] = i;
    }
  }

  /* Ensure that the initial partitioning is legal */
  for (i=0; i<agraph->nvtxs; i++) {
    if (part[i] >= ctrl->nparts)
      part[i] = home[i] = part[i] % ctrl->nparts;
    if (part[i] < 0)
      part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
  }
  /****************************************/
  /****************************************/

  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      rprintf(ctrl, "input cut: %"PRIDX", balance: ", ComputeSerialEdgeCut(agraph)));
  for (i=0; i<agraph->ncon; i++)
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

  /****************************************/
  /* Split the processors into two groups */
  /****************************************/
  sr = (ctrl->mype % 2 == 0) ? 1 : 0;
  gd = (ctrl->mype % 2 == 1) ? 1 : 0;

  if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
    sr = 1;
    gd = 0;
  }

  sr_pe = 0;
  gd_pe = 1;

  gkMPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);

  if (sr == 1) { /* Half of the processors do scratch-remap */
    ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, npes), 1);
    gkMPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
    gkMPI_Comm_rank(srcomm, &srmype);
    gkMPI_Comm_size(srcomm, &srnpes);

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_SEED] = ctrl->sync + (mype % ngroups) + 1;

    tpwgts  = ctrl->tpwgts;
    tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

    iset(nvtxs, 0, lwhere);
    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = srnpes;
    while (lnpes > 1 && lnparts > 1) {
      PASSERT(ctrl, agraph->nvtxs > 1);
      /* determine the weights of the two partitions as a function of the 
         weight of the target partition weights */
      for (j=(lnparts>>1), i=0; i<ncon; i++) {
        tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
        tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
        wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
        tpwgts2[i]      *= wsum;
        tpwgts2[ncon+i] *= wsum;
      }

      METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, 
            agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, 
            &twoparts, tpwgts2, NULL, moptions, &edgecut, part);

      /* pick one of the branches */
      if (srmype < fpe+lnpes/2) {
        KeepPart(ctrl, agraph, part, 0);
        lnpes   = lnpes/2;
        lnparts = lnparts/2;
      }
      else {
        KeepPart(ctrl, agraph, part, 1);
        fpart   = fpart + lnparts/2;
        fpe     = fpe + lnpes/2;
        lnpes   = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }

    if (lnparts == 1) { /* Case in which srnpes is greater or equal to nparts */
      /* Only the first process will assign labels (for the reduction to work) */
      if (srmype == fpe) {
        for (i=0; i<agraph->nvtxs; i++) 
          lwhere[agraph->label[i]] = fpart;
      }
    }
    else { /* Case in which srnpes is smaller than nparts */
      /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
      tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
      for (j=0; j<ncon; j++) {
        for (wsum=0.0, i=0; i<lnparts; i++) {
          tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
          wsum += tpwgts[i*ncon+j];
        }
        for (wsum=1.0/wsum, i=0; i<lnparts; i++)
          tpwgts[i*ncon+j] *= wsum;
      }

      METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
	    agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
            &edgecut, part);

      for (i=0; i<agraph->nvtxs; i++) 
        lwhere[agraph->label[i]] = fpart + part[i];
    }

    gkMPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_T, MPI_SUM, srcomm);

    edgecut = ComputeSerialEdgeCut(&cgraph);
    ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
    lbsum = rsum(ncon, lbvec, 1);
    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ipcomm);
    gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ipcomm);
    lpecost.rank = ctrl->mype;
    lpecost.cost = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon))
        lpecost.cost = (double)edgecut;
      else
        lpecost.cost = (double)max_cut + lbsum;
    }
    gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ipcomm);

    if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) 
      gkMPI_Send((void *)part, nvtxs, IDX_T, sr_pe, 1, ctrl->comm);

    if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) 
      gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status);

    if (ctrl->mype == sr_pe) {
      icopy(nvtxs, part, lwhere);
      SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
    }

    gkMPI_Comm_free(&srcomm);
  }