示例#1
0
文件: wave.c 项目: certik/libmesh
/*************************************************************************
* This function performs a k-way directed diffusion
*
* Vertices are moved between the subdomains stored in graph->where, guided
* by a per-connection transfer amount obtained from ConjGrad2() and
* ComputeTransferVector(). At most gk_min(nparts/2, NGD_PASSES) passes are
* made, and each pass examines the first nvtxs/3 vertices of that pass's
* ordering.
*
* Parameters:
*   ctrl  - control structure; the fields read here are nparts, ubvec[0],
*           mype, npes, tpwgts, ipc_factor and redist_factor.
*   graph - the graph; graph->where is modified in place and, on the
*           normal exit path, graph->mincut is overwritten.
*   home  - per-vertex subdomain id that where[] is compared against; a
*           vertex with where[i] == home[i] is treated as "clean",
*           otherwise as "dirty".
*
* Returns:
*   ctrl->ipc_factor*mincut + ctrl->redist_factor*totalv on the normal
*   path, or (real_t)ctrl->mype if the connectivity matrix built by
*   SetUpConnectGraph() has a row with exactly one entry (in which case
*   graph->mincut is not updated).
**************************************************************************/
real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home)
{
  idx_t ii, i, j, k, l, nvtxs, nedges, nparts;
  idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize;
  idx_t npasses, first, second, third, mind, maxd;
  idx_t *xadj, *adjncy, *adjwgt, *where, *perm;
  idx_t *rowptr, *colind, *ed, *psize;
  real_t *transfer, *tmpvec;
  real_t balance = -1.0, *load, *solution, *workspace;
  real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor;
  matrix_t matrix;
  ikv_t *cand;
  idx_t ndirty, nclean, dptr, clean;

  nvtxs        = graph->nvtxs;
  nedges       = graph->nedges;
  xadj         = graph->xadj;
  nvwgt        = graph->nvwgt;
  adjncy       = graph->adjncy;
  adjwgt       = graph->adjwgt;
  where        = graph->where;
  nparts       = ctrl->nparts;
  ubfactor     = ctrl->ubvec[0];
  matrix.nrows = nparts;

  /* flowFactor scales the vertex weight in the move test further below
     (a move needs tmpvec[to] > flowFactor*nvwgt[i]). It is 0.50, 0.75 and
     1.00 for mype 2, 3 and 4 respectively, and 0.35 for every other mype. */
  flowFactor = 0.35;
  flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
  flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
  flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;

  /* allocate memory */
  /* One real_t buffer carved into: solution, tmpvec, npwgts, load (nparts
     entries each), then matrix.values and transfer (nedges entries each). */
  solution                   = rmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution");
  tmpvec                     = solution + nparts;
  npwgts                     = solution + 2*nparts;
  load                       = solution + 3*nparts;
  matrix.values              = solution + 4*nparts;
  transfer = matrix.transfer = solution + 4*nparts + nedges;

  /* One idx_t buffer carved into: perm (nvtxs), ed (nvtxs), psize (nparts),
     rowptr (nparts+1) and colind (nedges). */
  perm                   = imalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm");
  ed                     = perm + nvtxs;
  psize                  = perm + 2*nvtxs;
  rowptr = matrix.rowptr = perm + 2*nvtxs + nparts;
  colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1;

  /*GKTODO - Potential problem with this malloc */
  /* workspace is shared scratch: it is handed to SetUpConnectGraph() as an
     idx_t array and to ConjGrad2() as a real_t array, so it is sized (in
     bytes) for the larger of the two uses. */
  wsize     = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace");
  cand      = ikvmalloc(nvtxs, "WavefrontDiffusion: cand");


  /*****************************/
  /* Populate empty subdomains */
  /*****************************/
  /* psize[p] = number of vertices currently assigned to subdomain p */
  iset(nparts, 0, psize);
  for (i=0; i<nvtxs; i++) 
    psize[where[i]]++;

  /* Only the single smallest subdomain is treated: if it is empty, up to
     nvtxs randomly chosen positions are probed and the first vertex found
     in the largest subdomain is reassigned to it. */
  mind = iargmin(nparts, psize);
  maxd = iargmax(nparts, psize);
  if (psize[mind] == 0) {
    for (i=0; i<nvtxs; i++) {
      k = (RandomInRange(nvtxs)+i)%nvtxs; 
      if (where[k] == maxd) {
        where[k] = mind;
        psize[mind]++;
        psize[maxd]--;
        break;
      }
    }
  }

  /* npwgts[p] = sum of nvwgt over the vertices of subdomain p;
     ed[i]     = sum of adjwgt over the edges of i whose other endpoint
                 lies in a different subdomain (external degree). */
  iset(nvtxs, 0, ed);
  rset(nparts, 0.0, npwgts);
  for (i=0; i<nvtxs; i++) {
    npwgts[where[i]] += nvwgt[i];
    for (j=xadj[i]; j<xadj[i+1]; j++)
      ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);
  }

  /* presumably fills load[] with each subdomain's deviation from its
     target weight in ctrl->tpwgts -- TODO confirm against ComputeLoad() */
  ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
  done = 0;


  /* zero out the tmpvec array */
  rset(nparts, 0.0, tmpvec);

  npasses = gk_min(nparts/2, NGD_PASSES);
  for (l=0; l<npasses; l++) {
    /* Set-up and solve the diffusion equation */
    nswaps = 0;

    /************************/
    /* Solve flow equations */
    /************************/
    SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);

    /* check for disconnected subdomains */
    /* A row with exactly one entry triggers the early exit; the return
       value is then mype rather than a cost, and graph->mincut is left
       untouched.
       NOTE(review): this jump also skips the GlobalSESum() calls at the
       end of the pass -- if those are collective across PEs, confirm that
       all PEs take this exit together. */
    for(i=0; i<matrix.nrows; i++) {
      if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
        cost = (real_t)(ctrl->mype); 
	goto CleanUpAndExit;
      }
    }

    ConjGrad2(&matrix, load, solution, 0.001, workspace);
    ComputeTransferVector(1, &matrix, solution, transfer, 0);

    /* first/second/third are later compared against subdomain ids;
       presumably the three subdomains with the largest load -- TODO confirm
       against GetThreeMax() */
    GetThreeMax(nparts, load, &first, &second, &third);

    /* Build the visiting order in perm[]: every third pass uses a random
       order of all vertices, the other passes put dirty vertices first. */
    if (l%3 == 0) {
      FastRandomPermute(nvtxs, perm, 1);
    }
    else {
      /*****************************/
      /* move dirty vertices first */
      /*****************************/
      /* First count the dirty vertices so that the clean ones can be
         written starting right after them. */
      ndirty = 0;
      for (i=0; i<nvtxs; i++) {
        if (where[i] != home[i])
          ndirty++;
      }

      /* dptr is the write cursor for dirty vertices; ndirty is reused as
         the write cursor for clean vertices and ends up equal to nvtxs. */
      dptr = 0;
      for (i=0; i<nvtxs; i++) {
        if (where[i] != home[i])
          perm[dptr++] = i;
        else
          perm[ndirty++] = i;
      }

      PASSERT(ctrl, ndirty == nvtxs);
      ndirty = dptr;
      nclean = nvtxs-dptr;
      /* shuffle the dirty and the clean group independently */
      FastRandomPermute(ndirty, perm, 0);
      FastRandomPermute(nclean, perm+ndirty, 0);
    }

    /* On mype 0 only, the order is refined further: vertices with non-zero
       external degree are collected at the front of cand[] keyed by -ed,
       the others are placed from the back with key 0, and only the front
       k entries are sorted (presumably ascending by key, i.e. largest ed
       first -- TODO confirm against ikvsorti()). */
    if (ctrl->mype == 0) {
      for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
        i = perm[ii];
        if (ed[i] != 0) {
          cand[k].key = -ed[i];
          cand[k++].val = i;
        }
        else {
          cand[--j].key = 0;
          cand[j].val = i;
        }
      }
      ikvsorti(k, cand);
    }


    /* Only the first third of the ordering is considered in each pass. */
    for (ii=0; ii<nvtxs/3; ii++) {
      i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
      from = where[i];

      /* don't move out the last vertex in a subdomain */
      if (psize[from] == 1)
        continue;

      clean = (from == home[i]) ? 1 : 0;

      /* only move from top three or dirty vertices */
      if (from != first && from != second && from != third && clean)
        continue;

      /* Scatter the sparse transfer row into the dense tmpvec row */
      /* The first entry of the row is skipped (presumably the diagonal --
         TODO confirm against SetUpConnectGraph()). */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
        tmpvec[colind[j]] = transfer[j];

      /* Move i to the first neighbouring subdomain whose remaining transfer
         amount exceeds flowFactor*nvwgt[i]; at most one move per vertex. */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        to = where[adjncy[j]];
        if (from != to) {
          if (tmpvec[to] > (flowFactor * nvwgt[i])) {
            tmpvec[to] -= nvwgt[i];
            /* presumably INC_DEC(a, b, v) adds v to a and subtracts v from
               b -- TODO confirm against the macro definition */
            INC_DEC(psize[to], psize[from], 1);
            INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
            INC_DEC(load[to], load[from], nvwgt[i]);
            where[i] = to;
            nswaps++;

            /* Update external degrees */
            /* ed[i] is recomputed from scratch; neighbours left behind in
               'from' gain this edge as external, neighbours in 'to' lose it. */
            ed[i] = 0;
            for (k=xadj[i]; k<xadj[i+1]; k++) {
              edge = adjncy[k];
              ed[i] += (to != where[edge] ? adjwgt[k] : 0);

              if (where[edge] == from)
                ed[edge] += adjwgt[k];
              if (where[edge] == to)
                ed[edge] -= adjwgt[k];
            }
            break;
          }
        }
      }

      /* Gather the dense tmpvec row into the sparse transfer row */
      /* The touched tmpvec entries are reset so that tmpvec is all zeros
         again for the next vertex (checked by the ASSERT below). */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
        transfer[j] = tmpvec[colind[j]];
        tmpvec[colind[j]] = 0.0;
      }
      ASSERT(fabs(rsum(nparts, tmpvec, 1)) < .0001)
    }

    /* Termination tests are evaluated on odd-numbered passes only. */
    if (l % 2 == 1) {
      /* balance = nparts * (largest subdomain weight); presumably nvwgt is
         normalised so that 1.0 means perfect balance -- TODO confirm */
      balance = rmax(nparts, npwgts)*nparts;
      if (balance < ubfactor + 0.035)
        done = 1;

      /* presumably GlobalSESum() sums its argument over all PEs -- TODO
         confirm; stop as soon as the summed 'done' flag is positive */
      if (GlobalSESum(ctrl, done) > 0)
        break;

      /* stop when the summed 'no swaps in this pass' flag exceeds npes/2 */
      noswaps = (nswaps > 0) ? 0 : 1;
      if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)
        break;

    }
  }

  /* Normal exit: the cost combines the edge cut (weighted by ipc_factor)
     with the total volume relative to home (weighted by redist_factor). */
  graph->mincut = ComputeSerialEdgeCut(graph);
  totalv        = Mc_ComputeSerialTotalV(graph, home);
  cost          = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv;


CleanUpAndExit:
  /* tmpvec, npwgts, load, ed, psize, rowptr, colind and the matrix arrays
     all live inside solution/perm, so only the four base buffers are freed. */
  gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);

  return cost;
}
示例#2
0
/*************************************************************************
* This function computes cuts and balance information and prints it to
* stdout.
*
* Parameters:
*   graph  - the graph to report on. If graph->vwgt or graph->adjwgt is
*            NULL, an all-ones array is installed for the duration of the
*            call and removed again (pointer reset to NULL) before
*            returning. A NULL graph->vsize is treated as unit sizes.
*   nparts - number of partitions; the entries of where[] are used as
*            indices in [0, nparts).
*   where  - partition id of every vertex.
*
* Printed output:
*   - the values returned by ComputeCut() and ComputeVolume(),
*   - per-constraint balance (max partition weight and max vertex weight,
*     both relative to the average partition weight),
*   - min/max/avg/balance of the number of adjacent subdomains, of the cut
*     weight towards adjacent subdomains, and of the interface-node sizes.
**************************************************************************/
void ComputePartitionInfoBipartite(graph_t *graph, idx_t nparts, idx_t *where)
{
  idx_t i, j, nvtxs, ncon, mustfree=0;
  idx_t *xadj, *adjncy, *vwgt, *vsize, *adjwgt, *kpwgts;
  idx_t *padjncy, *padjwgt, *padjcut;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;
  vsize = graph->vsize;
  adjwgt = graph->adjwgt;

  /* Install temporary unit weights where the graph has none; mustfree
     records which ones (bit 0: vwgt, bit 1: adjwgt) must be released. */
  if (vwgt == NULL) {
    vwgt = graph->vwgt = ismalloc(nvtxs, 1, "vwgt");
    mustfree = 1;
  }
  if (adjwgt == NULL) {
    adjwgt = graph->adjwgt = ismalloc(xadj[nvtxs], 1, "adjwgt");
    mustfree += 2;
  }

  printf("%"PRIDX"-way Cut: %5"PRIDX", Vol: %5"PRIDX", ", nparts, ComputeCut(graph, where), ComputeVolume(graph, where));

  /* Compute balance information */
  /* kpwgts[p*ncon+c] = total weight of constraint c in partition p */
  kpwgts = ismalloc(ncon*nparts, 0, "ComputePartitionInfo: kpwgts");

  for (i=0; i<nvtxs; i++) {
    for (j=0; j<ncon; j++) 
      kpwgts[where[i]*ncon+j] += vwgt[i*ncon+j];
  }

  if (ncon == 1) {
    printf("\tBalance: %5.3"PRREAL" out of %5.3"PRREAL"\n", 
            1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)),
            1.0*nparts*vwgt[iargmax(nvtxs, vwgt)]/(1.0*isum(nparts, kpwgts, 1)));
  }
  else {
    printf("\tBalance:");
    for (j=0; j<ncon; j++) 
      printf(" (%5.3"PRREAL" out of %5.3"PRREAL")", 
            1.0*nparts*kpwgts[ncon*iargmax_strd(nparts, kpwgts+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)),
            1.0*nparts*vwgt[ncon*iargmax_strd(nvtxs, vwgt+j, ncon)+j]/(1.0*isum(nparts, kpwgts+j, ncon)));
    printf("\n");
  }


  /* Compute p-adjncy information */
  /* nparts x nparts row-major tables, indexed [own partition][other partition]:
       padjncy - 1 if at least one cut edge connects the two partitions
       padjcut - total weight of the cut edges between them
       padjwgt - total size of the vertices that touch the other partition */
  padjncy = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjncy");
  padjwgt = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjwgt");
  padjcut = ismalloc(nparts*nparts, 0, "ComputePartitionInfo: padjcut");

  /* From here on the first nparts entries of kpwgts are reused, first as a
     per-vertex "partition already counted" marker and then as a row-sum
     buffer for the three tables. */
  iset(nparts, 0, kpwgts);
  for (i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (where[i] != where[adjncy[j]]) {
        padjncy[where[i]*nparts+where[adjncy[j]]] = 1;
        padjcut[where[i]*nparts+where[adjncy[j]]] += adjwgt[j];
        /* count vertex i once per distinct neighbouring partition */
        if (kpwgts[where[adjncy[j]]] == 0) {
          /* a graph without vsize is treated as having unit vertex sizes,
             in line with the unit defaults used for vwgt/adjwgt above */
          padjwgt[where[i]*nparts+where[adjncy[j]]] += (vsize ? vsize[i] : 1);
          kpwgts[where[adjncy[j]]] = 1;
        }
      }
    }
    /* clear only the markers this vertex may have set */
    for (j=xadj[i]; j<xadj[i+1]; j++) 
      kpwgts[where[adjncy[j]]] = 0;
  }

  for (i=0; i<nparts; i++)
    kpwgts[i] = isum(nparts, padjncy+i*nparts, 1);
  printf("Min/Max/Avg/Bal # of adjacent     subdomains: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n",
    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)));

  for (i=0; i<nparts; i++)
    kpwgts[i] = isum(nparts, padjcut+i*nparts, 1);
  printf("Min/Max/Avg/Bal # of adjacent subdomain cuts: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL"\n",
    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)));

  for (i=0; i<nparts; i++)
    kpwgts[i] = isum(nparts, padjwgt+i*nparts, 1);
  printf("Min/Max/Avg/Bal/Frac # of interface    nodes: %5"PRIDX" %5"PRIDX" %5"PRIDX" %7.3"PRREAL" %7.3"PRREAL"\n",
    kpwgts[iargmin(nparts, kpwgts)], kpwgts[iargmax(nparts, kpwgts)], isum(nparts, kpwgts, 1)/nparts, 
    1.0*nparts*kpwgts[iargmax(nparts, kpwgts)]/(1.0*isum(nparts, kpwgts, 1)), 1.0*isum(nparts, kpwgts, 1)/(1.0*nvtxs));


  /* Remove the temporary unit weights so the graph is left as it was. */
  if (mustfree & 1) {
    gk_free((void **)&vwgt, LTERM);
    graph->vwgt = NULL;
  }
  if (mustfree & 2) {
    gk_free((void **)&adjwgt, LTERM);
    graph->adjwgt = NULL;
  }

  /* every variadic argument is cast, matching the other gk_free() calls */
  gk_free((void **)&kpwgts, (void **)&padjncy, (void **)&padjwgt, (void **)&padjcut, LTERM);
}