Example #1
0
/* 3D Cartesian ordered checkerboard blocking communication */
void blocking_exchange(MC* mc, int *myRecvCount, int *mySendCount){
  int *mycoords = mc->grid.proc_coords;
  int *nabes = mc->grid.nabes;
  MPI_Datatype ParticleType = mc->ParticleType;
  MPI_Status stat;
  int i;
  Particle *p;
  p = mc->particles;
  Particle *recvBuf;

  /* set the pointer for where incoming particles will be received */
  recvBuf = p+mc->nparticles;

  /*now set the recv pointers locally on each proc. */
  for (i=1;i<(mc->nprocs);++i) displ[i] = displ[i-1] + myRecvCount[i-1];

  /*and for convenience the send pointers locally on each proc. */
  for (i=1;i<(mc->nprocs);++i) sdispl[i] = sdispl[i-1] + mySendCount[i-1];

  /* Left/right ordered exchange */
  if (mycoords[0] % 2 == 0) { 
    MPI_Send(p+sdispl[nabes[0]], mySendCount[nabes[0]], ParticleType, nabes[0], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[1]], myRecvCount[nabes[1]], ParticleType, nabes[1], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[1]], mySendCount[nabes[1]], ParticleType, nabes[1], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[0]], myRecvCount[nabes[0]], ParticleType, nabes[0], 0, MPI_COMM_WORLD, &stat);
  }
  else {
    MPI_Recv(recvBuf+displ[nabes[1]], myRecvCount[nabes[1]], ParticleType, nabes[1], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[0]], mySendCount[nabes[0]], ParticleType, nabes[0], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[0]], myRecvCount[nabes[0]], ParticleType, nabes[0], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[1]], mySendCount[nabes[1]], ParticleType, nabes[1], 0, MPI_COMM_WORLD);
  }
  /* Up/down ordered exchange */
  if (mycoords[1] % 2 == 0) {   
    MPI_Send(p+sdispl[nabes[2]], mySendCount[nabes[2]], ParticleType, nabes[2], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[3]], myRecvCount[nabes[3]], ParticleType, nabes[3], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[3]], mySendCount[nabes[3]], ParticleType, nabes[3], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[2]], myRecvCount[nabes[2]], ParticleType, nabes[2], 0, MPI_COMM_WORLD, &stat);
  }
  else {
    MPI_Recv(recvBuf+displ[nabes[3]], myRecvCount[nabes[3]], ParticleType, nabes[3], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[2]], mySendCount[nabes[2]], ParticleType, nabes[2], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[2]], myRecvCount[nabes[2]], ParticleType, nabes[2], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[3]], mySendCount[nabes[3]], ParticleType, nabes[3], 0, MPI_COMM_WORLD);
    }
  /* Back/front ordered exchange */
  if (mycoords[2] % 2 == 0) { 
    MPI_Send(p+sdispl[nabes[4]], mySendCount[nabes[4]], ParticleType, nabes[4], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[5]], myRecvCount[nabes[5]], ParticleType, nabes[5], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[5]], mySendCount[nabes[5]], ParticleType, nabes[5], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[4]], myRecvCount[nabes[4]], ParticleType, nabes[4], 0, MPI_COMM_WORLD, &stat);
  }
  else {
    MPI_Recv(recvBuf+displ[nabes[5]], myRecvCount[nabes[5]], ParticleType, nabes[5], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[4]], mySendCount[nabes[4]], ParticleType, nabes[4], 0, MPI_COMM_WORLD);
    MPI_Recv(recvBuf+displ[nabes[4]], myRecvCount[nabes[4]], ParticleType, nabes[4], 0, MPI_COMM_WORLD, &stat);
    MPI_Send(p+sdispl[nabes[5]], mySendCount[nabes[5]], ParticleType, nabes[5], 0, MPI_COMM_WORLD);
  }

  /* Update particle count to include recvd particles in preparation for elim_sent() */
  mc->nparticles += isum(myRecvCount,mc->nprocs);
}
Example #2
0
void CreateCoarseGraph_Global(ctrl_t *ctrl, graph_t *graph, idx_t cnvtxs)
{
  idx_t h, i, j, k, l, ii, jj, ll, nnbrs, nvtxs, nedges, ncon;
  idx_t firstvtx, lastvtx, cfirstvtx, clastvtx, otherlastvtx;
  idx_t npes=ctrl->npes, mype=ctrl->mype;
  idx_t cnedges, nsend, nrecv, nkeepsize, nrecvsize, nsendsize, v, u;
  idx_t *xadj, *adjncy, *adjwgt, *vwgt, *vsize, *vtxdist, *home, *where;
  idx_t *match, *cmap;
  idx_t *cxadj, *cadjncy, *cadjwgt, *cvwgt, *cvsize = NULL, *chome = NULL, 
          *cwhere = NULL, *cvtxdist;
  idx_t *rsizes, *ssizes, *rlens, *slens, *rgraph, *sgraph, *perm;
  idx_t *peind, *recvptr, *recvind;
  real_t *nvwgt, *cnvwgt;
  graph_t *cgraph;
  ikv_t *scand, *rcand;
  idx_t htable[LHTSIZE], htableidx[LHTSIZE];

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  ncon   = graph->ncon;
  xadj   = graph->xadj;
  vwgt   = graph->vwgt;
  vsize  = graph->vsize;
  nvwgt  = graph->nvwgt;
  home   = graph->home;
  where  = graph->where;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  match  = graph->match;

  vtxdist  = graph->vtxdist;
  firstvtx = vtxdist[mype];
  lastvtx  = vtxdist[mype+1];

  cmap = graph->cmap = ismalloc(nvtxs+graph->nrecv, -1, "Global_CreateCoarseGraph: cmap");

  nnbrs   = graph->nnbrs;
  peind   = graph->peind;
  recvind = graph->recvind;
  recvptr = graph->recvptr;

  /* Initialize the coarser graph */
  cgraph = CreateGraph();
  cgraph->nvtxs  = cnvtxs;
  cgraph->ncon   = ncon;
  cgraph->level  = graph->level+1;
  cgraph->finer  = graph;
  graph->coarser = cgraph;


  /*************************************************************
  * Obtain the vtxdist of the coarser graph 
  **************************************************************/
  cvtxdist = cgraph->vtxdist = imalloc(npes+1, "Global_CreateCoarseGraph: cvtxdist");
  cvtxdist[npes] = cnvtxs;  /* Use last position in the cvtxdist as a temp buffer */

  gkMPI_Allgather((void *)(cvtxdist+npes), 1, IDX_T, (void *)cvtxdist, 1, 
      IDX_T, ctrl->comm);

  MAKECSR(i, npes, cvtxdist);

  cgraph->gnvtxs = cvtxdist[npes];

#ifdef DEBUG_CONTRACT
  PrintVector(ctrl, npes+1, 0, cvtxdist, "cvtxdist");
#endif


  /*************************************************************
  * Construct the cmap vector 
  **************************************************************/
  cfirstvtx = cvtxdist[mype];
  clastvtx  = cvtxdist[mype+1];

  /* Create the cmap of what you know so far locally. */
  for (cnvtxs=0, i=0; i<nvtxs; i++) {
    if (match[i] >= KEEP_BIT) {
      k = match[i] - KEEP_BIT;
      if (k>=firstvtx && k<firstvtx+i)
        continue;  /* Both (i,k) are local and i has been matched via the (k,i) side */

      cmap[i] = cfirstvtx + cnvtxs++;
      if (k != firstvtx+i && (k>=firstvtx && k<lastvtx)) { /* I'm matched locally */
        cmap[k-firstvtx] = cmap[i];
        match[k-firstvtx] += KEEP_BIT;  /* Add the KEEP_BIT to simplify coding */
      }
    }
  }
  PASSERT(ctrl, cnvtxs == clastvtx-cfirstvtx);

  CommInterfaceData(ctrl, graph, cmap, cmap+nvtxs);

  /* Update the cmap of the locally stored vertices that will go away. 
   * The remote processor assigned cmap for them */
  for (i=0; i<nvtxs; i++) {
    if (match[i] < KEEP_BIT) { /* Only vertices that go away satisfy this*/
      cmap[i] = cmap[nvtxs+BSearch(graph->nrecv, recvind, match[i])];
    }
  }

  CommInterfaceData(ctrl, graph, cmap, cmap+nvtxs);


#ifndef NDEBUG
  for (i=0; i<nvtxs+graph->nrecv; i++) {
    if (cmap[i] == -1) 
      errexit("cmap[%"PRIDX"] == -1\n", i);
  }
#endif


#ifdef DEBUG_CONTRACT
  PrintVector(ctrl, nvtxs, firstvtx, cmap, "Cmap");
#endif


  /*************************************************************
  * Determine how many adjcency lists you need to send/receive.
  **************************************************************/
  /* first pass: determine sizes */
  for (nsend=0, nrecv=0, i=0; i<nvtxs; i++) {
    if (match[i] < KEEP_BIT) /* This is going away */
      nsend++;
    else {
      k = match[i]-KEEP_BIT;
      if (k<firstvtx || k>=lastvtx) /* This is comming from afar */
        nrecv++;
    }
  }

  scand = ikvwspacemalloc(ctrl, nsend);
  rcand = graph->rcand = ikvmalloc(nrecv, "CreateCoarseGraph: rcand");

  /* second pass: place them in the appropriate arrays */
  nkeepsize = nsend = nrecv = 0;
  for (i=0; i<nvtxs; i++) {
    if (match[i] < KEEP_BIT) { /* This is going away */
      scand[nsend].key = match[i];
      scand[nsend].val = i;
      nsend++;
    }
    else {
      nkeepsize += (xadj[i+1]-xadj[i]);

      k = match[i]-KEEP_BIT;
      if (k<firstvtx || k>=lastvtx) { /* This is comming from afar */
        rcand[nrecv].key = k;
        rcand[nrecv].val = cmap[i] - cfirstvtx;  /* Set it for use during the partition projection */
        PASSERT(ctrl, rcand[nrecv].val>=0 && rcand[nrecv].val<cnvtxs);
        nrecv++;
      }
    }
  }


#ifdef DEBUG_CONTRACT
  PrintPairs(ctrl, nsend, scand, "scand");
  PrintPairs(ctrl, nrecv, rcand, "rcand");
#endif

  /***************************************************************
  * Determine how many lists and their sizes  you will send and 
  * received for each of the neighboring PEs
  ****************************************************************/
  rlens = graph->rlens = imalloc(nnbrs+1, "CreateCoarseGraph: graph->rlens");
  slens = graph->slens = imalloc(nnbrs+1, "CreateCoarseGraph: graph->slens");

  rsizes = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs));
  ssizes = iset(nnbrs, 0, iwspacemalloc(ctrl, nnbrs));

  /* Take care the sending data first */
  ikvsortii(nsend, scand);
  slens[0] = 0;
  for (k=i=0; i<nnbrs; i++) {
    otherlastvtx = vtxdist[peind[i]+1];
    for (; k<nsend && scand[k].key < otherlastvtx; k++)
      ssizes[i] += (xadj[scand[k].val+1]-xadj[scand[k].val]);
    slens[i+1] = k;
  }

  /* Take care the receiving data next. You cannot yet determine the rsizes[] */
  ikvsortii(nrecv, rcand);
  rlens[0] = 0;
  for (k=i=0; i<nnbrs; i++) {
    otherlastvtx = vtxdist[peind[i]+1];
    for (; k<nrecv && rcand[k].key < otherlastvtx; k++);
    rlens[i+1] = k;
  }

#ifdef DEBUG_CONTRACT
  PrintVector(ctrl, nnbrs+1, 0, slens, "slens");
  PrintVector(ctrl, nnbrs+1, 0, rlens, "rlens");
#endif

  /***************************************************************
  * Exchange size information
  ****************************************************************/
  /* Issue the receives first. */
  for (i=0; i<nnbrs; i++) {
    if (rlens[i+1]-rlens[i] > 0)  /* Issue a receive only if you are getting something */
      gkMPI_Irecv((void *)(rsizes+i), 1, IDX_T, peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Take care the sending data next */
  for (i=0; i<nnbrs; i++) {
    if (slens[i+1]-slens[i] > 0)  /* Issue a send only if you are sending something */
      gkMPI_Isend((void *)(ssizes+i), 1, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  /* OK, now get into the loop waiting for the operations to finish */
  for (i=0; i<nnbrs; i++) {
    if (rlens[i+1]-rlens[i] > 0)  
      gkMPI_Wait(ctrl->rreq+i, &ctrl->status);
  }
  for (i=0; i<nnbrs; i++) {
    if (slens[i+1]-slens[i] > 0)  
      gkMPI_Wait(ctrl->sreq+i, &ctrl->status);
  }


#ifdef DEBUG_CONTRACT
  PrintVector(ctrl, nnbrs, 0, rsizes, "rsizes");
  PrintVector(ctrl, nnbrs, 0, ssizes, "ssizes");
#endif

  /*************************************************************
  * Allocate memory for received/sent graphs and start sending 
  * and receiving data.
  * rgraph and sgraph is a different data structure than CSR
  * to facilitate single message exchange.
  **************************************************************/
  nrecvsize = isum(nnbrs, rsizes, 1);
  nsendsize = isum(nnbrs, ssizes, 1);
  rgraph = iwspacemalloc(ctrl, (4+ncon)*nrecv+2*nrecvsize);

  WCOREPUSH;  /* for freeing sgraph right away */
  sgraph = iwspacemalloc(ctrl, (4+ncon)*nsend+2*nsendsize);

  /* Deal with the received portion first */
  for (l=i=0; i<nnbrs; i++) {
    /* Issue a receive only if you are getting something */
    if (rlens[i+1]-rlens[i] > 0) {
      gkMPI_Irecv((void *)(rgraph+l), (4+ncon)*(rlens[i+1]-rlens[i])+2*rsizes[i], 
          IDX_T, peind[i], 1, ctrl->comm, ctrl->rreq+i);
      l += (4+ncon)*(rlens[i+1]-rlens[i])+2*rsizes[i];
    }
  }


  /* Deal with the sent portion now */
  for (ll=l=i=0; i<nnbrs; i++) {
    if (slens[i+1]-slens[i] > 0) {  /* Issue a send only if you are sending something */
      for (k=slens[i]; k<slens[i+1]; k++) {
        ii = scand[k].val;
        sgraph[ll++] = firstvtx+ii;
        sgraph[ll++] = xadj[ii+1]-xadj[ii];
        for (h=0; h<ncon; h++)
          sgraph[ll++] = vwgt[ii*ncon+h];
        sgraph[ll++] = (ctrl->partType == STATIC_PARTITION || ctrl->partType == ORDER_PARTITION 
                        ? -1 : vsize[ii]);
        sgraph[ll++] = (ctrl->partType == STATIC_PARTITION || ctrl->partType == ORDER_PARTITION 
                        ? -1 : home[ii]);
        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
          sgraph[ll++] = cmap[adjncy[jj]];
          sgraph[ll++] = adjwgt[jj];
        }
      }

      PASSERT(ctrl, ll-l == (4+ncon)*(slens[i+1]-slens[i])+2*ssizes[i]);

      /*myprintf(ctrl, "Sending to pe:%"PRIDX", %"PRIDX" lists of size %"PRIDX"\n", peind[i], slens[i+1]-slens[i], ssizes[i]); */
      gkMPI_Isend((void *)(sgraph+l), ll-l, IDX_T, peind[i], 1, ctrl->comm, ctrl->sreq+i);
      l = ll;
    }
  }

  /* OK, now get into the loop waiting for the operations to finish */
  for (i=0; i<nnbrs; i++) {
    if (rlens[i+1]-rlens[i] > 0)  
      gkMPI_Wait(ctrl->rreq+i, &ctrl->status);
  }
  for (i=0; i<nnbrs; i++) {
    if (slens[i+1]-slens[i] > 0)  
      gkMPI_Wait(ctrl->sreq+i, &ctrl->status);
  }


#ifdef DEBUG_CONTRACT
  rprintf(ctrl, "Graphs were sent!\n");
  PrintTransferedGraphs(ctrl, nnbrs, peind, slens, rlens, sgraph, rgraph);
#endif

  WCOREPOP;  /* free sgraph */

  /*************************************************************
  * Setup the mapping from indices returned by BSearch to 
  * those that are actually stored 
  **************************************************************/
  perm = iset(graph->nrecv, -1, iwspacemalloc(ctrl, graph->nrecv));
  for (j=i=0; i<nrecv; i++) {
    perm[BSearch(graph->nrecv, recvind, rgraph[j])] = j+1;
    j += (4+ncon)+2*rgraph[j+1];
  }

  /*************************************************************
  * Finally, create the coarser graph
  **************************************************************/
  /* Allocate memory for the coarser graph, and fire up coarsening */
  cxadj   = cgraph->xadj  = imalloc(cnvtxs+1, "CreateCoarserGraph: cxadj");
  cvwgt   = cgraph->vwgt  = imalloc(cnvtxs*ncon, "CreateCoarserGraph: cvwgt");
  cnvwgt  = cgraph->nvwgt = rmalloc(cnvtxs*ncon, "CreateCoarserGraph: cnvwgt");
  if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) {
    cvsize = cgraph->vsize = imalloc(cnvtxs, "CreateCoarserGraph: cvsize");
    chome  = cgraph->home  = imalloc(cnvtxs, "CreateCoarserGraph: chome");
  }
  if (where != NULL)
    cwhere = cgraph->where = imalloc(cnvtxs, "CreateCoarserGraph: cwhere");

  /* these are just upper bound estimates for now */
  cadjncy = iwspacemalloc(ctrl, nkeepsize+nrecvsize);
  cadjwgt = iwspacemalloc(ctrl, nkeepsize+nrecvsize);

  iset(LHTSIZE, -1, htable);

  cxadj[0] = cnvtxs = cnedges = 0;
  for (i=0; i<nvtxs; i++) {
    if (match[i] >= KEEP_BIT) {
      v = firstvtx+i; 
      u = match[i]-KEEP_BIT;

      if (u>=firstvtx && u<lastvtx && v > u) 
        continue;  /* I have already collapsed it as (u,v) */

      /* Collapse the v vertex first, which you know is local */
      for (h=0; h<ncon; h++)
        cvwgt[cnvtxs*ncon+h] = vwgt[i*ncon+h];
      if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) {
        cvsize[cnvtxs] = vsize[i];
        chome[cnvtxs]  = home[i];
      }
      if (where != NULL)
        cwhere[cnvtxs] = where[i];
      nedges = 0;

      /* Collapse the v (i) vertex first */
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        k = cmap[adjncy[j]];
        if (k < 0)
          printf("k=%d\n", (int)k);
        if (k != cfirstvtx+cnvtxs) {  /* If this is not an internal edge */
          l = k&MASK;
          if (htable[l] == -1) { /* Seeing this for first time */
            htable[l] = k;
            htableidx[l] = cnedges+nedges;
            cadjncy[cnedges+nedges] = k; 
            cadjwgt[cnedges+nedges++] = adjwgt[j];
          }
          else if (htable[l] == k) {
            cadjwgt[htableidx[l]] += adjwgt[j];
          }
          else { /* Now you have to go and do a search. Expensive case */
            for (l=0; l<nedges; l++) {
              if (cadjncy[cnedges+l] == k)
                break;
            }
            if (l < nedges) {
              cadjwgt[cnedges+l] += adjwgt[j];
            }
            else {
              cadjncy[cnedges+nedges] = k; 
              cadjwgt[cnedges+nedges++] = adjwgt[j];
            }
          }
        }
      }

      /* Collapse the u vertex next */
      if (v != u) { 
        if (u>=firstvtx && u<lastvtx) { /* Local vertex */
          u -= firstvtx;
          for (h=0; h<ncon; h++)
            cvwgt[cnvtxs*ncon+h] += vwgt[u*ncon+h];

          if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) 
            cvsize[cnvtxs] += vsize[u];

          for (j=xadj[u]; j<xadj[u+1]; j++) {
            k = cmap[adjncy[j]];
            if (k != cfirstvtx+cnvtxs) {  /* If this is not an internal edge */
              l = k&MASK;
              if (htable[l] == -1) { /* Seeing this for first time */
                htable[l] = k;
                htableidx[l] = cnedges+nedges;
                cadjncy[cnedges+nedges] = k; 
                cadjwgt[cnedges+nedges] = adjwgt[j];
                nedges++;
              }
              else if (htable[l] == k) {
                cadjwgt[htableidx[l]] += adjwgt[j];
              }
              else { /* Now you have to go and do a search. Expensive case */
                for (l=0; l<nedges; l++) {
                  if (cadjncy[cnedges+l] == k)
                    break;
                }
                if (l < nedges) {
                  cadjwgt[cnedges+l] += adjwgt[j];
                }
                else {
                  cadjncy[cnedges+nedges] = k; 
                  cadjwgt[cnedges+nedges] = adjwgt[j];
                  nedges++;
                }
              }
            }
          }
        }
        else { /* Remote vertex */
          u = perm[BSearch(graph->nrecv, recvind, u)];

          for (h=0; h<ncon; h++)
            /* Remember that the +1 stores the vertex weight */
            cvwgt[cnvtxs*ncon+h] += rgraph[(u+1)+h];

          if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) {
            cvsize[cnvtxs] += rgraph[u+1+ncon];
            chome[cnvtxs] = rgraph[u+2+ncon];
          }

          for (j=0; j<rgraph[u]; j++) {
            k = rgraph[u+3+ncon+2*j];
            if (k != cfirstvtx+cnvtxs) {  /* If this is not an internal edge */
              l = k&MASK;
              if (htable[l] == -1) { /* Seeing this for first time */
                htable[l] = k;
                htableidx[l] = cnedges+nedges;
                cadjncy[cnedges+nedges] = k; 
                cadjwgt[cnedges+nedges] = rgraph[u+3+ncon+2*j+1];
                nedges++;
              }
              else if (htable[l] == k) {
                cadjwgt[htableidx[l]] += rgraph[u+3+ncon+2*j+1];
              }
              else { /* Now you have to go and do a search. Expensive case */
                for (l=0; l<nedges; l++) {
                  if (cadjncy[cnedges+l] == k)
                    break;
                }
                if (l < nedges) {
                  cadjwgt[cnedges+l] += rgraph[u+3+ncon+2*j+1];
                }
                else {
                  cadjncy[cnedges+nedges] = k; 
                  cadjwgt[cnedges+nedges] = rgraph[u+3+ncon+2*j+1];
                  nedges++;
                }
              }
            }
          }
        }
      }

      cnedges += nedges;
      for (j=cxadj[cnvtxs]; j<cnedges; j++)
        htable[cadjncy[j]&MASK] = -1;  /* reset the htable */
      cxadj[++cnvtxs] = cnedges;
    }
  }

  cgraph->nedges = cnedges;

  /* ADD:  In order to keep from having to change this too much */
  /* ADD:  I kept vwgt array and recomputed nvwgt for each coarser graph */
  for (j=0; j<cnvtxs; j++) {
    for (h=0; h<ncon; h++)
      cgraph->nvwgt[j*ncon+h] = ctrl->invtvwgts[h]*cvwgt[j*ncon+h];
  }

  cgraph->adjncy = imalloc(cnedges, "CreateCoarserGraph: cadjncy");
  cgraph->adjwgt = imalloc(cnedges, "CreateCoarserGraph: cadjwgt");
  icopy(cnedges, cadjncy, cgraph->adjncy);
  icopy(cnedges, cadjwgt, cgraph->adjwgt);

  WCOREPOP;

  /* Note that graph->where works fine even if it is NULL */
  gk_free((void **)&graph->where, LTERM);

}
Example #3
0
void EliminateSubDomainEdges(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, ii, j, k, ncon, nparts, scheme, pid_from, pid_to, me, other, nvtxs, 
        total, max, avg, totalout, nind=0, ncand=0, ncand2, target, target2, 
        nadd, bestnadd=0;
  idx_t min, move, *cpwgt;
  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, 
        *mypmat, *otherpmat, *kpmat, *ind;
  idx_t *nads, **adids, **adwgts;
  ikv_t *cand, *cand2;
  ipq_t queue;
  real_t *tpwgts, badfactor=1.4;
  idx_t *pptr, *pind;
  idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL;  /* volume specific work arrays */

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  ncon   = graph->ncon;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;
  adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt);

  where = graph->where;
  pwgts = graph->pwgts;  /* We assume that this is properly initialized */

  nparts = ctrl->nparts;
  tpwgts = ctrl->tpwgts;

  cpwgt     = iwspacemalloc(ctrl, ncon);
  maxpwgt   = iwspacemalloc(ctrl, nparts*ncon);
  ind       = iwspacemalloc(ctrl, nvtxs);
  otherpmat = iset(nparts, 0, iwspacemalloc(ctrl, nparts));

  cand  = ikvwspacemalloc(ctrl, nparts);
  cand2 = ikvwspacemalloc(ctrl, nparts);

  pptr = iwspacemalloc(ctrl, nparts+1);
  pind = iwspacemalloc(ctrl, nvtxs);
  iarray2csr(nvtxs, nparts, where, pptr, pind);

  if (ctrl->objtype == METIS_OBJTYPE_VOL) {
    /* Vol-refinement specific working arrays */
    modind  = iwspacemalloc(ctrl, nvtxs);
    vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
    pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
  }


  /* Compute the pmat matrix and ndoms */
  ComputeSubDomainGraph(ctrl, graph);

  nads   = ctrl->nads;
  adids  = ctrl->adids;
  adwgts = ctrl->adwgts;

  mypmat = iset(nparts, 0, ctrl->pvec1);
  kpmat  = iset(nparts, 0, ctrl->pvec2);

  /* Compute the maximum allowed weight for each domain */
  for (i=0; i<nparts; i++) {
    for (j=0; j<ncon; j++)
      maxpwgt[i*ncon+j] = 
          (ncon == 1 ? 1.25 : 1.025)*tpwgts[i]*graph->tvwgt[j]*ctrl->ubfactors[j];
  }

  ipqInit(&queue, nparts);

  /* Get into the loop eliminating subdomain connections */
  while (1) {
    total = isum(nparts, nads, 1);
    avg   = total/nparts;
    max   = nads[iargmax(nparts, nads)];

    IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
          printf("Adjacent Subdomain Stats: Total: %3"PRIDX", "
                 "Max: %3"PRIDX"[%zu], Avg: %3"PRIDX"\n", 
                 total, max, iargmax(nparts, nads), avg)); 

    if (max < badfactor*avg)
      break;

    /* Add the subdomains that you will try to reduce their connectivity */
    ipqReset(&queue);
    for (i=0; i<nparts; i++) {
      if (nads[i] >= avg + (max-avg)/2)
        ipqInsert(&queue, i, nads[i]);
    }

    move = 0;
    while ((me = ipqGetTop(&queue)) != -1) {
      totalout = isum(nads[me], adwgts[me], 1);

      for (ncand2=0, i=0; i<nads[me]; i++) {
        mypmat[adids[me][i]] = adwgts[me][i];

        /* keep track of the weakly connected adjacent subdomains */
        if (2*nads[me]*adwgts[me][i] < totalout) {
          cand2[ncand2].val   = adids[me][i];
          cand2[ncand2++].key = adwgts[me][i];
        }
      }

      IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
            printf("Me: %"PRIDX", Degree: %4"PRIDX", TotalOut: %"PRIDX",\n", 
                me, nads[me], totalout));

      /* Sort the connections according to their cut */
      ikvsorti(ncand2, cand2);

      /* Two schemes are used for eliminating subdomain edges.
         The first, tries to eliminate subdomain edges by moving remote groups 
         of vertices to subdomains that 'me' is already connected to.
         The second, tries to eliminate subdomain edges by moving entire sets of 
         my vertices that connect to the 'other' subdomain to a subdomain that 
         I'm already connected to.
         These two schemes are applied in sequence. */
      target = target2 = -1;
      for (scheme=0; scheme<2; scheme++) {
        for (min=0; min<ncand2; min++) {
          other = cand2[min].val;

          /* pid_from is the subdomain from where the vertices will be removed.
             pid_to is the adjacent subdomain to pid_from that defines the 
             (me, other) subdomain edge that needs to be removed */
          if (scheme == 0) {
            pid_from = other;
            pid_to   = me;
          }
          else {
            pid_from  = me;
            pid_to    = other;
          }
  
          /* Go and find the vertices in 'other' that are connected in 'me' */
          for (nind=0, ii=pptr[pid_from]; ii<pptr[pid_from+1]; ii++) {
            i = pind[ii];
            ASSERT(where[i] == pid_from);
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              if (where[adjncy[j]] == pid_to) {
                ind[nind++] = i;
                break;
              }
            }
          }
  
          /* Go and construct the otherpmat to see where these nind vertices are 
             connected to */
          iset(ncon, 0, cpwgt);
          for (ncand=0, ii=0; ii<nind; ii++) {
            i = ind[ii];
            iaxpy(ncon, 1, vwgt+i*ncon, 1, cpwgt, 1);
    
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              if ((k = where[adjncy[j]]) == pid_from)
                continue;
              if (otherpmat[k] == 0)
                cand[ncand++].val = k;
              otherpmat[k] += (adjwgt ? adjwgt[j] : 1);
            }
          }
    
          for (i=0; i<ncand; i++) {
            cand[i].key = otherpmat[cand[i].val];
            ASSERT(cand[i].key > 0);
          }

          ikvsortd(ncand, cand);
    
          IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
                printf("\tMinOut: %4"PRIDX", to: %3"PRIDX", TtlWgt: %5"PRIDX"[#:%"PRIDX"]\n", 
                    mypmat[other], other, isum(ncon, cpwgt, 1), nind));

          /* Go through and select the first domain that is common with 'me', and does
             not increase the nads[target] higher than nads[me], subject to the maxpwgt
             constraint. Traversal is done from the mostly connected to the least. */
          for (i=0; i<ncand; i++) {
            k = cand[i].val;
    
            if (mypmat[k] > 0) {
              /* Check if balance will go off */
              if (!ivecaxpylez(ncon, 1, cpwgt, pwgts+k*ncon, maxpwgt+k*ncon))
                continue;
    
              /* get a dense vector out of k's connectivity */
              for (j=0; j<nads[k]; j++) 
                kpmat[adids[k][j]] = adwgts[k][j];
    
              /* Check if the move to domain k will increase the nads of another
                 subdomain j that the set of vertices being moved are connected
                 to but domain k is not connected to. */
              for (j=0; j<nparts; j++) {
                if (otherpmat[j] > 0 && kpmat[j] == 0 && nads[j]+1 >= nads[me]) 
                  break;
              }
  
              /* There were no bad second level effects. See if you can find a
                 subdomain to move to. */
              if (j == nparts) { 
                for (nadd=0, j=0; j<nparts; j++) {
                  if (otherpmat[j] > 0 && kpmat[j] == 0)
                    nadd++;
                }
    
                IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
                      printf("\t\tto=%"PRIDX", nadd=%"PRIDX", %"PRIDX"\n", k, nadd, nads[k]));
    
                if (nads[k]+nadd < nads[me]) {
                  if (target2 == -1 || nads[target2]+bestnadd > nads[k]+nadd ||
                      (nads[target2]+bestnadd == nads[k]+nadd && bestnadd > nadd)) {
                    target2  = k;
                    bestnadd = nadd;
                  }
                }
  
                if (nadd == 0) 
                  target = k;
              }

              /* reset kpmat for the next iteration */
              for (j=0; j<nads[k]; j++) 
                kpmat[adids[k][j]] = 0;
            }

            if (target != -1)
              break;
          }

          /* reset the otherpmat for the next iteration */
          for (i=0; i<ncand; i++) 
            otherpmat[cand[i].val] = 0;

          if (target == -1 && target2 != -1)
            target = target2;
    
          if (target != -1) {
            IFSET(ctrl->dbglvl, METIS_DBG_CONNINFO, 
                printf("\t\tScheme: %"PRIDX". Moving to %"PRIDX"\n", scheme, target));
            move = 1;
            break;
          }
        }

        if (target != -1)
          break;  /* A move was found. No need to try the other scheme */
      }

      /* reset the mypmat for next iteration */
      for (i=0; i<nads[me]; i++) 
        mypmat[adids[me][i]] = 0;

      /* Note that once a target is found the above loops exit right away. So the
         following variables are valid */
      if (target != -1) {
        switch (ctrl->objtype) {
          case METIS_OBJTYPE_CUT:
            MoveGroupMinConnForCut(ctrl, graph, target, nind, ind);
            break;
          case METIS_OBJTYPE_VOL:
            MoveGroupMinConnForVol(ctrl, graph, target, nind, ind, vmarker, 
                pmarker, modind);
            break;
          default:
            gk_errexit(SIGERR, "Unknown objtype of %d\n", ctrl->objtype);
        }

        /* Update the csr representation of the partitioning vector */
        iarray2csr(nvtxs, nparts, where, pptr, pind);
      }
    }

    if (move == 0)
      break;
  }

  ipqFree(&queue);

  WCOREPOP;
}
Example #4
0
void EliminateComponents(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, ii, j, jj, k, me, nparts, nvtxs, ncon, ncmps, other, 
        ncand, target;
  idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts;
  idx_t *cptr, *cind, *cpvec, *pcptr, *pcind, *cwhere;
  idx_t cid, bestcid, *cwgt, *bestcwgt;
  idx_t ntodo, oldntodo, *todo;
  rkv_t *cand;
  real_t *tpwgts;
  idx_t *vmarker=NULL, *pmarker=NULL, *modind=NULL;  /* volume specific work arrays */

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  ncon   = graph->ncon;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;
  adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt);

  where = graph->where;
  pwgts = graph->pwgts;

  nparts = ctrl->nparts;
  tpwgts = ctrl->tpwgts;

  cptr = iwspacemalloc(ctrl, nvtxs+1);
  cind = iwspacemalloc(ctrl, nvtxs);

  ncmps = FindPartitionInducedComponents(graph, where, cptr, cind);

  IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
      printf("I found %"PRIDX" components, for this %"PRIDX"-way partition\n", 
          ncmps, nparts)); 

  /* There are more components than partitions */
  if (ncmps > nparts) {
    cwgt     = iwspacemalloc(ctrl, ncon);
    bestcwgt = iwspacemalloc(ctrl, ncon);
    cpvec    = iwspacemalloc(ctrl, nparts);
    pcptr    = iset(nparts+1, 0, iwspacemalloc(ctrl, nparts+1));
    pcind    = iwspacemalloc(ctrl, ncmps);
    cwhere   = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
    todo     = iwspacemalloc(ctrl, ncmps);
    cand     = (rkv_t *)wspacemalloc(ctrl, nparts*sizeof(rkv_t));

    if (ctrl->objtype == METIS_OBJTYPE_VOL) {
      /* Vol-refinement specific working arrays */
      modind  = iwspacemalloc(ctrl, nvtxs);
      vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
      pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
    }


    /* Get a CSR representation of the components-2-partitions mapping */
    for (i=0; i<ncmps; i++) 
      pcptr[where[cind[cptr[i]]]]++;
    MAKECSR(i, nparts, pcptr);
    for (i=0; i<ncmps; i++) 
      pcind[pcptr[where[cind[cptr[i]]]]++] = i;
    SHIFTCSR(i, nparts, pcptr);

    /* Assign the heaviest component of each partition to its original partition */
    for (ntodo=0, i=0; i<nparts; i++) {
      if (pcptr[i+1]-pcptr[i] == 1)
        bestcid = pcind[pcptr[i]];
      else {
        for (bestcid=-1, j=pcptr[i]; j<pcptr[i+1]; j++) {
          cid = pcind[j];
          iset(ncon, 0, cwgt);
          for (ii=cptr[cid]; ii<cptr[cid+1]; ii++)
            iaxpy(ncon, 1, vwgt+cind[ii]*ncon, 1, cwgt, 1);
          if (bestcid == -1 || isum(ncon, bestcwgt, 1) < isum(ncon, cwgt, 1)) {
            bestcid  = cid;
            icopy(ncon, cwgt, bestcwgt);
          }
        }
        /* Keep track of those that need to be dealt with */
        for (j=pcptr[i]; j<pcptr[i+1]; j++) {
          if (pcind[j] != bestcid)
            todo[ntodo++] = pcind[j];
        }
      }

      for (j=cptr[bestcid]; j<cptr[bestcid+1]; j++) {
        ASSERT(where[cind[j]] == i);
        cwhere[cind[j]] = i;
      }
    }


    while (ntodo > 0) {
      oldntodo = ntodo;
      for (i=0; i<ntodo; i++) {
        cid = todo[i];
        me = where[cind[cptr[cid]]];  /* Get the domain of this component */

        /* Determine the weight of the block to be moved */
        iset(ncon, 0, cwgt);
        for (j=cptr[cid]; j<cptr[cid+1]; j++) 
          iaxpy(ncon, 1, vwgt+cind[j]*ncon, 1, cwgt, 1);

        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
            printf("Trying to move %"PRIDX" [%"PRIDX"] from %"PRIDX"\n", 
                cid, isum(ncon, cwgt, 1), me)); 

        /* Determine the connectivity */
        iset(nparts, 0, cpvec);
        for (j=cptr[cid]; j<cptr[cid+1]; j++) {
          ii = cind[j];
          for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) 
            if (cwhere[adjncy[jj]] != -1)
              cpvec[cwhere[adjncy[jj]]] += (adjwgt ? adjwgt[jj] : 1);
        }

        /* Put the neighbors into a cand[] array for sorting */
        for (ncand=0, j=0; j<nparts; j++) {
          if (cpvec[j] > 0) {
            cand[ncand].key   = cpvec[j];
            cand[ncand++].val = j;
          }
        }
        if (ncand == 0)
          continue;

        rkvsortd(ncand, cand);

        /* Limit the moves to only the top candidates, which are defined as 
           those with connectivity at least 50% of the best.
           This applies only when ncon=1, as for multi-constraint, balancing
           will be hard. */
        if (ncon == 1) {
          for (j=1; j<ncand; j++) {
            if (cand[j].key < .5*cand[0].key)
              break;
          }
          ncand = j;
        }
      
        /* Now among those, select the one with the best balance */
        target = cand[0].val;
        for (j=1; j<ncand; j++) {
          if (BetterBalanceKWay(ncon, cwgt, ctrl->ubfactors,
                1, pwgts+target*ncon, ctrl->pijbm+target*ncon,
                1, pwgts+cand[j].val*ncon, ctrl->pijbm+cand[j].val*ncon))
            target = cand[j].val;
        }

        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, 
            printf("\tMoving it to %"PRIDX" [%"PRIDX"] [%"PRIDX"]\n", target, cpvec[target], ncand));

        /* Note that as a result of a previous movement, a connected component may
           now will like to stay to its original partition */
        if (target != me) {
          switch (ctrl->objtype) {
            case METIS_OBJTYPE_CUT:
              MoveGroupContigForCut(ctrl, graph, target, cid, cptr, cind);
              break;

            case METIS_OBJTYPE_VOL:
              MoveGroupContigForVol(ctrl, graph, target, cid, cptr, cind, 
                  vmarker, pmarker, modind);
              break;

            default:
              gk_errexit(SIGERR, "Unknown objtype %d\n", ctrl->objtype);
          }
        }

        /* Update the cwhere vector */
        for (j=cptr[cid]; j<cptr[cid+1]; j++) 
          cwhere[cind[j]] = target;

        todo[i] = todo[--ntodo];
      }
      if (oldntodo == ntodo) {
        IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, printf("Stopped at ntodo: %"PRIDX"\n", ntodo));
        break;
      }
    }

    for (i=0; i<nvtxs; i++)
      ASSERT(where[i] == cwhere[i]);

  }

  WCOREPOP;
}
Example #5
0
void
dollar(char *cp)
{
	cp = nextc(cp);
	switch(*cp) {
	default:
		Bprint(bioout, "?\n");
		break;

	case 'c':
	case 'C':
		stktrace(*cp);
		break;

	case 'b':
		dobplist();
		break;

	case 'r':
		dumpreg();
		break;

	case 'R':
		dumpreg();
		/* fall through */

	case 'f':
		dumpfreg();
		break;

	case 'F':
		dumpdreg();
		break;

	case 'q':
		exits(0);
		break;

	case 'Q':
		isum();
		segsum();
		break;

	case 't':
		cp++;
		switch(*cp) {
		default:
			Bprint(bioout, ":t[0sic]\n");
			break;
		case '\0':
			trace = 1;
			break;
		case '0':
			trace = 0;
			sysdbg = 0;
			calltree = 0;
			break;
		case 's':
			sysdbg = 1;
			break;
		case 'i':
			trace = 1;
			break;
		case 'c':
			calltree = 1;
			break;
		}
		break;

	case 'i':
		cp++;
		switch(*cp) {
		default:
			Bprint(bioout, "$i[isa]\n");
			break;
		case 'i':
			isum();
			break;
		case 's':
			segsum();
			break;
		case 'a':
			isum();
			segsum();
			iprofile();
			break;
		case 'p':
			iprofile();
			break;
		}
	}
}
Example #6
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void RedoMyLink(ctrl_t *ctrl, graph_t *graph, idx_t *home, idx_t me,
                idx_t you, real_t *flows, real_t *sr_cost, real_t *sr_lbavg)
{
    idx_t h, i, r;
    idx_t nvtxs, nedges, ncon;
    idx_t  pass, lastseed, totalv;
    idx_t *xadj, *adjncy, *adjwgt, *where, *vsize;
    idx_t *costwhere, *lbwhere, *selectwhere;
    idx_t *ed, *id, *bndptr, *bndind, *perm;
    real_t *nvwgt, mycost;
    real_t lbavg, *lbvec;
    real_t best_lbavg, other_lbavg = -1.0, bestcost, othercost = -1.0;
    real_t *npwgts, *pwgts, *tpwgts;
    real_t ipc_factor, redist_factor, ftmp;
    idx_t mype;
    gkMPI_Comm_rank(MPI_COMM_WORLD, &mype);


    WCOREPUSH;

    nvtxs  = graph->nvtxs;
    nedges = graph->nedges;
    ncon   = graph->ncon;
    xadj   = graph->xadj;
    nvwgt  = graph->nvwgt;
    vsize  = graph->vsize;
    adjncy = graph->adjncy;
    adjwgt = graph->adjwgt;
    where  = graph->where;

    ipc_factor    = ctrl->ipc_factor;
    redist_factor = ctrl->redist_factor;

    /* set up data structures */
    id     = graph->sendind = iwspacemalloc(ctrl, nvtxs);
    ed     = graph->recvind = iwspacemalloc(ctrl, nvtxs);
    bndptr = graph->sendptr = iwspacemalloc(ctrl, nvtxs);
    bndind = graph->recvptr = iwspacemalloc(ctrl, nvtxs);

    costwhere = iwspacemalloc(ctrl, nvtxs);
    lbwhere   = iwspacemalloc(ctrl, nvtxs);
    perm      = iwspacemalloc(ctrl, nvtxs);

    lbvec  = rwspacemalloc(ctrl, ncon);
    pwgts  = rset(2*ncon, 0.0, rwspacemalloc(ctrl, 2*ncon));
    npwgts = rwspacemalloc(ctrl, 2*ncon);
    tpwgts = rwspacemalloc(ctrl, 2*ncon);

    graph->gnpwgts = npwgts;

    RandomPermute(nvtxs, perm, 1);
    icopy(nvtxs, where, costwhere);
    icopy(nvtxs, where, lbwhere);

    /* compute target pwgts */
    for (h=0; h<ncon; h++) {
        tpwgts[h]      = -1.0*flows[h];
        tpwgts[ncon+h] = flows[h];
    }

    for (i=0; i<nvtxs; i++) {
        if (where[i] == me) {
            for (h=0; h<ncon; h++) {
                tpwgts[h] += nvwgt[i*ncon+h];
                pwgts[h]  += nvwgt[i*ncon+h];
            }
        }
        else {
            ASSERT(where[i] == you);
            for (h=0; h<ncon; h++) {
                tpwgts[ncon+h] += nvwgt[i*ncon+h];
                pwgts[ncon+h]  += nvwgt[i*ncon+h];
            }
        }
    }

    /* we don't want any weights to be less than zero */
    for (h=0; h<ncon; h++) {
        if (tpwgts[h] < 0.0) {
            tpwgts[ncon+h] += tpwgts[h];
            tpwgts[h] = 0.0;
        }

        if (tpwgts[ncon+h] < 0.0) {
            tpwgts[h] += tpwgts[ncon+h];
            tpwgts[ncon+h] = 0.0;
        }
    }


    /* now compute new bisection */
    bestcost = (real_t)isum(nedges, adjwgt, 1)*ipc_factor +
               (real_t)isum(nvtxs, vsize, 1)*redist_factor;
    best_lbavg = 10.0;

    lastseed = 0;
    for (pass=N_MOC_REDO_PASSES; pass>0; pass--) {
        iset(nvtxs, 1, where);

        /* find seed vertices */
        r = perm[lastseed] % nvtxs;
        lastseed = (lastseed+1) % nvtxs;
        where[r] = 0;

        Mc_Serial_Compute2WayPartitionParams(ctrl, graph);
        Mc_Serial_Init2WayBalance(ctrl, graph, tpwgts);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);
        Mc_Serial_Balance2Way(ctrl, graph, tpwgts, 1.02);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);

        for (i=0; i<nvtxs; i++)
            where[i] = (where[i] == 0) ? me : you;

        for (i=0; i<ncon; i++) {
            ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
            if (ftmp != 0.0)
                lbvec[i] = fabs(npwgts[i]-tpwgts[i])/ftmp;
            else
                lbvec[i] = 0.0;
        }
        lbavg = ravg(ncon, lbvec);

        totalv = 0;
        for (i=0; i<nvtxs; i++)
            if (where[i] != home[i])
                totalv += vsize[i];

        mycost = (real_t)(graph->mincut)*ipc_factor + (real_t)totalv*redist_factor;

        if (bestcost >= mycost) {
            bestcost = mycost;
            other_lbavg = lbavg;
            icopy(nvtxs, where, costwhere);
        }

        if (best_lbavg >= lbavg) {
            best_lbavg = lbavg;
            othercost = mycost;
            icopy(nvtxs, where, lbwhere);
        }
    }

    if (other_lbavg <= .05) {
        selectwhere = costwhere;
        *sr_cost = bestcost;
        *sr_lbavg = other_lbavg;
    }
    else {
        selectwhere = lbwhere;
        *sr_cost = othercost;
        *sr_lbavg = best_lbavg;
    }

    icopy(nvtxs, selectwhere, where);

    WCOREPOP;
}