예제 #1
0
파일: match.c 프로젝트: mrklein/ParMGridGen
/*************************************************************************
* Match_HEM: computes a vertex matching with a heavy-edge style heuristic
* and passes it to CreateCoarseGraph().
*
* Vertices are visited in the order produced by BucketSortKeysInc() on
* their vertex weights, applied to the ordering that RandomPermute()
* stored in tperm.  Every vertex that is still unmatched is paired with
* the unmatched neighbour k that maximises
*     1.0/ARATIO2(dim, surface of the merged pair, volume of the merged pair)
* subject to vwgt[i]+vwgt[k] <= ctrl->maxsize.  If no neighbour qualifies
* the vertex is matched with itself.
*
* graph->cmap is allocated here and stays attached to the graph; the
* match, perm and tperm scratch arrays are released before returning.
**************************************************************************/
void Match_HEM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, u, e, nvtxs, ncoarse, best, dim;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm, *tperm;
  realtype score, bestscore;
  realtype *vvol, *vsurf, *adjwgt, *adjwgtsum;

  dim   = ctrl->dim;
  nvtxs = graph->nvtxs;

  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;

  vvol      = graph->vvol;
  vsurf     = graph->vsurf;
  adjwgt    = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;

  cmap = graph->cmap = idxsmalloc(nvtxs, -1, "cmap");
  match = idxsmalloc(nvtxs, -1, "match");

  perm = idxmalloc(nvtxs, "perm");
  tperm = idxmalloc(nvtxs, "tperm");

  /* Sort by vertex weight, starting from the ordering RandomPermute() left in tperm */
  RandomPermute(nvtxs, tperm, 1);
  BucketSortKeysInc(nvtxs, vwgt[iamax(nvtxs, vwgt)], vwgt, tperm, perm);

  ncoarse = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];

    if (match[v] != UNMATCHED)
      continue;

    /* Default: the vertex stays alone (matched with itself) */
    best = v;
    bestscore = 0.0;

    for (e=xadj[v]; e<xadj[v+1]; e++) {
      u = adjncy[e];

      /* Score of merging v and u; the shared face is removed from both sides */
      score = 1.0/ARATIO2(dim, vsurf[v]+vsurf[u]+adjwgtsum[v]+adjwgtsum[u]-
              2.0*adjwgt[e], vvol[v]+vvol[u]);

      if (match[u] != UNMATCHED)
        continue;
      if (vwgt[v]+vwgt[u] > ctrl->maxsize)
        continue;

      if (score > bestscore) {
        bestscore = score;
        best = u;
      }
    }

    cmap[best] = ncoarse;
    cmap[v] = ncoarse;
    ncoarse++;

    match[v] = best;
    match[best] = v;
  }

  CreateCoarseGraph(graph, ncoarse, match, perm);

  IMfree((void**)&tperm, &perm, &match, LTERM);
}
예제 #2
0
파일: grsetup.c 프로젝트: luyukunphy/namd
/*************************************************************************
* ComputeMoveStatistics: reports how many vertices change processor under
* the current graph->where assignment.
*
*   *nmoved  - GlobalSESum of the per-processor count of local vertices
*              whose where[] differs from ctrl->mype
*   *maxout  - GlobalSEMax of that same per-processor count
*   *maxin   - GlobalSEMax of (global size of this processor's partition
*              minus the local vertices that stay put)
*
* Calls MPI_Allreduce on ctrl->comm, so every processor of the
* communicator has to call it.
**************************************************************************/
void ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
    int v, nleaving, nvtxs;
    idxtype *where;
    idxtype *localcnt, *globalcnt;

    nvtxs = graph->nvtxs;
    where = graph->where;

    localcnt = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: lpvtxs");
    globalcnt = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs");

    /* Count local vertices per destination and those leaving this processor */
    nleaving = 0;
    for (v=0; v<nvtxs; v++) {
        localcnt[where[v]] += 1;
        if (where[v] == ctrl->mype)
            continue;
        nleaving++;
    }

    MPI_Allreduce((void *)localcnt, (void *)globalcnt, ctrl->nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

    *nmoved = GlobalSESum(ctrl, nleaving);
    *maxout = GlobalSEMax(ctrl, nleaving);
    /* incoming = final size of my partition minus what I already hold */
    *maxin = GlobalSEMax(ctrl, globalcnt[ctrl->mype]-(nvtxs-nleaving));

    GKfree((void **)&localcnt, (void **)&globalcnt, LTERM);
}
예제 #3
0
파일: mesh.c 프로젝트: netsym/minissf
/*****************************************************************************
* QUADNODALMETIS: builds the nodal graph of a mesh with 4 nodes per element.
*
*   nelmnts, nvtxs - number of elements and of nodes
*   elmnts         - 4*nelmnts node ids, four consecutive entries per element
*   dxadj, dadjncy - output adjacency structure in CSR form (dxadj needs
*                    nvtxs+1 entries; dadjncy is filled sequentially)
*
* Two nodes become adjacent when they sit on neighbouring corners of a
* common element: for corner k the neighbours are the corners listed in
* table[k].
******************************************************************************/
void QUADNODALMETIS(int nelmnts, int nvtxs, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
    int v, p, e, k, s, base, nbr, nedges;
    idxtype *nptr, *nind;
    idxtype *mark;
    int table[4][2] = {{1, 3}, {0, 2}, {1, 3}, {0, 2}};

    /* Node -> element list: count, prefix-sum, then scatter */
    nptr = idxsmalloc(nvtxs+1, 0, "QUADNODALMETIS: nptr");
    for (p=0; p<4*nelmnts; p++)
        nptr[elmnts[p]]++;
    MAKECSR(v, nvtxs, nptr);

    nind = idxmalloc(nptr[nvtxs], "QUADNODALMETIS: nind");
    for (e=0; e<nelmnts; e++) {
        for (k=0; k<4; k++)
            nind[nptr[elmnts[4*e+k]]++] = e;
    }
    /* The scatter advanced every start pointer; shift them back */
    for (v=nvtxs; v>0; v--)
        nptr[v] = nptr[v-1];
    nptr[0] = 0;

    /* mark[x] == v means x is already in v's adjacency list */
    mark = idxsmalloc(nvtxs, -1, "QUADNODALMETIS: mark");

    nedges = dxadj[0] = 0;
    for (v=0; v<nvtxs; v++) {
        mark[v] = v;
        for (p=nptr[v]; p<nptr[v+1]; p++) {
            base = 4*nind[p];

            /* Locate which corner of this element is v */
            k = 0;
            while (k < 4 && elmnts[base+k] != v)
                k++;
            ASSERT(k != 4);

            /* Add the two corners adjacent to corner k */
            for (s=0; s<2; s++) {
                nbr = elmnts[base+table[k][s]];
                if (mark[nbr] != v) {
                    mark[nbr] = v;
                    dadjncy[nedges++] = nbr;
                }
            }
        }
        dxadj[v+1] = nedges;
    }

    free(mark);
    free(nptr);
    free(nind);

}
예제 #4
0
파일: grsetup.c 프로젝트: luyukunphy/namd
/*************************************************************************
* Moc_SetUpGraph: creates the level-0 GraphType that wraps the caller's
* distributed CSR arrays and returns it.
*
*   ncon     - number of weights (constraints) per vertex
*   vtxdist  - vertex distribution; entry p..p+1 bounds processor p's range
*   wgtflag  - bit 2 clear: vertex weights are replaced by an all-ones array
*              bit 1 clear: edge weights are replaced by an all-ones array
*
* Also fills ctrl->tvwgts[] with the global per-constraint weight sums
* (via GlobalSESum), allocates graph->nvwgt with every weight divided by
* that sum, and seeds rand() with ctrl->seed.  If any global sum is zero
* the routine prints an error, calls MPI_Finalize() and exits.
**************************************************************************/
GraphType *Moc_SetUpGraph(CtrlType *ctrl, int ncon, idxtype *vtxdist, idxtype *xadj,
                          idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, int *wgtflag)
{
    int v, c;
    GraphType *graph;
    int localsum[MAXNCON];

    graph = CreateGraph();

    graph->level   = 0;
    graph->ncon    = ncon;
    graph->vtxdist = vtxdist;
    graph->gnvtxs  = vtxdist[ctrl->npes];
    graph->nvtxs   = vtxdist[ctrl->mype+1]-vtxdist[ctrl->mype];

    graph->xadj    = xadj;
    graph->adjncy  = adjncy;
    graph->nedges  = xadj[graph->nvtxs];

    graph->vwgt    = vwgt;
    graph->adjwgt  = adjwgt;

    /* Substitute unit weights for whatever the caller did not supply */
    if (!((*wgtflag)&2))
        graph->vwgt = idxsmalloc(graph->nvtxs*ncon, 1, "Par_KMetis: vwgt");

    if (!((*wgtflag)&1))
        graph->adjwgt = idxsmalloc(graph->nedges, 1, "Par_KMetis: adjwgt");

    /* Local per-constraint weight sums */
    for (c=0; c<ncon; c++)
        localsum[c] = 0;

    for (v=0; v<graph->nvtxs; v++) {
        for (c=0; c<ncon; c++)
            localsum[c] += graph->vwgt[v*ncon+c];
    }

    /* Global per-constraint weight sums */
    for (c=0; c<ncon; c++)
        ctrl->tvwgts[c] = GlobalSESum(ctrl, localsum[c]);

    /* A constraint whose total weight is zero cannot be normalized */
    for (c=0; c<ncon; c++) {
        if (ctrl->tvwgts[c] != 0)
            continue;

        rprintf(ctrl, "ERROR: sum weight for constraint %d is zero\n", c);
        MPI_Finalize();
        exit(-1);
    }

    /* Normalized vertex weights */
    graph->nvwgt = fmalloc(graph->nvtxs*ncon, "graph->nvwgt");
    for (v=0; v<graph->nvtxs; v++) {
        for (c=0; c<ncon; c++)
            graph->nvwgt[v*ncon+c] = (floattype)(graph->vwgt[v*ncon+c]) / (floattype)(ctrl->tvwgts[c]);
    }

    srand(ctrl->seed);

    return graph;
}
/*************************************************************************
* ComputeNCutVector: computes the normalized cut of a clustering.
*
*   graph      - graph in CSR form; graph->adjwgt may be NULL, in which
*                case every edge counts as weight 1
*   where      - where[i] is the cluster of vertex i, in [0, npart)
*   npart      - number of clusters
*   ncutVector - optional output of npart floats.  For every cluster with
*                positive degree, entry i receives cut(i)/degree(i);
*                entries of clusters with zero degree are left untouched.
*                Pass NULL when only the total is wanted.
*
* Self loops (adjncy[j] == i) do not contribute to the degree.
*
* Returns the sum of the per-cluster ratios plus the number of clusters
* whose degree is zero.
*
* Unlike earlier revisions, no scratch vector is allocated when
* ncutVector is NULL; that buffer was never released and leaked on
* every such call.
**************************************************************************/
float ComputeNCutVector(GraphType *graph, idxtype *where, int npart, float* ncutVector)
{
  int i, j, cm, nvtxs, empty;
  idxtype *ncut, *degree, *xadj, *adjncy, *adjwgt;
  idxtype wgt;
  float result, ratio;

  ncut = idxsmalloc(npart, 0, "ComputeNCut: ncut");
  degree = idxsmalloc(npart, 0, "ComputeNCut: degree");

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  /* Accumulate, per cluster, its total degree and the weight leaving it */
  for (i=0; i<nvtxs; i++) {
    cm = where[i];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      wgt = (adjwgt == NULL ? 1 : adjwgt[j]);

      if (adjncy[j] != i)
        degree[cm] += wgt;
      if (cm != where[adjncy[j]])
        ncut[cm] += wgt;
    }
  }

  empty = 0;
  result = 0;
  for (i=0; i<npart; i++) {
    if (degree[i] == 0)
      empty++;

    if (degree[i] > 0) {
      ratio = ncut[i] *1.0/ degree[i];
      if (ncutVector != NULL)
        ncutVector[i] = ratio;
      result += ratio;
    }
  }

  free(ncut);
  free(degree);

  return result+empty;
}
예제 #6
0
/*************************************************************************
* IsConnected: checks whether the graph is connected.
*
* Runs a breadth-first search from vertex 0 and compares the number of
* vertices reached with graph->nvtxs.  The graph must contain at least
* one vertex, since vertex 0 is used as the seed unconditionally.
*
*   ctrl   - not used by this routine
*   report - when non-zero and the graph is disconnected, the number of
*            unreached vertices is printed on stdout
*
* Returns 1 if every vertex was reached, 0 otherwise.
*
* The touched/queue work arrays are now released before returning; they
* used to leak on every call.
**************************************************************************/
int IsConnected(CtrlType *ctrl, GraphType *graph, int report)
{
  int i, j, k, nvtxs, first, last, connected;
  idxtype *xadj, *adjncy, *touched, *queue;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: touched");
  queue = idxmalloc(nvtxs, "IsConnected: queue");

  touched[0] = 1;
  queue[0] = 0;
  first = 0; last = 1;

  /* queue[first..last) holds the vertices discovered but not yet expanded */
  while (first < last) {
    i = queue[first++];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (!touched[k]) {
        queue[last++] = k;
        touched[k] = 1;
      }
    }
  }

  if (first != nvtxs && report)
    printf("The graph is not connected. It has %d disconnected vertices!\n", nvtxs-first);

  connected = (first == nvtxs ? 1 : 0);

  free(touched);
  free(queue);

  return connected;
}
예제 #7
0
/*************************************************************************
* ComputePartitionBalance: computes the load imbalance of a partitioning.
*
*   where - where[i] is the partition of vertex i, in [0, nparts)
*   ubvec - output.  Without vertex weights (graph->vwgt == NULL) only
*           ubvec[0] is written: nparts * (largest partition size) / nvtxs.
*           With vertex weights one entry per constraint j is written:
*           nparts * (heaviest partition weight) / (total weight).
**************************************************************************/
void ComputePartitionBalance(GraphType *graph, int nparts, idxtype *where, float *ubvec)
{
  int v, c, nvtxs, ncon;
  idxtype *pwgts, *vwgt;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  vwgt = graph->vwgt;

  pwgts = idxsmalloc(nparts, 0, "ComputePartitionInfo: kpwgts");

  if (vwgt != NULL) {
    /* One balance figure per constraint */
    for (c=0; c<ncon; c++) {
      idxset(nparts, 0, pwgts);
      for (v=0; v<graph->nvtxs; v++)
        pwgts[where[v]] += vwgt[v*ncon+c];

      ubvec[c] = 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/(1.0*idxsum(nparts, pwgts));
    }
  }
  else {
    /* Unweighted: balance of the vertex counts */
    for (v=0; v<nvtxs; v++)
      pwgts[where[v]]++;
    ubvec[0] = 1.0*nparts*pwgts[idxamax(nparts, pwgts)]/(1.0*nvtxs);
  }

  free(pwgts);

}
예제 #8
0
/*************************************************************************
* FindComponents: finds the connected components that remain once the
* separator is removed from the graph.
*
* Vertices listed in graph->bndind are pre-marked as visited so the
* search never crosses them; vertices with where[i] != 2 are the ones
* that get distributed among components.  (The code assumes these two
* descriptions agree, i.e. that graph->bndind holds exactly the
* where[i] == 2 vertices.)
*
*   cptr - output; cptr[c]..cptr[c+1] delimits component c inside cind
*   cind - output; component vertices in breadth-first order.  It also
*          serves as the BFS queue.
*
* Returns the number of components written to cptr.  When no vertex has
* where[i] != 2, the routine still returns 1 with an empty component
* (cptr[0] == cptr[1] == 0), as it always did, but it no longer writes
* touched[nvtxs] (one past the end of the array) while seeding the
* search.
**************************************************************************/
int FindComponents(CtrlType *ctrl, GraphType *graph, idxtype *cptr, idxtype *cind)
{
  int i, j, k, nvtxs, first, last, nleft, ncmps;
  idxtype *xadj, *adjncy, *where, *touched, *queue;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  where = graph->where;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: queue");

  /* Block the search at the boundary vertices */
  for (i=0; i<graph->nbnd; i++)
    touched[graph->bndind[i]] = 1;

  queue = cind;

  /* Number of vertices that have to be assigned to a component */
  nleft = 0;
  for (i=0; i<nvtxs; i++) {
    if (where[i] != 2) 
      nleft++;
  }

  /* Seed the search with the first such vertex, if there is one */
  for (i=0; i<nvtxs; i++) {
    if (where[i] != 2)
      break;
  }

  first = 0; last = 0;
  if (i < nvtxs) {
    touched[i] = 1;
    queue[last++] = i;
  }

  cptr[0] = 0;  /* This actually points to queue */
  ncmps = 0;
  while (first != nleft) {
    if (first == last) { /* Current component exhausted: find another starting vertex */
      cptr[++ncmps] = first;
      for (i=0; i<nvtxs; i++) {
        if (!touched[i])
          break;
      }
      queue[last++] = i;
      touched[i] = 1;
    }

    i = queue[first++];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (!touched[k]) {
        queue[last++] = k;
        touched[k] = 1;
      }
    }
  }
  cptr[++ncmps] = first;

  free(touched);

  return ncmps;
}
예제 #9
0
/*************************************************************************
* ComputeMaxCut: returns the largest per-partition cut.
*
* For every partition the weights of the edges that start in it and end
* in a different partition are accumulated (each edge counts 1 when
* graph->adjwgt is NULL).  The index of the partition with the largest
* total and that total are printed through mprintf, and the total is
* returned.
**************************************************************************/
idxtype ComputeMaxCut(GraphType *graph, idxtype nparts, idxtype *where)
{
  idxtype v, e, maxcut;
  idxtype *cuts;

  cuts = idxsmalloc(nparts, 0, "ComputeMaxCut: cuts");

  for (v=0; v<graph->nvtxs; v++) {
    for (e=graph->xadj[v]; e<graph->xadj[v+1]; e++) {
      if (where[v] == where[graph->adjncy[e]])
        continue;

      if (graph->adjwgt == NULL)
        cuts[where[v]]++;
      else
        cuts[where[v]] += graph->adjwgt[e];
    }
  }

  maxcut = cuts[idxargmax(nparts, cuts)];

  mprintf("%D => %D\n", idxargmax(nparts, cuts), maxcut);

  gk_free((void **)&cuts, LTERM);

  return maxcut;
}
예제 #10
0
/*************************************************************************
* IsConnected2: checks whether the graph is connected by enumerating its
* connected components with repeated breadth-first searches.
*
* The graph must contain at least one vertex, since vertex 0 seeds the
* first search unconditionally.
*
*   report - when non-zero and more than one component exists, prints the
*            component count followed by the sizes of the components
*            larger than 200 vertices
*
* Returns 1 when there is exactly one component, 0 otherwise.
*
* cptr needs one entry more than the number of components (cptr[c] and
* cptr[c+1] delimit component c), and there can be up to nvtxs
* components, so it is allocated with nvtxs+1 entries.  It used to have
* only nvtxs, which made the final cptr[++ncmps] write past the end for
* a single-vertex graph or a graph without edges.
**************************************************************************/
int IsConnected2(GraphType *graph, int report)
{
  int i, j, k, nvtxs, first, last, nleft, ncmps, connected;
  idxtype *xadj, *adjncy, *touched, *queue;
  idxtype *cptr;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;

  touched = idxsmalloc(nvtxs, 0, "IsConnected: touched");
  queue = idxmalloc(nvtxs, "IsConnected: queue");
  cptr = idxmalloc(nvtxs+1, "IsConnected: cptr");

  nleft = nvtxs;
  touched[0] = 1;
  queue[0] = 0;
  first = 0; last = 1;

  cptr[0] = 0;  /* This actually points to queue */
  ncmps = 0;
  while (first != nleft) {
    if (first == last) { /* Current component exhausted: find another starting vertex */
      cptr[++ncmps] = first;
      for (i=0; i<nvtxs; i++) {
        if (!touched[i])
          break;
      }
      queue[last++] = i;
      touched[i] = 1;
    }

    i = queue[first++];
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (!touched[k]) {
        queue[last++] = k;
        touched[k] = 1;
      }
    }
  }
  cptr[++ncmps] = first;

  if (ncmps > 1 && report) {
    printf("%d connected components:\t", ncmps);
    for (i=0; i<ncmps; i++) {
      if (cptr[i+1]-cptr[i] > 200)
        printf("[%5d] ", cptr[i+1]-cptr[i]);
    }
    printf("\n");
  }

  connected = (ncmps == 1 ? 1 : 0);

  GKfree(&touched, &queue, &cptr, LTERM);

  return connected;
}
예제 #11
0
/*****************************************************************************
* TETNODALMETIS: builds the nodal graph of a mesh with 4 nodes per element.
*
*   nelmnts, nvtxs - number of elements and of nodes
*   elmnts         - 4*nelmnts node ids, four consecutive entries per element
*   dxadj, dadjncy - output adjacency structure in CSR form (dxadj needs
*                    nvtxs+1 entries; dadjncy is filled sequentially)
*
* Two distinct nodes become adjacent when they belong to a common element.
******************************************************************************/
void TETNODALMETIS(int nelmnts, int nvtxs, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
   int v, p, e, c, base, nbr, nedges;
   idxtype *nptr, *nind;
   idxtype *mark;

   /* Node -> element list: count, prefix-sum, then scatter */
   nptr = idxsmalloc(nvtxs+1, 0, "TETNODALMETIS: nptr");
   for (p=0; p<4*nelmnts; p++)
     nptr[elmnts[p]]++;
   MAKECSR(v, nvtxs, nptr);

   nind = idxmalloc(nptr[nvtxs], "TETNODALMETIS: nind");
   for (e=0; e<nelmnts; e++) {
     for (c=0; c<4; c++)
       nind[nptr[elmnts[4*e+c]]++] = e;
   }
   /* The scatter advanced every start pointer; shift them back */
   for (v=nvtxs; v>0; v--)
     nptr[v] = nptr[v-1];
   nptr[0] = 0;

   /* mark[x] == v means x is already in v's list; presetting mark[v]
      keeps v out of its own list */
   mark = idxsmalloc(nvtxs, -1, "TETNODALMETIS: mark");

   nedges = dxadj[0] = 0;
   for (v=0; v<nvtxs; v++) {
     mark[v] = v;
     for (p=nptr[v]; p<nptr[v+1]; p++) {
       base = 4*nind[p];
       for (c=0; c<4; c++) {
         nbr = elmnts[base+c];
         if (mark[nbr] == v)
           continue;
         mark[nbr] = v;
         dadjncy[nedges++] = nbr;
       }
     }
     dxadj[v+1] = nedges;
   }

   free(mark);
   free(nptr);
   free(nind);

}
예제 #12
0
파일: match.c 프로젝트: mrklein/ParMGridGen
/*************************************************************************
* Match_RM: computes a random matching and passes it to
* CreateCoarseGraph().
*
* Vertices are visited in the order RandomPermute() stores in perm.
* Every vertex that is still unmatched is paired with the first
* unmatched neighbour in its adjacency list whose weight, added to its
* own, does not exceed ctrl->maxsize; if there is none the vertex is
* matched with itself.
*
* graph->cmap is allocated here and stays attached to the graph; the
* match and perm scratch arrays are released before returning.
**************************************************************************/
void Match_RM(CtrlType *ctrl, GraphType *graph)
{
  int pos, v, u, e, nvtxs, ncoarse, partner;
  idxtype *xadj, *vwgt, *adjncy;
  idxtype *match, *cmap, *perm;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;

  cmap = graph->cmap = idxsmalloc(nvtxs, -1, "graph->cmap");
  match = idxsmalloc(nvtxs, -1, "match");
  perm = idxmalloc(nvtxs, "perm");

  RandomPermute(nvtxs, perm, 1);

  ncoarse = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];

    if (match[v] != UNMATCHED)
      continue;

    /* Take the first admissible neighbour; fall back to v itself */
    partner = v;
    for (e=xadj[v]; e<xadj[v+1] && partner == v; e++) {
      u = adjncy[e];
      if (match[u] == UNMATCHED && vwgt[v]+vwgt[u] <= ctrl->maxsize)
        partner = u;
    }

    cmap[partner] = ncoarse;
    cmap[v] = ncoarse;
    ncoarse++;

    match[v] = partner;
    match[partner] = v;
  }

  CreateCoarseGraph(graph, ncoarse, match, perm);

  IMfree((void**)&match, &perm, LTERM);
}
예제 #13
0
파일: debug.c 프로젝트: cran/BigQuic
/*************************************************************************
* ComputeRAsso: computes the ratio association of a clustering.
*
* For every cluster the weights of the adjacency entries whose two end
* points both lie in that cluster are accumulated (each entry counts 1
* when graph->adjwgt is NULL) and divided by the number of vertices of
* the cluster.  The return value is the sum of these ratios over all
* non-empty clusters.
*
*   where - where[i] is the cluster of vertex i, in [0, npart)
**************************************************************************/
float ComputeRAsso(GraphType *graph, idxtype *where, int npart)
{
  int v, e, c, home, nvtxs;
  idxtype *assoc, *members, *xadj, *adjncy, *adjwgt;
  float result;

  assoc = idxsmalloc(npart, 0, "ComputeNCut: ncut");
  members = idxsmalloc(npart, 0, "ComputeNCut: degree");

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  /* Cluster sizes */
  for (v=0; v<nvtxs; v++)
    members[where[v]] ++;

  /* Weight that stays inside each cluster */
  for (v=0; v<nvtxs; v++) {
    home = where[v];
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      if (home != where[adjncy[e]])
        continue;

      if (adjwgt == NULL)
        assoc[home] ++;
      else
        assoc[home] += adjwgt[e];
    }
  }

  result =0;
  for (c=0; c<npart; c++) {
    if (members[c] >0)
      result +=  assoc[c] *1.0/ members[c];
  }

  free(assoc);
  free(members);

  return result;
}
예제 #14
0
/******************************************************************************
* ComputeRealCut2: gathers two distributed partition vectors on processor 0,
* reads the original graph from a file and computes the edge-cut there.
*
*   vtxdist, part   - distribution and contents of the first vector; its
*                     values are used as bucket ids to build a permutation
*                     (a counting sort of the vertices by part value, with
*                     npes buckets)
*   mvtxdist, mpart - distribution and contents of the second vector, which
*                     is looked up through that permutation
*   filename        - graph file handed to ReadMetisGraph()
*
* Processors other than 0 only send their two local pieces and return 0.
* Processor 0 returns the number of edges whose end points carry different
* gmpart values (every edge is seen twice, hence the division by 2).
*******************************************************************************/
int ComputeRealCut2(idxtype *vtxdist, idxtype *mvtxdist, idxtype *part, idxtype *mpart, char *filename, MPI_Comm comm)
{
  int pe, v, e, nvtxs, mype, npes, cut;
  idxtype *xadj, *adjncy, *gpart, *gmpart, *perm, *sizes;
  MPI_Status status;

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* Everybody but processor 0 just ships its data */
  if (mype != 0) {
    MPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_DATATYPE, 0, 1, comm);
    MPI_Send((void *)mpart, mvtxdist[mype+1]-mvtxdist[mype], IDX_DATATYPE, 0, 1, comm);
    return 0;
  }

  /* Processor 0: assemble the global vectors, own piece first */
  gpart = idxmalloc(vtxdist[npes], "ComputeRealCut: gpart");
  idxcopy(vtxdist[1], part, gpart);
  gmpart = idxmalloc(mvtxdist[npes], "ComputeRealCut: gmpart");
  idxcopy(mvtxdist[1], mpart, gmpart);

  for (pe=1; pe<npes; pe++) {
    MPI_Recv((void *)(gpart+vtxdist[pe]), vtxdist[pe+1]-vtxdist[pe], IDX_DATATYPE, pe, 1, comm, &status);
    MPI_Recv((void *)(gmpart+mvtxdist[pe]), mvtxdist[pe+1]-mvtxdist[pe], IDX_DATATYPE, pe, 1, comm, &status);
  }

  /* Rebuild the permutation that takes the graph to mgraph */
  perm = idxmalloc(vtxdist[npes], "ComputeRealCut: perm");
  sizes = idxsmalloc(npes+1, 0, "ComputeRealCut: sizes");

  for (v=0; v<vtxdist[npes]; v++)
    sizes[gpart[v]]++;
  MAKECSR(v, npes, sizes);
  for (v=0; v<vtxdist[npes]; v++)
    perm[v] = sizes[gpart[v]]++;

  /* Read the original graph */
  ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy);

  /* Count the cut adjacency entries */
  cut = 0;
  for (v=0; v<nvtxs; v++) {
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      if (gmpart[perm[v]] != gmpart[perm[adjncy[e]]])
        cut++;
    }
  }
  cut = cut/2;

  GKfree(&gpart, &gmpart, &perm, &sizes, &xadj, &adjncy, LTERM);

  return cut;
}
예제 #15
0
/*************************************************************************
* METIS_FindContacts: entry point for detecting contacts between
* bounding boxes and surface nodes.
*
*   raw_cinfo - opaque handle, used as a ContactInfoType *
*   nboxes    - number of boxes
*   boxcoords - 6 doubles per box
*   nparts    - number of partitions; bounds the contacts of one box
*   r_cntptr  - output: array of *nboxes+1 offsets into *r_cntind
*   r_cntind  - output: concatenated contact lists of all boxes
*
* Both output arrays are allocated here and handed to the caller.
*
* The contact list starts with room for 6 entries per box and is grown
* on demand.  The growth step extrapolates the contacts seen so far to
* the boxes that are left.  Two defects of the previous growth code are
* fixed: it divided by the box index, which is zero for the first box,
* and the extrapolated increment could be too small to hold the contacts
* of the current box, so the copy that follows overran the buffer.
**************************************************************************/
void METIS_FindContacts(void *raw_cinfo, idxtype *nboxes, double *boxcoords, idxtype *nparts, 
               idxtype **r_cntptr, idxtype **r_cntind)
{
  idxtype i, ncnts, tncnts, maxtncnts, needed;
  idxtype *cntptr, *cntind, *auxcntind, *stack, *marker;
  ContactInfoType *cinfo;

  cinfo = (ContactInfoType *)raw_cinfo;

  maxtncnts = 6*(*nboxes);
  cntptr    = idxsmalloc(*nboxes+1, 0, "METIS_FindContacts: cntptr");
  cntind    = idxmalloc(maxtncnts, "METIS_FindContacts: cntind");
  auxcntind = idxmalloc(*nparts, "METIS_FindContacts: auxcntind");
  stack     = idxmalloc(cinfo->nnodes, "METIS_FindContacts: stack");
  marker    = idxsmalloc(*nparts, 0, "METIS_FindContacts: marker");
  

  /* Go through each box and determine its contacting partitions */
  for (tncnts=0, i=0; i<*nboxes; i++) {
    ncnts = FindBoxContacts(cinfo, boxcoords+i*6, stack, auxcntind, marker);

    if (ncnts == 0)
      mprintf("CSearchError: Box has no contacts!\n");
  
    if (ncnts + tncnts >= maxtncnts) {
      needed = tncnts + ncnts;

      /* Extrapolate the average seen so far to the remaining boxes; for
         the very first box there is no history, so assume every box is
         like this one */
      if (i > 0)
        maxtncnts += needed*(*nboxes-i)/i;
      else
        maxtncnts += needed*(*nboxes);

      /* Whatever the estimate says, the current box has to fit */
      if (maxtncnts <= needed)
        maxtncnts = needed+1;

      if ((cntind = (idxtype *)realloc(cntind, maxtncnts*sizeof(idxtype))) == NULL)
        errexit("Realloc failed! of %d words!\n", maxtncnts);
    }
    cntptr[i] = ncnts;
    idxcopy(ncnts, auxcntind, cntind+tncnts);
    tncnts += ncnts;
  }
  /* Turn the per-box counts into offsets */
  MAKECSR(i, *nboxes, cntptr); 

  *r_cntptr = cntptr;
  *r_cntind = cntind;

  gk_free((void **)&auxcntind, &stack, &marker, LTERM);

}
/*
 * getDegreeHistogram: builds a histogram of the vertex degrees.
 *
 *   graph     - graph in CSR form; the degree of vertex i is
 *               xadj[i+1]-xadj[i]
 *   maxDegree - output: the largest degree found (0 for a graph without
 *               edges or without vertices)
 *   logScale  - when > 0 the degrees are binned with getLogBin(),
 *               otherwise bin d counts the vertices of degree d
 *
 * Returns a newly allocated array that the caller owns.  It has
 * getLogBin(*maxDegree)+1 entries in log-scale mode (this relies, as the
 * code always did, on getLogBin() not decreasing with its argument) and
 * *maxDegree+1 entries otherwise.
 *
 * The log-scale bin count used to be read from a variable that was only
 * assigned when some vertex had a positive degree, so it was
 * uninitialized for graphs without edges.
 */
idxtype* getDegreeHistogram(GraphType* graph, int* maxDegree, int
	logScale)
{
	int i, deg, nbins;
	idxtype* hist;

	/* Pass 1: largest degree */
	*maxDegree = 0;
	for ( i=0; i<graph->nvtxs; i++ )
	{
		deg = graph->xadj[i+1] - graph->xadj[i];
		if ( deg > *maxDegree )
		{
			*maxDegree = deg;
		}
	}

	if ( logScale > 0 )
	{
		nbins = getLogBin(*maxDegree) + 1;
	}
	else
	{
		nbins = *maxDegree + 1;
	}
	if ( nbins < 1 )
	{
		nbins = 1;
	}

	hist = idxsmalloc(nbins, 0, "getDegreeHistogram:hist");

	/* Pass 2: fill the bins */
	for ( i=0; i<graph->nvtxs; i++ )
	{
		int l = graph->xadj[i+1]-graph->xadj[i];
		if ( logScale > 0 )
		{
			l = getLogBin(l);
		}
		hist[l]++;
	}
	
	return hist;
}
/*
 * getWeightsHistogram: builds a histogram of the edge weights.
 *
 *   graph     - graph in CSR form; the first xadj[nvtxs] entries of
 *               graph->adjwgt are read, so adjwgt must not be NULL
 *   maxWeight - output: the largest weight found (0 when there are no
 *               entries or none is positive)
 *   logScale  - when > 0 the weights are binned with getLogBin(),
 *               otherwise bin w counts the entries of weight w
 *
 * Returns a newly allocated array that the caller owns.  It has
 * getLogBin(*maxWeight)+1 entries in log-scale mode (this relies, as the
 * code always did, on getLogBin() not decreasing with its argument) and
 * *maxWeight+1 entries otherwise.
 *
 * The log-scale bin count used to be read from a variable that was only
 * assigned when some weight was positive, so it was uninitialized for
 * graphs without edges.  The allocation label now names this routine
 * instead of getDegreeHistogram.
 */
idxtype* getWeightsHistogram(GraphType* graph, int* maxWeight, int
	logScale)
{
	int i, nbins;
	idxtype* hist;

	/* Pass 1: largest weight */
	*maxWeight = 0;
	for ( i=0; i<graph->xadj[graph->nvtxs]; i++ )
	{
		if ( graph->adjwgt[i] > *maxWeight )
		{
			*maxWeight = graph->adjwgt[i];
		}
	}

	if ( logScale > 0 )
	{
		nbins = getLogBin(*maxWeight) + 1;
	}
	else
	{
		nbins = *maxWeight + 1;
	}
	if ( nbins < 1 )
	{
		nbins = 1;
	}

	hist = idxsmalloc(nbins, 0, "getWeightsHistogram:hist");

	/* Pass 2: fill the bins */
	for ( i=0; i<graph->xadj[graph->nvtxs]; i++ )
	{
		int l = graph->adjwgt[i];
		if ( logScale > 0 )
		{
			l = getLogBin(l);
		}
		hist[l]++;
	}
	
	return hist;
}
예제 #18
0
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
*
*   vtxdist, xadj, adjncy - the distributed graph; vtxdist[npes] is used as the
*                           global number of vertices
*   vwgt, adjwgt, wgtflag - vertex/edge weights and the flag that is forwarded to
*                           SetUpGraph() and FreeInitialGraphAndRemap()
*   numflag               - when *numflag == 1 the arrays are passed through
*                           ChangeNumbering() on entry and again on exit
*   options               - forwarded to SetUpCtrl()
*   edgecut               - output: graph->mincut of the final partitioning
*                           (0 when running on a single processor)
*   part                  - output: graph->where of the local vertices
*   comm                  - MPI communicator; every processor in it must call
*
* With a single processor the routine assigns every vertex to partition 0 and
* returns immediately.
************************************************************************************/
void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, 
       idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options,
       int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  if (npes == 1) { /* Take care the npes = 1 case */
    idxset(vtxdist[1], 0, part);
    *edgecut = 0;
    return;
  }

  /* NOTE(review): the trailing 1/0 argument presumably selects the direction of
     the renumbering (to the internal numbering here, back again at the end) -
     confirm against ChangeNumbering() */
  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  SetUpCtrl(&ctrl, npes, options, *comm);
  /* Coarsening target: 70 per processor, capped at the global vertex count + 1 */
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes);

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);
  /* Every vertex gets size 1; this array is released again below */
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize");

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype));
  AdaptiveUndirected_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype));
  ReMapGraph(&ctrl, graph, 0, &wspace);

  /* Hand the result back to the caller before the graph is torn down */
  idxcopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  IMfree((void**)&graph->vsize, LTERM);
  FreeInitialGraphAndRemap(graph, *wgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}
예제 #19
0
파일: estmem.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* ComputeCoarseGraphSize: returns the number of adjacency entries that
* the coarse graph defined by a matching will have.
*
*   nvtxs, xadj, adjncy - the fine graph in CSR form
*   cnvtxs              - number of coarse vertices
*   cmap                - cmap[v] is the coarse vertex of fine vertex v
*   match               - match[v] is the partner of v (v itself if alone)
*   perm                - the visiting order that was used to build cmap
*
* Coarse vertices are processed in increasing order; a fine vertex is
* taken as the representative of the next coarse vertex when its cmap
* value equals the running counter.  For each coarse vertex the distinct
* coarse neighbours of its one or two members are counted; the coarse
* vertex itself is excluded.
**************************************************************************/
int ComputeCoarseGraphSize(int nvtxs, idxtype *xadj, idxtype *adjncy, int cnvtxs, idxtype *cmap, idxtype *match, idxtype *perm)
{
  int pos, e, s, v, cv, nmembers, cnedges;
  int members[2];
  idxtype *htable;

  /* htable[c] == current coarse vertex  <=>  c has already been counted for it */
  htable = idxsmalloc(cnvtxs, -1, "htable");

  cnvtxs = cnedges = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];
    if (cmap[v] != cnvtxs) 
      continue;

    /* Mark the coarse vertex itself so internal edges are not counted */
    htable[cnvtxs] = cnvtxs;

    members[0] = v;
    members[1] = match[v];
    nmembers = (members[1] != v ? 2 : 1);

    for (s=0; s<nmembers; s++) {
      for (e=xadj[members[s]]; e<xadj[members[s]+1]; e++) {
        cv = cmap[adjncy[e]];
        if (htable[cv] == cnvtxs)
          continue;
        htable[cv] = cnvtxs;
        cnedges++;
      }
    }

    cnvtxs++;
  }

  GKfree(&htable, LTERM);

  return cnedges;
}
예제 #20
0
/*************************************************************************
* PrintSubDomainGraph: prints summary statistics of the subdomain graph.
*
* An nparts x nparts matrix accumulates, for every ordered pair of
* distinct partitions, the weight of the edges going from the first to
* the second.  The routine then prints the total number of positive
* entries and the largest number of positive entries in one row.
*
*   where - where[i] is the partition of vertex i, in [0, nparts)
**************************************************************************/
void PrintSubDomainGraph(GraphType *graph, int nparts, idxtype *where)
{
  int v, e, row, col, home, other, nadjacent, nvtxs, total, max;
  idxtype *xadj, *adjncy, *adjwgt, *pmat;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  pmat = idxsmalloc(nparts*nparts, 0, "ComputeSubDomainGraph: pmat");

  /* Accumulate the inter-partition edge weights */
  for (v=0; v<nvtxs; v++) {
    home = where[v];
    for (e=xadj[v]; e<xadj[v+1]; e++) {
      other = where[adjncy[e]];
      if (other == home)
        continue;
      pmat[home*nparts+other] += adjwgt[e];
    }
  }

  /* Count the adjacent subdomains of every partition */
  total = 0;
  max = 0;
  for (row=0; row<nparts; row++) {
    nadjacent = 0;
    for (col=0; col<nparts; col++) {
      if (pmat[row*nparts+col] > 0)
        nadjacent++;
    }

    total += nadjacent;
    if (nadjacent > max)
      max = nadjacent;
  }
  printf("Total adjacent subdomains: %d, Max: %d\n", total, max);

  free(pmat);
}
예제 #21
0
/*************************************************************************
* ComputeElementBalance: returns the load imbalance of an element
* partitioning, i.e. nparts * (largest partition size) / (sum of the
* partition sizes).
*
*   ne    - number of elements
*   where - where[i] is the partition of element i, in [0, nparts)
**************************************************************************/
float ComputeElementBalance(int ne, int nparts, idxtype *where)
{
  int e;
  idxtype *counts;
  float balance;

  counts = idxsmalloc(nparts, 0, "ComputeElementBalance: kpwgts");

  /* Elements per partition */
  e = 0;
  while (e < ne) {
    counts[where[e]]++;
    e++;
  }

  balance = 1.0*nparts*counts[idxamax(nparts, counts)]/(1.0*idxsum(nparts, counts));

  free(counts);

  return balance;

}
예제 #22
0
/*************************************************************************
* BucketSortKeysInc: counting sort that produces a permutation listing
* the items in increasing key order.
*
*   n     - number of items
*   max   - largest key value; keys must lie in [0, max]
*   keys  - keys[i] is the key of item i
*   tperm - the order in which the items are scanned; items with equal
*           keys keep this relative order in the output
*   perm  - output: the n item ids sorted by key
*
* This sorting is used during matching.
**************************************************************************/
void BucketSortKeysInc(int n, int max, idxtype *keys, idxtype *tperm, idxtype *perm)
{
  int item, pos;
  idxtype *slot;

  slot = idxsmalloc(max+2, 0, "BucketSortKeysInc: counts");

  /* Histogram of the keys, then turn it into bucket start offsets */
  for (item=0; item<n; item++)
    slot[keys[item]]++;
  MAKECSR(item, max+1, slot);

  /* Drop every item into the next free slot of its bucket */
  for (pos=0; pos<n; pos++) {
    item = tperm[pos];
    perm[slot[keys[item]]] = item;
    slot[keys[item]]++;
  }

  free(slot);
}
예제 #23
0
/*************************************************************************
* BucketSortKeysInc: counting sort that produces a permutation listing
* the items in increasing key order.  The keys are assumed to start from
* 0 and to be positive.
*
*   n     - number of items
*   max   - largest key value; keys must lie in [0, max]
*   keys  - keys[i] is the key of item i
*   tperm - the order in which the items are scanned; items with equal
*           keys keep this relative order in the output
*   perm  - output: the n item ids sorted by key
*
* This sorting is used during matching.
**************************************************************************/
void BucketSortKeysInc(idxtype n, idxtype max, idxtype *keys, idxtype *tperm, idxtype *perm)
{
  idxtype item, pos;
  idxtype *slot;

  slot = idxsmalloc(max+2, 0, "BucketSortKeysInc: counts");

  /* Histogram of the keys, then turn it into bucket start offsets */
  for (item=0; item<n; item++)
    slot[keys[item]]++;
  MAKECSR(item, max+1, slot);

  /* Drop every item into the next free slot of its bucket */
  for (pos=0; pos<n; pos++) {
    item = tperm[pos];
    perm[slot[keys[item]]] = item;
    slot[keys[item]]++;
  }

  gk_free((void **)&slot, LTERM);
}
예제 #24
0
/*
 * pingpong: alternates batch refinement and local search until the local
 * search stops making moves.
 *
 * Each round calls Weighted_kernel_k_means() and, when chain_length > 0,
 * local_search() with chain_length as the search length.  The rounds stop
 * as soon as the last local search reported no moves (which is always the
 * case when chain_length <= 0) or once more than MAXITERATIONS rounds have
 * run.  Afterwards, unless the global memory_saving is set,
 * remove_empty_clusters_l1() is called, followed by
 * remove_empty_clusters_l2() when toplevel > 0.
 *
 * The vertex weight vector is computed once with Compute_Weights() and
 * shared by all of these calls.
 */
void pingpong(CtrlType *ctrl, GraphType *graph, int nparts, int chain_length, float *tpwgts, float ubfactor, int toplevel)
{
  int nvtxs, moves, iter;
  idxtype *w;

  nvtxs = graph->nvtxs;

  w = idxsmalloc(nvtxs, 0, "pingpong: weight");
  Compute_Weights(ctrl, graph, w);

  moves = 0;
  for (iter=1; ; iter++) {
    Weighted_kernel_k_means(ctrl, graph, nparts, w, tpwgts, ubfactor);

    if (chain_length > 0)
      moves = local_search(ctrl, graph, nparts, chain_length, w, tpwgts, ubfactor);

    /* iter counts the rounds completed so far */
    if (iter > MAXITERATIONS || moves <= 0)
      break;
  }

  if (memory_saving == 0) {
    remove_empty_clusters_l1(ctrl, graph, nparts, w, tpwgts, ubfactor);
    if (toplevel > 0)
      remove_empty_clusters_l2(ctrl, graph, nparts, w, tpwgts, ubfactor);
  }

  free(w);
}
예제 #25
0
파일: estmem.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* EstimateCFraction: estimates how much one level of coarsening shrinks
* the graph.
*
* A random matching is computed (every unmatched vertex, visited in the
* order RandomPermute() stores in perm, is paired with the first
* unmatched neighbour in its adjacency list, or with itself) and the
* size of the resulting coarse graph is obtained from
* ComputeCoarseGraphSize().
*
*   vfraction - output: coarse vertices / fine vertices
*   efraction - output: coarse adjacency entries / xadj[nvtxs]
**************************************************************************/
void EstimateCFraction(int nvtxs, idxtype *xadj, idxtype *adjncy, floattype *vfraction, floattype *efraction)
{
  int pos, v, e, ncoarse, ncoarse_edges, partner;
  idxtype *match, *cmap, *perm;

  cmap = idxmalloc(nvtxs, "cmap");
  match = idxsmalloc(nvtxs, UNMATCHED, "match");
  perm = idxmalloc(nvtxs, "perm");
  RandomPermute(nvtxs, perm, 1);

  ncoarse = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = perm[pos];

    if (match[v] != UNMATCHED)
      continue;

    /* Take the first unmatched neighbour; fall back to v itself */
    partner = v;
    for (e=xadj[v]; e<xadj[v+1] && partner == v; e++) {
      if (match[adjncy[e]] == UNMATCHED)
        partner = adjncy[e];
    }

    cmap[partner] = ncoarse;
    cmap[v] = ncoarse;
    ncoarse++;

    match[v] = partner;
    match[partner] = v;
  }

  ncoarse_edges = ComputeCoarseGraphSize(nvtxs, xadj, adjncy, ncoarse, cmap, match, perm);

  *vfraction = (1.0*ncoarse)/(1.0*nvtxs);
  *efraction = (1.0*ncoarse_edges)/(1.0*xadj[nvtxs]);

  GKfree(&cmap, &match, &perm, LTERM);
}
예제 #26
0
/*************************************************************************
* Mc_ComputeMoveStatistics: computes movement statistics for adaptive
* refinement schemes.
*
* Every local vertex contributes its size (graph->vsize[i], or 1 when
* graph->vsize is NULL).  Its home is ctrl->mype when ctrl->ps_relation
* is COUPLED and graph->home[i] otherwise.  Three per-partition totals
* are summed over all processors with MPI_Allreduce:
*     start[p] - size whose home is p
*     left[p]  - size whose home is p but whose where[] is not p
*     end[p]   - size whose where[] is p
*
*   *nmoved - sum of left[] over all partitions
*   *maxout - largest left[p]
*   *maxin  - largest end[p] + left[p] - start[p]
*
* Every processor of ctrl->comm has to call this routine.
**************************************************************************/
void Mc_ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
  int v, p, nvtxs, nparts, myhome;
  idxtype vsz;
  idxtype *where;
  idxtype *lend, *gend, *lleft, *gleft, *lstart, *gstart;

  nvtxs = graph->nvtxs;
  where = graph->where;
  nparts = ctrl->nparts;

  lstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lstart");
  gstart = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gstart");
  lleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lleft");
  gleft = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gleft");
  lend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: lend");
  gend = idxsmalloc(nparts, 0, "ComputeMoveStatistics: gend");

  /* Local contributions */
  for (v=0; v<nvtxs; v++) {
    myhome = (ctrl->ps_relation == COUPLED) ? ctrl->mype : graph->home[v];
    vsz = (graph->vsize == NULL) ? 1 : graph->vsize[v];

    lstart[myhome] += vsz;
    lend[where[v]] += vsz;
    if (where[v] != myhome)
      lleft[myhome] += vsz;
  }

  /* Global totals */
  MPI_Allreduce((void *)lstart, (void *)gstart, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lleft, (void *)gleft, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);
  MPI_Allreduce((void *)lend, (void *)gend, nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

  *nmoved = idxsum(nparts, gleft);
  *maxout = gleft[idxamax(nparts, gleft)];

  /* lstart is reused as scratch for the per-partition value whose maximum is *maxin */
  for (p=0; p<nparts; p++)
    lstart[p] = gend[p]+gleft[p]-gstart[p];
  *maxin = lstart[idxamax(nparts, lstart)];

  GKfree((void **)&lstart, (void **)&gstart, (void **)&lleft, (void **)&gleft, (void **)&lend, (void **)&gend, LTERM);
}
예제 #27
0
파일: kwayfm.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* This function performs k-way refinement
*
*   ctrl    - control structure (npes, mype, nparts, ubvec, tpwgts, comm,
*             request/status arrays, debug level)
*   graph   - the local graph; where[], rinfo[], lnpwgts[], gnpwgts[],
*             lmincut and mincut are updated in place
*   wspace  - scratch space; pairs, indices and pv4 are used here
*   npasses - maximum number of passes; the loop also stops as soon as a
*             pass leaves graph->mincut unchanged
*
* Each pass runs two sub-phases (c=0,1); a move from -> to is only
* considered in the sub-phase for which ProperSide(c, pperm[from],
* pperm[to]) holds.  Within a sub-phase:
*   1. PASS ONE tentatively moves vertices, recording the result in the
*      tmp_where/tmp_rinfo copies and in lnpwgts/gnpwgts/movewgts.
*   2. The proposed partition weights are summed over all pe's and an
*      overfill amount is computed per partition/constraint.
*   3. Tentative moves into overfilled partitions are undone.
*   4. PASS TWO commits the surviving moves to where[], the new where[]
*      of interface vertices is communicated, and rinfo/tmp_rinfo of every
*      affected vertex is recomputed from scratch.
**************************************************************************/
void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
{
  int h, i, ii, iii, j, k, c;
  int pass, nvtxs, nedges, ncon;
  int nmoves, nmoved, nswaps, nzgswaps;
/*  int gnswaps, gnzgswaps; */
  int me, firstvtx, lastvtx, yourlastvtx;
  int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  int nlupd, nsupd, nnbrs, nchanged;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  idxtype *where, *tmp_where, *moved;
  floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
  idxtype *update, *supdate, *rupdate, *pe_updates;
  idxtype *changed, *perm, *pperm, *htable;
  idxtype *peind, *recvptr, *sendptr;
  KeyValueType *swchanges, *rwchanges;
  RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
  EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
  floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
  int *nupds_pe;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  /*************************/
  /* set up common aliases */
  /*************************/
  nvtxs = graph->nvtxs;
  nedges = graph->nedges;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  where   = graph->where;
  rinfo   = graph->rinfo;
  lnpwgts = graph->lnpwgts;
  gnpwgts = graph->gnpwgts;
  ubvec   = ctrl->ubvec;
  tpwgts  = ctrl->tpwgts;

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  changed = idxmalloc(nvtxs, "KWR: changed");
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  /************************************/
  /* set up important data structures */
  /************************************/
  perm = idxmalloc(nvtxs, "KWR: perm");
  pperm = idxmalloc(nparts, "KWR: pperm");

  update = idxmalloc(nvtxs, "KWR: update");
  supdate = wspace->indices;
  rupdate = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWR: nupds_pe");
  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
  badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");

  /* badmaxpwgt: largest weight a partition may reach for each constraint */
  for (i=0; i<nparts; i++) {
    for (h=0; h<ncon; h++) {
      badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
    }
  }

  movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
  ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
  pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
  overfill = fmalloc(nparts*ncon, "KWR: overfill");
  moved = idxmalloc(nvtxs, "KWR: moved");
  tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
  tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
  tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");

  /* tmp_where/tmp_rinfo start out as copies of where/rinfo; vertex i gets
     the xadj[i]..xadj[i+1] slice of tmp_edegrees for its degree list */
  idxcopy(nvtxs+graph->nrecv, where, tmp_where);
  for (i=0; i<nvtxs; i++) {
    tmp_rinfo[i].id = rinfo[i].id;
    tmp_rinfo[i].ed = rinfo[i].ed;
    tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
    tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];

    for (j=0; j<rinfo[i].ndegrees; j++) {
      tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
      tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
    }
  }

  nswaps = nzgswaps = 0;
  /*********************************************************/
  /* perform a small number of passes through the vertices */
  /*********************************************************/
  for (pass=0; pass<npasses; pass++) {
    /* pe 0 draws the partition permutation and shares it with everyone */
    if (mype == 0)
      RandomPermute(nparts, pperm, 1);
    MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
    FastRandomPermute(nvtxs, perm, 1);
    oldcut = graph->mincut;

    /* check to see if the partitioning is imbalanced */
    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    ubavg = savg(ncon, ubvec);
    lbavg = savg(ncon, lbvec);
    imbalanced = (lbavg > ubavg) ? 1 : 0;

    for (c=0; c<2; c++) {
      scopy(ncon*nparts, gnpwgts, ognpwgts);
      sset(ncon*nparts, 0.0, movewgts);
      nmoved = 0;

      /**********************************************/
      /* PASS ONE -- record stats for desired moves */
      /**********************************************/
      for (iii=0; iii<nvtxs; iii++) {
        i = perm[iii];
        from = tmp_where[i];
        nvwgt = graph->nvwgt+i*ncon;

        /* skip the vertex if, for some constraint, its weight equals the
           whole weight of its partition (moving it would empty it) */
        for (h=0; h<ncon; h++)
          if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
            break;

        if (h < ncon) {
          continue;
        }

        /* check for a potential improvement */
        if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
          my_edegrees = tmp_rinfo[i].degrees;

          /* find the first neighboring subdomain that is on the proper
             side and can absorb the vertex without exceeding badmaxpwgt */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
            to = my_edegrees[k].edge;
            if (ProperSide(c, pperm[from], pperm[to])) {
              for (h=0; h<ncon; h++)
                if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                  break;

              if (h == ncon)
                break;
            }
          }
          oldto = to;

          /* check if a subdomain was found that fits */
          if (k < tmp_rinfo[i].ndegrees) {
            /* look for a better candidate among the remaining subdomains:
               larger connectivity, or equal connectivity and better balance */
            for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
              to = my_edegrees[j].edge;
              if (ProperSide(c, pperm[from], pperm[to])) {
                for (h=0; h<ncon; h++)
                  if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                    break;

                if (h == ncon) {
                  if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
                   (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
                   IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
                    k = j;
                    oldto = my_edegrees[k].edge;
                  }
                }
              }
            }
            to = oldto;

            /* move on positive gain, or on zero gain when the balance
               improves and one of the listed conditions holds */
            if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
            (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
            (imbalanced ||  graph->level > 3  || iii % 8 == 0) &&
            IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){

              /****************************************/
              /* Update tmp arrays of the moved vertex */
              /****************************************/
              tmp_where[i] = to;
              moved[nmoved++] = i;
              for (h=0; h<ncon; h++) {
                lnpwgts[to*ncon+h] += nvwgt[h];
                lnpwgts[from*ncon+h] -= nvwgt[h];
                gnpwgts[to*ncon+h] += nvwgt[h];
                gnpwgts[from*ncon+h] -= nvwgt[h];
                movewgts[to*ncon+h] += nvwgt[h];
                movewgts[from*ncon+h] -= nvwgt[h];
              }

              /* the old internal degree becomes the external degree
                 towards 'from' and vice versa (j is the swap temporary) */
              tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
              SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
              if (my_edegrees[k].ewgt == 0) {
                tmp_rinfo[i].ndegrees--;
                my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
                my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
              }
              else {
                my_edegrees[k].edge = from;
              }

              /* Update the degrees of adjacent vertices */
              for (j=xadj[i]; j<xadj[i+1]; j++) {
                /* no need to bother about vertices on different pe's */
                if (ladjncy[j] >= nvtxs)
                  continue;

                me = ladjncy[j];
                mydomain = tmp_where[me];

                myrinfo = tmp_rinfo+me;
                your_edegrees = myrinfo->degrees;

                if (mydomain == from) {
                  INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
                }
                else {
                  if (mydomain == to) {
                    INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                  }
                }

                /* Remove contribution from the .ed of 'from' */
                if (mydomain != from) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == from) {
                      if (your_edegrees[k].ewgt == adjwgt[j]) {
                        myrinfo->ndegrees--;
                        your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                        your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                      }
                      else {
                        your_edegrees[k].ewgt -= adjwgt[j];
                      }
                      break;
                    }
                  }
                }

                /* Add contribution to the .ed of 'to' */
                if (mydomain != to) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == to) {
                      your_edegrees[k].ewgt += adjwgt[j];
                      break;
                    }
                  }
                  if (k == myrinfo->ndegrees) {
                    your_edegrees[myrinfo->ndegrees].edge = to;
                    your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                  }
                }
              }
            }
          }
        }
      }

      /******************************************/
      /* Let processors know the subdomain wgts */
      /* if all proposed moves commit.          */
      /******************************************/
      /* NOTE(review): MPI_DOUBLE assumes floattype is double -- confirm */
      MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);

      /**************************/
      /* compute overfill array */
      /**************************/
      overweight = 0;
      for (j=0; j<nparts; j++) {
        for (h=0; h<ncon; h++) {
          /* fraction of the globally proposed weight gain that exceeds the
             limit; only computed for partitions that would grow */
          if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
            overfill[j*ncon+h] =
            (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) /
            (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]);
          }
          else {
            overfill[j*ncon+h] = 0.0;
          }

          /* scale by this pe's own net movement into the partition */
          overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
          overfill[j*ncon+h] *= movewgts[j*ncon+h];

          if (overfill[j*ncon+h] > 0.0)
            overweight = 1;

          ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] ||
          pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h],
          (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h],
          badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));
        }
      }

      /****************************************************/
      /* select moves to undo according to overfill array */
      /****************************************************/
      if (overweight == 1) {
        for (iii=0; iii<nmoved; iii++) {
          i = moved[iii];
          oldto = tmp_where[i];
          nvwgt = graph->nvwgt+i*ncon;
          my_edegrees = tmp_rinfo[i].degrees;

          /* locate the entry for the domain the vertex came from */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++)
            if (my_edegrees[k].edge == where[i])
              break;

          /* does the destination still have overfill to shed? */
          for (h=0; h<ncon; h++)
            if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)
              break;

          /**********************************/
          /* nullify this move if necessary */
          /**********************************/
          if (k != tmp_rinfo[i].ndegrees && h != ncon) {
            moved[iii] = -1;
            from = oldto;
            to = where[i];

            for (h=0; h<ncon; h++) {
              overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);
            }

            tmp_where[i] = to;
            tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
            SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
            if (my_edegrees[k].ewgt == 0) {
              tmp_rinfo[i].ndegrees--;
              my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
              my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
            }
            else {
              my_edegrees[k].edge = from;
            }

            for (h=0; h<ncon; h++) {
              lnpwgts[to*ncon+h] += nvwgt[h];
              lnpwgts[from*ncon+h] -= nvwgt[h];
            }

            /* Update the degrees of adjacent vertices */
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              /* no need to bother about vertices on different pe's */
              if (ladjncy[j] >= nvtxs)
                continue;

              me = ladjncy[j];
              mydomain = tmp_where[me];

              myrinfo = tmp_rinfo+me;
              your_edegrees = myrinfo->degrees;

              if (mydomain == from) {
                INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
              }
              else {
                if (mydomain == to) {
                  INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                }
              }

              /* Remove contribution from the .ed of 'from' */
              if (mydomain != from) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == from) {
                    if (your_edegrees[k].ewgt == adjwgt[j]) {
                      myrinfo->ndegrees--;
                      your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                      your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                    }
                    else {
                      your_edegrees[k].ewgt -= adjwgt[j];
                    }
                    break;
                  }
                }
              }

              /* Add contribution to the .ed of 'to' */
              if (mydomain != to) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == to) {
                    your_edegrees[k].ewgt += adjwgt[j];
                    break;
                  }
                }
                if (k == myrinfo->ndegrees) {
                  your_edegrees[myrinfo->ndegrees].edge = to;
                  your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                }
              }
            }
          }
        }
      }

      /*************************************************/
      /* PASS TWO -- commit the remainder of the moves */
      /*************************************************/
      nlupd = nsupd = nmoves = nchanged = 0;
      for (iii=0; iii<nmoved; iii++) {
        i = moved[iii];
        if (i == -1)
          continue;

        where[i] = tmp_where[i];

        /* Make sure to update the vertex information */
        if (htable[i] == 0) {
          /* make sure you do the update */
          htable[i] = 1;
          update[nlupd++] = i;
        }

        /* Put the vertices adjacent to i into the update array */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (htable[k] == 0) {
            htable[k] = 1;
            if (k<nvtxs)
              update[nlupd++] = k;
            else
              supdate[nsupd++] = k;
          }
        }
        nmoves++;
        nswaps++;

        /* check number of zero-gain moves.  rinfo[i] has not been
           refreshed yet, so it still describes the vertex before the move;
           look up the entry for the domain it was committed to (where[i]).
           The variable 'to' cannot be used here: it holds whatever the
           earlier loops left in it.  Guard against the entry not being
           found so degrees[] is never read past ndegrees. */
        for (k=0; k<rinfo[i].ndegrees; k++)
          if (rinfo[i].degrees[k].edge == where[i])
            break;
        if (k < rinfo[i].ndegrees && rinfo[i].id == rinfo[i].degrees[k].ewgt)
          nzgswaps++;

        /* vertices with a non-empty pexadj range are reported to other pe's */
        if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
          changed[nchanged++] = i;
      }

      /* Tell interested pe's the new where[] info for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
      swchanges, rwchanges, wspace->pv4);


      IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
      rprintf(ctrl, "\t[%d %d], [%.4f],  [%d %d %d]\n",
      pass, c, badmaxpwgt[0],
      GlobalSESum(ctrl, nmoves),
      GlobalSESum(ctrl, nsupd),
      GlobalSESum(ctrl, nlupd)));

      /*-------------------------------------------------------------
      / Time to communicate with processors to send the vertices
      / whose degrees need to be update.
      /-------------------------------------------------------------*/
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
                  peind[i], 1, ctrl->comm, ctrl->rreq+i);
      }

      /* Issue the sends next. This needs some preprocessing */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      }
      iidxsort(nsupd, supdate);

      /* after sorting, the entries destined for each neighbor are
         contiguous: everything below vtxdist[peind[i]+1] goes to peind[i] */
      for (j=i=0; i<nnbrs; i++) {
        yourlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++);
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
        j = k;
      }

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++)
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);


      /*-------------------------------------------------------------
      / Place the received to-be updated vertices into update[]
      /-------------------------------------------------------------*/
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;
          }
        }
      }


      /*-------------------------------------------------------------
      / Update the rinfo of the vertices in the update[] array
      /-------------------------------------------------------------*/
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        htable[i] = 0;

        mydomain = where[i];
        myrinfo = rinfo+i;
        tmp_myrinfo = tmp_rinfo+i;
        my_edegrees = myrinfo->degrees;
        your_edegrees = tmp_myrinfo->degrees;

        /* rebuild id/ed/degrees from the adjacency list, keeping the
           tmp_rinfo copy in lock-step with rinfo */
        graph->lmincut -= myrinfo->ed;
        myrinfo->ndegrees = 0;
        myrinfo->id = 0;
        myrinfo->ed = 0;

        for (j=xadj[i]; j<xadj[i+1]; j++) {
          yourdomain = where[ladjncy[j]];
          if (mydomain != yourdomain) {
            myrinfo->ed += adjwgt[j];

            for (k=0; k<myrinfo->ndegrees; k++) {
              if (my_edegrees[k].edge == yourdomain) {
                my_edegrees[k].ewgt += adjwgt[j];
                your_edegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              my_edegrees[k].edge = yourdomain;
              my_edegrees[k].ewgt = adjwgt[j];
              your_edegrees[k].edge = yourdomain;
              your_edegrees[k].ewgt = adjwgt[j];
              myrinfo->ndegrees++;
            }
            ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
            ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);

          }
          else {
            myrinfo->id += adjwgt[j];
          }
        }
        graph->lmincut += myrinfo->ed;

        tmp_myrinfo->id = myrinfo->id;
        tmp_myrinfo->ed = myrinfo->ed;
        tmp_myrinfo->ndegrees = myrinfo->ndegrees;
      }

      /* finally, sum-up the partition weights */
      MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);
    }
    /* every cut edge is counted from both of its endpoints */
    graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

    if (graph->mincut == oldcut)
      break;
  }

/*
  gnswaps = GlobalSESum(ctrl, nswaps);
  gnzgswaps = GlobalSESum(ctrl, nzgswaps);
  if (mype == 0)
    printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps);
*/

  GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
  GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
  GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
  GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
}
예제 #28
0
/*************************************************************************
* This function compresses a graph by merging vertices whose adjacency
* structure (with the diagonal added) is identical.
*
* cptr/cind receive, in CSR form, the original vertices that make up each
* compressed vertex.  If the number of compressed vertices is not below
* COMPRESSION_FRACTION*nvtxs, the compression is abandoned and 'graph' is
* set up over the caller's xadj/adjncy with unit weights instead.
**************************************************************************/
void CompressGraph(CtrlType *ctrl, GraphType *graph, int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *cptr, idxtype *cind)
{
  int pos, scan, v, u, e, ue, c, sum, target, nstored, cnvtxs, cnedges;
  idxtype *cxadj, *cadjncy, *cvwgt, *mark, *map;
  KeyValueType *keys;

  mark = idxsmalloc(nvtxs, -1, "CompressGraph: mark");
  map = idxsmalloc(nvtxs, -1, "CompressGraph: map");
  keys = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "CompressGraph: keys");

  /* Key of a vertex = sum of its neighbors' indices plus its own index
     (the diagonal entry) */
  for (v=0; v<nvtxs; v++) {
    sum = 0;
    for (e=xadj[v]; e<xadj[v+1]; e++)
      sum += adjncy[e];
    keys[v].key = sum+v;
    keys[v].val = v;
  }

  ikeysort(nvtxs, keys);

  /* Walk the vertices in key order; each unmapped vertex opens a new
     compressed vertex and absorbs the identical ones that follow it */
  nstored = cptr[0] = 0;
  cnvtxs = 0;
  for (pos=0; pos<nvtxs; pos++) {
    v = keys[pos].val;
    if (map[v] != -1)
      continue;

    /* Stamp v and its neighbors with the current position */
    mark[v] = pos;
    for (e=xadj[v]; e<xadj[v+1]; e++)
      mark[adjncy[e]] = pos;

    cind[nstored++] = v;
    map[v] = cnvtxs;

    for (scan=pos+1; scan<nvtxs; scan++) {
      u = keys[scan].val;

      /* Candidates end as soon as the key or the degree differs */
      if (keys[pos].key != keys[scan].key || xadj[v+1]-xadj[v] != xadj[u+1]-xadj[u])
        break;

      if (map[u] != -1)
        continue;

      /* u matches when every one of its neighbors carries v's stamp */
      for (ue=xadj[u]; ue<xadj[u+1] && mark[adjncy[ue]] == pos; ue++);

      if (ue == xadj[u+1]) {
        map[u] = cnvtxs;
        cind[nstored++] = u;
      }
    }

    cptr[++cnvtxs] = nstored;
  }

  /* printf("Original: %6d, Compressed: %6d\n", nvtxs, cnvtxs); */

  InitGraph(graph);

  if (cnvtxs >= COMPRESSION_FRACTION*nvtxs) {
    /* Not enough reduction: wrap the original arrays */
    graph->nvtxs = nvtxs;
    graph->nedges = xadj[nvtxs];
    graph->ncon = 1;
    graph->xadj = xadj;
    graph->adjncy = adjncy;

    graph->gdata = idxmalloc(3*nvtxs+graph->nedges, "CompressGraph: gdata");
    graph->vwgt = graph->gdata;
    graph->adjwgtsum = graph->gdata+nvtxs;
    graph->cmap = graph->gdata+2*nvtxs;
    graph->adjwgt = graph->gdata+3*nvtxs;

    idxset(nvtxs, 1, graph->vwgt);
    idxset(graph->nedges, 1, graph->adjwgt);

    graph->label = idxmalloc(nvtxs, "CompressGraph: label");
    for (v=0; v<nvtxs; v++) {
      graph->adjwgtsum[v] = xadj[v+1]-xadj[v];
      graph->label[v] = v;
    }
  }
  else {
    /* Upper bound on the edges: degree of one representative per group */
    cnedges = 0;
    for (c=0; c<cnvtxs; c++) {
      v = cind[cptr[c]];
      cnedges += xadj[v+1]-xadj[v];
    }

    /* One allocation holds every array of the compressed graph */
    graph->gdata = idxmalloc(4*cnvtxs+1 + 2*cnedges, "CompressGraph: gdata");
    cxadj = graph->xadj = graph->gdata;
    cvwgt = graph->vwgt = graph->gdata + cnvtxs+1;
    graph->adjwgtsum = graph->gdata + 2*cnvtxs+1;
    graph->cmap = graph->gdata + 3*cnvtxs+1;
    cadjncy = graph->adjncy = graph->gdata + 4*cnvtxs+1;
    graph->adjwgt = graph->gdata + 4*cnvtxs+1 + cnedges;

    /* Build the compressed adjacency lists; mark[] now remembers the last
       compressed vertex that already listed a given neighbor */
    idxset(nvtxs, -1, mark);
    nstored = cxadj[0] = 0;
    for (c=0; c<cnvtxs; c++) {
      cvwgt[c] = cptr[c+1]-cptr[c];
      mark[c] = c;  /* keeps the diagonal out of the compressed graph */
      for (e=cptr[c]; e<cptr[c+1]; e++) {
        v = cind[e];
        for (ue=xadj[v]; ue<xadj[v+1]; ue++) {
          target = map[adjncy[ue]];
          if (mark[target] != c) {
            cadjncy[nstored++] = target;
            mark[target] = c;
          }
        }
      }
      cxadj[c+1] = nstored;
    }

    graph->nvtxs = cnvtxs;
    graph->nedges = nstored;
    graph->ncon = 1;

    idxset(graph->nedges, 1, graph->adjwgt);

    graph->label = idxmalloc(cnvtxs, "CompressGraph: label");
    for (c=0; c<cnvtxs; c++) {
      graph->adjwgtsum[c] = cxadj[c+1]-cxadj[c];
      graph->label[c] = c;
    }
  }

  GKfree(&keys, &map, &mark, LTERM);
}
예제 #29
0
파일: meshpart.c 프로젝트: Nasrollah/phasta
/*************************************************************************
* This function partitions a finite element mesh by partitioning its nodal
* graph using KMETIS and then assigning elements in a load balanced fashion.
*
*   ne, nn   - number of elements / nodes
*   elmnts   - element connectivity, esize consecutive node ids per element
*   etype    - element type, used to index esizes[] (valid values 1..4,
*              giving 3, 4, 8 or 4 nodes per element)
*   numflag  - 1 if elmnts uses 1-based numbering; it is converted to
*              0-based on entry and converted back (together with
*              epart/npart) on exit
*   nparts   - number of partitions
*   edgecut  - (out) filled in by METIS_PartGraphKway
*   epart    - (out) partition of each element
*   npart    - (out) partition of each node
**************************************************************************/
void METIS_PartMeshNodal(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, 
                         int *nparts, int *edgecut, idxtype *epart, idxtype *npart)
{
  int i, j, k, me;
  idxtype *xadj, *adjncy, *pwgts, *shrunk;
  int options[10], pnumflag=0, wgtflag=0;
  int nnbrs, nbrind[200], nbrwgt[200], maxpwgt;
  int esize, esizes[] = {-1, 3, 4, 8, 4};

  esize = esizes[*etype];

  if (*numflag == 1)
    ChangeMesh2CNumbering((*ne)*esize, elmnts);

  xadj = idxmalloc(*nn+1, "METIS_MESHPARTNODAL: xadj");
  adjncy = idxmalloc(20*(*nn), "METIS_MESHPARTNODAL: adjncy");

  METIS_MeshToNodal(ne, nn, elmnts, etype, &pnumflag, xadj, adjncy);

  /* Trim adjncy to the space actually used.  This is only an optimization:
     go through a temporary so that a failed realloc keeps the original
     (larger) buffer instead of losing it, and skip zero-sized requests,
     whose result is not portable. */
  if (xadj[*nn] > 0) {
    shrunk = realloc(adjncy, xadj[*nn]*sizeof(idxtype));
    if (shrunk != NULL)
      adjncy = shrunk;
  }

  options[0] = 0;
  METIS_PartGraphKway(nn, xadj, adjncy, NULL, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, npart);

  /* OK, now compute an element partition based on the nodal partition npart */
  /* Elements whose nodes all lie in one partition go to that partition */
  idxset(*ne, -1, epart);
  pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTNODAL: pwgts");
  for (i=0; i<*ne; i++) {
    me = npart[elmnts[i*esize]];
    for (j=1; j<esize; j++) {
      if (npart[elmnts[i*esize+j]] != me)
        break;
    }
    if (j == esize) {
      epart[i] = me;
      pwgts[me]++;
    }
  }

  /* Allow each partition up to 3% more than the average element count */
  maxpwgt = 1.03*(*ne)/(*nparts);
  for (i=0; i<*ne; i++) {
    if (epart[i] == -1) { /* Assign the boundary element */
      /* Count how many of the element's nodes fall in each partition */
      nnbrs = 0;
      for (j=0; j<esize; j++) {
        me = npart[elmnts[i*esize+j]];
        for (k=0; k<nnbrs; k++) {
          if (nbrind[k] == me) {
            nbrwgt[k]++;
            break;
          }
        }
        if (k == nnbrs) {
          nbrind[nnbrs] = me;
          nbrwgt[nnbrs++] = 1;
        }
      }
      /* Try to assign it first to the domain with most things in common */
      j = iamax(nnbrs, nbrwgt);
      if (pwgts[nbrind[j]] < maxpwgt) {
        epart[i] = nbrind[j];
      }
      else {
        /* If that fails, assign it to a light domain */
        for (j=0; j<nnbrs; j++) {
          if (pwgts[nbrind[j]] < maxpwgt) {
            epart[i] = nbrind[j];
            break;
          }
        }
        /* Every candidate is full: fall back to the most common one */
        if (j == nnbrs) 
          epart[i] = nbrind[iamax(nnbrs, nbrwgt)];
      }
      pwgts[epart[i]]++;
    }
  }

  if (*numflag == 1)
    ChangeMesh2FNumbering2((*ne)*esize, elmnts, *ne, *nn, epart, npart);

  GKfree(&xadj, &adjncy, &pwgts, LTERM);

}
예제 #30
0
파일: meshpart.c 프로젝트: Nasrollah/phasta
/*************************************************************************
* This function partitions a finite element mesh by partitioning its dual
* graph (with the element weights vwgts) using KMETIS and then derives a
* nodal partition from the resulting element partition epart.
*
* NOTE(review): a node that appears in no element has an empty range in
* nptr[]; the lookups below still read nind[nptr[i]] and nbrind[0] for
* such a node -- confirm that callers never pass isolated nodes.
**************************************************************************/
void METIS_PartMeshDual_WV(int *ne, int *nn, idxtype *elmnts, int *etype, int *numflag, 
                        int *nparts, int *edgecut, idxtype *epart, idxtype *npart, idxtype *vwgts)
{
  int node, elem, pos, slot, total, dom;
  idxtype *xadj, *adjncy, *pwgts, *nptr, *nind;
  int options[10], pnumflag=0, wgtflag=2;
  int ncand, cand[200], candcnt[200], maxpwgt;
  int esize, esizes[] = {-1, 3, 4, 8, 4};

  esize = esizes[*etype];

  if (*numflag == 1)
    ChangeMesh2CNumbering((*ne)*esize, elmnts);

  xadj = idxmalloc(*ne+1, "METIS_MESHPARTNODAL: xadj");
  adjncy = idxmalloc(esize*(*ne), "METIS_MESHPARTNODAL: adjncy");

  METIS_MeshToDual(ne, nn, elmnts, etype, &pnumflag, xadj, adjncy);

  options[0] = 0;
  METIS_PartGraphKway(ne, xadj, adjncy, vwgts, NULL, &wgtflag, &pnumflag, nparts, options, edgecut, epart);

  /* Build the node -> element lists in CSR form (nptr/nind): first count
     the occurrences of every node, then turn the counts into offsets */
  nptr = idxsmalloc(*nn+1, 0, "METIS_MESHPARTDUAL: nptr");
  total = esize*(*ne);
  for (pos=0; pos<total; pos++) 
    nptr[elmnts[pos]]++;
  MAKECSR(node, *nn, nptr);

  /* Scatter the element ids; this advances nptr[] by one slot per entry */
  nind = idxmalloc(nptr[*nn], "METIS_MESHPARTDUAL: nind");
  pos = 0;
  for (elem=0; elem<(*ne); elem++) {
    for (slot=0; slot<esize; slot++, pos++) 
      nind[nptr[elmnts[pos]]++] = elem;
  }

  /* Shift nptr[] back so that it holds the start offsets again */
  for (node=(*nn); node>0; node--)
    nptr[node] = nptr[node-1];
  nptr[0] = 0;


  /* Nodes whose elements all share one partition inherit that partition */
  idxset(*nn, -1, npart);
  pwgts = idxsmalloc(*nparts, 0, "METIS_MESHPARTDUAL: pwgts");
  for (node=0; node<*nn; node++) {
    dom = epart[nind[nptr[node]]];
    for (pos=nptr[node]+1; pos<nptr[node+1] && epart[nind[pos]] == dom; pos++);

    if (pos == nptr[node+1]) {
      npart[node] = dom;
      pwgts[dom]++;
    }
  }

  /* Allow each partition up to 3% more than the average node count */
  maxpwgt = 1.03*(*nn)/(*nparts);
  for (node=0; node<*nn; node++) {
    if (npart[node] != -1)
      continue;

    /* Boundary node: tally the partitions of its incident elements */
    ncand = 0;
    for (pos=nptr[node]; pos<nptr[node+1]; pos++) {
      dom = epart[nind[pos]];
      for (slot=0; slot<ncand; slot++) {
        if (cand[slot] == dom) {
          candcnt[slot]++;
          break;
        }
      }
      if (slot == ncand) {
        cand[ncand] = dom;
        candcnt[ncand++] = 1;
      }
    }

    /* Prefer the partition shared with the most elements ... */
    slot = iamax(ncand, candcnt);
    if (pwgts[cand[slot]] < maxpwgt) {
      npart[node] = cand[slot];
    }
    else {
      /* ... otherwise the first candidate with room, defaulting to the
         first candidate when all of them are full */
      npart[node] = cand[0];
      for (slot=0; slot<ncand; slot++) {
        if (pwgts[cand[slot]] < maxpwgt) {
          npart[node] = cand[slot];
          break;
        }
      }
    }
    pwgts[npart[node]]++;
  }

  if (*numflag == 1)
    ChangeMesh2FNumbering2((*ne)*esize, elmnts, *ne, *nn, epart, npart);

  GKfree(&xadj, &adjncy, &pwgts, &nptr, &nind, LTERM);

}