void gk_array2csr(size_t n, size_t range, int *array, int *ptr, int *ind)
{
  size_t i;

  gk_iset(range+1, 0, ptr);

  for (i=0; i<n; i++) 
    ptr[array[i]]++;

  /* Compute the ptr, ind structure */
  MAKECSR(i, range, ptr);
  for (i=0; i<n; i++)
    ind[ptr[array[i]]++] = i;
  SHIFTCSR(i, range, ptr);
}
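A minimal usage sketch (not part of the listing above): the routine buckets the positions of array[] by value into a CSR-like ptr/ind structure. The array contents below are invented for illustration, and the program assumes it is compiled and linked against GKlib so that gk_array2csr() is available.

/* Hypothetical driver for gk_array2csr(); data values are invented. */
#include <stdio.h>
#include <stdlib.h>
#include <GKlib.h>   /* assumed GKlib header declaring gk_array2csr() */

int main(void)
{
  int array[] = {2, 0, 2, 1, 0, 2};   /* values in [0, range) */
  size_t n = 6, range = 3;
  int *ptr = (int *)calloc(range+1, sizeof(int));
  int *ind = (int *)malloc(n*sizeof(int));

  gk_array2csr(n, range, array, ptr, ind);

  /* Expected: ptr = {0,2,3,6}; ind lists, for each value, the positions holding it:
     value 0 -> 1,4   value 1 -> 3   value 2 -> 0,2,5 */
  for (size_t v=0; v<range; v++) {
    printf("value %zu at positions:", v);
    for (int j=ptr[v]; j<ptr[v+1]; j++)
      printf(" %d", ind[j]);
    printf("\n");
  }

  free(ptr); free(ind);
  return 0;
}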
Example #2
/*************************************************************************
* This function partitions a finite element mesh by partitioning its dual
* graph using KMETIS and then assigning nodes in a load balanced fashion.
**************************************************************************/
int METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 
          idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, 
          real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, 
          idx_t *npart) 
{
  int sigrval=0, renumber=0, ptype;
  idx_t i, j;
  idx_t *xadj=NULL, *adjncy=NULL, *nptr=NULL, *nind=NULL;
  idx_t ncon=1, pnumflag=0;
  int rstatus = METIS_OK;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init()) 
    return METIS_ERROR_MEMORY;

  gk_sigtrap();

  if ((sigrval = gk_sigcatch()) != 0) 
    goto SIGTHROW;

  renumber = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
  ptype    = GETOPTION(options, METIS_OPTION_PTYPE, METIS_PTYPE_KWAY);

  /* renumber the mesh */
  if (renumber) {
    ChangeMesh2CNumbering(*ne, eptr, eind);
    options[METIS_OPTION_NUMBERING] = 0;
  }

  /* get the dual graph */
  rstatus = METIS_MeshToDual(ne, nn, eptr, eind, ncommon, &pnumflag, &xadj, &adjncy);
  if (rstatus != METIS_OK)
    raise(SIGERR);

  /* partition the graph */
  if (ptype == METIS_PTYPE_KWAY) 
    rstatus = METIS_PartGraphKway(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
                  nparts, tpwgts, NULL, options, objval, epart);
  else 
    rstatus = METIS_PartGraphRecursive(ne, &ncon, xadj, adjncy, vwgt, vsize, NULL, 
                  nparts, tpwgts, NULL, options, objval, epart);

  if (rstatus != METIS_OK)
    raise(SIGERR);


  /* construct the node-element list */
  nptr = ismalloc(*nn+1, 0, "METIS_PartMeshDual: nptr");
  nind = imalloc(eptr[*ne], "METIS_PartMeshDual: nind");

  for (i=0; i<*ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nptr[eind[j]]++;
  }
  MAKECSR(i, *nn, nptr);

  for (i=0; i<*ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nind[nptr[eind[j]]++] = i;
  }
  SHIFTCSR(i, *nn, nptr);

  /* partition the other side of the mesh */
  InduceRowPartFromColumnPart(*nn, nptr, nind, npart, epart, *nparts, tpwgts);

  gk_free((void **)&nptr, &nind, LTERM);


SIGTHROW:
  if (renumber) {
    ChangeMesh2FNumbering2(*ne, *nn, eptr, eind, epart, npart);
    options[METIS_OPTION_NUMBERING] = 1;
  }

  METIS_Free(xadj);
  METIS_Free(adjncy);

  gk_siguntrap();
  gk_malloc_cleanup(0);

  return metis_rcode(sigrval);
}
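A hedged usage sketch for the routine above, assuming the public metis.h header: it partitions a made-up mesh of two quadrilaterals that share an edge into two parts. Passing NULL for vwgt, vsize, tpwgts, and options requests the library defaults; all data values are invented.

#include <stdio.h>
#include <metis.h>

int main(void)
{
  /* Two quads sharing nodes 1 and 4 (made-up mesh):
       element 0: nodes 0 1 4 3
       element 1: nodes 1 2 5 4  */
  idx_t ne = 2, nn = 6;
  idx_t eptr[] = {0, 4, 8};
  idx_t eind[] = {0, 1, 4, 3,  1, 2, 5, 4};
  idx_t ncommon = 2, nparts = 2, objval;
  idx_t epart[2], npart[6];

  int status = METIS_PartMeshDual(&ne, &nn, eptr, eind, NULL, NULL,
                                  &ncommon, &nparts, NULL, NULL,
                                  &objval, epart, npart);

  if (status == METIS_OK)
    printf("objval = %d, epart = {%d, %d}\n",
           (int)objval, (int)epart[0], (int)epart[1]);
  return 0;
}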
Example #3
void EliminateComponents(ctrl_t *ctrl, graph_t *graph) {
    idx_t i, ii, j, jj, k, me, nparts, nvtxs, ncon, ncmps, other,
        ncand, target;
    idx_t *xadj, *adjncy, *vwgt, *adjwgt, *where, *pwgts;
    idx_t *cptr, *cind, *cpvec, *pcptr, *pcind, *cwhere;
    idx_t cid, bestcid, *cwgt, *bestcwgt;
    idx_t ntodo, oldntodo, *todo;
    rkv_t *cand;
    real_t *tpwgts;
    idx_t *vmarker = NULL, *pmarker = NULL, *modind = NULL;  /* volume specific work arrays */

    WCOREPUSH;

    nvtxs = graph->nvtxs;
    ncon = graph->ncon;
    xadj = graph->xadj;
    adjncy = graph->adjncy;
    vwgt = graph->vwgt;
    adjwgt = (ctrl->objtype == METIS_OBJTYPE_VOL ? NULL : graph->adjwgt);

    where = graph->where;
    pwgts = graph->pwgts;

    nparts = ctrl->nparts;
    tpwgts = ctrl->tpwgts;

    cptr = iwspacemalloc(ctrl, nvtxs + 1);
    cind = iwspacemalloc(ctrl, nvtxs);

    ncmps = FindPartitionInducedComponents(graph, where, cptr, cind);

    IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO,
          printf("I found %"PRIDX" components, for this %"PRIDX"-way partition\n",
                 ncmps, nparts));

    /* There are more components than partitions */
    if (ncmps > nparts) {
        cwgt = iwspacemalloc(ctrl, ncon);
        bestcwgt = iwspacemalloc(ctrl, ncon);
        cpvec = iwspacemalloc(ctrl, nparts);
        pcptr = iset(nparts + 1, 0, iwspacemalloc(ctrl, nparts + 1));
        pcind = iwspacemalloc(ctrl, ncmps);
        cwhere = iset(nvtxs, -1, iwspacemalloc(ctrl, nvtxs));
        todo = iwspacemalloc(ctrl, ncmps);
        cand = (rkv_t *) wspacemalloc(ctrl, nparts * sizeof(rkv_t));

        if (ctrl->objtype == METIS_OBJTYPE_VOL) {
            /* Vol-refinement specific working arrays */
            modind = iwspacemalloc(ctrl, nvtxs);
            vmarker = iset(nvtxs, 0, iwspacemalloc(ctrl, nvtxs));
            pmarker = iset(nparts, -1, iwspacemalloc(ctrl, nparts));
        }


        /* Get a CSR representation of the components-2-partitions mapping */
        for (i = 0; i < ncmps; i++) {
            pcptr[where[cind[cptr[i]]]]++;
        }
        MAKECSR(i, nparts, pcptr);
        for (i = 0; i < ncmps; i++) {
            pcind[pcptr[where[cind[cptr[i]]]]++] = i;
        }
        SHIFTCSR(i, nparts, pcptr);

        /* Assign the heaviest component of each partition to its original partition */
        for (ntodo = 0, i = 0; i < nparts; i++) {
            if (pcptr[i + 1] - pcptr[i] == 1) {
                bestcid = pcind[pcptr[i]];
            } else {
                for (bestcid = -1, j = pcptr[i]; j < pcptr[i + 1]; j++) {
                    cid = pcind[j];
                    iset(ncon, 0, cwgt);
                    for (ii = cptr[cid]; ii < cptr[cid + 1]; ii++) {
                        iaxpy(ncon, 1, vwgt + cind[ii] * ncon, 1, cwgt, 1);
                    }
                    if (bestcid == -1 || isum(ncon, bestcwgt, 1) < isum(ncon, cwgt, 1)) {
                        bestcid = cid;
                        icopy(ncon, cwgt, bestcwgt);
                    }
                }
                /* Keep track of those that need to be dealt with */
                for (j = pcptr[i]; j < pcptr[i + 1]; j++) {
                    if (pcind[j] != bestcid) {
                        todo[ntodo++] = pcind[j];
                    }
                }
            }

            for (j = cptr[bestcid]; j < cptr[bestcid + 1]; j++) {
                ASSERT(where[cind[j]] == i);
                cwhere[cind[j]] = i;
            }
        }

        while (ntodo > 0) {
            oldntodo = ntodo;
            for (i = 0; i < ntodo; i++) {
                cid = todo[i];
                me = where[cind[cptr[cid]]];  /* Get the domain of this component */

                /* Determine the weight of the block to be moved */
                iset(ncon, 0, cwgt);
                for (j = cptr[cid]; j < cptr[cid + 1]; j++) {
                    iaxpy(ncon, 1, vwgt + cind[j] * ncon, 1, cwgt, 1);
                }

                IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO,
                      printf("Trying to move %"PRIDX" [%"PRIDX"] from %"PRIDX"\n",
                             cid, isum(ncon, cwgt, 1), me));

                /* Determine the connectivity */
                iset(nparts, 0, cpvec);
                for (j = cptr[cid]; j < cptr[cid + 1]; j++) {
                    ii = cind[j];
                    for (jj = xadj[ii]; jj < xadj[ii + 1]; jj++) {
                        if (cwhere[adjncy[jj]] != -1) {
                            cpvec[cwhere[adjncy[jj]]] += (adjwgt ? adjwgt[jj] : 1);
                        }
                    }
                }

                /* Put the neighbors into a cand[] array for sorting */
                for (ncand = 0, j = 0; j < nparts; j++) {
                    if (cpvec[j] > 0) {
                        cand[ncand].key = cpvec[j];
                        cand[ncand++].val = j;
                    }
                }
                if (ncand == 0) {
                    continue;
                }

                rkvsortd(ncand, cand);

                /* Limit the moves to only the top candidates, defined as those
                   with connectivity of at least 50% of the best candidate.
                   This applies only when ncon=1, since balancing is hard to
                   maintain in the multi-constraint case. */
                if (ncon == 1) {
                    for (j = 1; j < ncand; j++) {
                        if (cand[j].key < .5 * cand[0].key) {
                            break;
                        }
                    }
                    ncand = j;
                }

                /* Now among those, select the one with the best balance */
                target = cand[0].val;
                for (j = 1; j < ncand; j++) {
                    if (BetterBalanceKWay(ncon, cwgt, ctrl->ubfactors,
                                          1, pwgts + target * ncon, ctrl->pijbm + target * ncon,
                                          1, pwgts + cand[j].val * ncon, ctrl->pijbm + cand[j].val * ncon)) {
                                              target = cand[j].val;
                    }
                }

                IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO,
                      printf("\tMoving it to %"PRIDX" [%"PRIDX"] [%"PRIDX"]\n", target, cpvec[target], ncand));

                /* Note that as a result of a previous move, a connected component
                   may now prefer to stay in its original partition */
                if (target != me) {
                    switch (ctrl->objtype) {
                        case METIS_OBJTYPE_CUT:
                            MoveGroupContigForCut(ctrl, graph, target, cid, cptr, cind);
                            break;

                        case METIS_OBJTYPE_VOL:
                            MoveGroupContigForVol(ctrl, graph, target, cid, cptr, cind,
                                                  vmarker, pmarker, modind);
                            break;

                        default:
                            gk_errexit(SIGERR, "Unknown objtype %d\n", ctrl->objtype);
                    }
                }

                /* Update the cwhere vector */
                for (j = cptr[cid]; j < cptr[cid + 1]; j++) {
                    cwhere[cind[j]] = target;
                }

                todo[i] = todo[--ntodo];
            }
            if (oldntodo == ntodo) {
                IFSET(ctrl->dbglvl, METIS_DBG_CONTIGINFO, printf("Stopped at ntodo: %"PRIDX"\n", ntodo));
                break;
            }
        }

        for (i = 0; i < nvtxs; i++) {
            ASSERT(where[i] == cwhere[i]);
        }

    }

    WCOREPOP;
}
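A standalone sketch (invented data) of the components-to-partitions CSR mapping built near the top of the routine: component IDs are bucketed by the partition that contains them. Plain loops stand in for the MAKECSR/SHIFTCSR macros, whose definitions are not shown in this listing.

#include <stdio.h>

int main(void)
{
  /* 3 components over a 2-way partition: components 0 and 2 live in part 0,
     component 1 lives in part 1.  cptr/cind give the vertices of each component
     and where[] gives the part of each vertex (all values are made up). */
  int nparts = 2, ncmps = 3;
  int cptr[]  = {0, 2, 4, 5};
  int cind[]  = {0, 1,  2, 3,  4};
  int where[] = {0, 0, 1, 1, 0};

  int pcptr[3] = {0}, pcind[3];

  for (int i=0; i<ncmps; i++)                 /* count the components per part */
    pcptr[where[cind[cptr[i]]]]++;
  /* MAKECSR-equivalent: prefix-sum, then shift so pcptr[p] is the start of part p */
  for (int i=1; i<nparts; i++) pcptr[i] += pcptr[i-1];
  for (int i=nparts; i>0; i--) pcptr[i] = pcptr[i-1];
  pcptr[0] = 0;
  for (int i=0; i<ncmps; i++)                 /* bucket the component IDs */
    pcind[pcptr[where[cind[cptr[i]]]]++] = i;
  /* SHIFTCSR-equivalent: restore the start offsets advanced above */
  for (int i=nparts; i>0; i--) pcptr[i] = pcptr[i-1];
  pcptr[0] = 0;

  for (int p=0; p<nparts; p++) {
    printf("part %d components:", p);
    for (int j=pcptr[p]; j<pcptr[p+1]; j++) printf(" %d", pcind[j]);
    printf("\n");
  }
  return 0;
}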
Example #4
void CreateGraphNodal(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, 
          idx_t **r_xadj, idx_t **r_adjncy)
{
  idx_t i, j, nnbrs;
  idx_t *nptr, *nind;
  idx_t *xadj, *adjncy;
  idx_t *marker, *nbrs;


  /* construct the node-element list first */
  nptr = ismalloc(nn+1, 0, "CreateGraphNodal: nptr");
  nind = imalloc(eptr[ne], "CreateGraphNodal: nind");

  for (i=0; i<ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nptr[eind[j]]++;
  }
  MAKECSR(i, nn, nptr);

  for (i=0; i<ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nind[nptr[eind[j]]++] = i;
  }
  SHIFTCSR(i, nn, nptr);


  /* Allocate memory for xadj, since its size is already known.
     Standard malloc is used because the array is returned
     to the calling function */
  if ((xadj = (idx_t *)malloc((nn+1)*sizeof(idx_t))) == NULL)
    gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n");
  *r_xadj = xadj;
  iset(nn+1, 0, xadj);

  /* allocate memory for working arrays used by FindCommonElements */
  marker = ismalloc(nn, 0, "CreateGraphNodal: marker");
  nbrs   = imalloc(nn, "CreateGraphNodal: nbrs");

  for (i=0; i<nn; i++) {
    xadj[i] = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, 
                  eind, marker, nbrs);
  }
  MAKECSR(i, nn, xadj);

  /* Allocate memory for adjncy, since its size is now known.
     Standard malloc is used because the array is returned
     to the calling function */
  if ((adjncy = (idx_t *)malloc(xadj[nn]*sizeof(idx_t))) == NULL) {
    free(xadj);
    *r_xadj = NULL;
    gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n");
  }
  *r_adjncy = adjncy;

  for (i=0; i<nn; i++) {
    nnbrs = FindCommonNodes(i, nptr[i+1]-nptr[i], nind+nptr[i], eptr, 
                eind, marker, nbrs);
    for (j=0; j<nnbrs; j++)
      adjncy[xadj[i]++] = nbrs[j];
  }
  SHIFTCSR(i, nn, xadj);
  
  gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM);
}
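A hedged usage sketch for the nodal graph construction. CreateGraphNodal() is an internal routine; this example goes through METIS_MeshToNodal(), which is assumed (METIS 5) to be the public entry point wrapping it. The two-triangle mesh below is invented; nodes end up adjacent whenever they appear in a common element.

#include <stdio.h>
#include <metis.h>

int main(void)
{
  /* Two triangles sharing edge (1,2): element 0 = {0,1,2}, element 1 = {1,3,2} */
  idx_t ne = 2, nn = 4, numflag = 0;
  idx_t eptr[] = {0, 3, 6};
  idx_t eind[] = {0, 1, 2,  1, 3, 2};
  idx_t *xadj = NULL, *adjncy = NULL;

  if (METIS_MeshToNodal(&ne, &nn, eptr, eind, &numflag, &xadj, &adjncy) == METIS_OK) {
    for (idx_t i=0; i<nn; i++) {
      printf("node %d:", (int)i);
      for (idx_t j=xadj[i]; j<xadj[i+1]; j++)
        printf(" %d", (int)adjncy[j]);
      printf("\n");
    }
    METIS_Free(xadj);
    METIS_Free(adjncy);
  }
  return 0;
}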
Example #5
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
                 int *numflag, int *ncommonnodes, idxtype **xadj, 
		 idxtype **adjncy, MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;


  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i)
            continue;

          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
                }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
            }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
        }
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, 
         (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, 
	 (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
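A small standalone sketch (no MPI, invented values) of how the distributed mesh arrives at this routine: elmdist is replicated on every rank and gives the global element range each rank owns, while eptr/eind describe only the local elements, which is why nelms is computed above as elmdist[mype+1]-elmdist[mype].

#include <stdio.h>

int main(void)
{
  /* A 4-triangle strip split over 2 ranks, 2 elements per rank. */
  int npes = 2, mype = 1;                 /* pretend we are rank 1 */
  int elmdist[] = {0, 2, 4};              /* rank p owns elements [elmdist[p], elmdist[p+1]) */

  /* local arrays on rank 1: global elements 2 and 3, three nodes each */
  int eptr[] = {0, 3, 6};
  int eind[] = {2, 3, 4,  3, 5, 4};

  int nelms = elmdist[mype+1] - elmdist[mype];   /* same computation as in the routine */
  printf("rank %d of %d owns %d elements, first global element %d\n",
         mype, npes, nelms, elmdist[mype]);
  for (int i=0; i<nelms; i++) {
    printf("  local element %d (global %d):", i, elmdist[mype]+i);
    for (int j=eptr[i]; j<eptr[i+1]; j++) printf(" %d", eind[j]);
    printf("\n");
  }
  return 0;
}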
Example #6
/*************************************************************************
* This function is the entry point of the initial partitioning algorithm.
* This algorithm assembles the graph on all the processors and proceeds
* serially.
**************************************************************************/
idx_t Mc_Diffusion(ctrl_t *ctrl, graph_t *graph, idx_t *vtxdist, idx_t *where, 
          idx_t *home, idx_t npasses)
{
  idx_t h, i, j;
  idx_t nvtxs, nedges, ncon, pass, iter, domain, processor;
  idx_t nparts, mype, npes, nlinks, me, you, wsize;
  idx_t nvisited, nswaps = -1, tnswaps, done, alldone = -1;
  idx_t *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap;
  idx_t *pack, *unpack, *match, *proc2sub, *sub2proc;
  idx_t *visited, *gvisited;
  real_t *transfer, *npwgts, maxdiff, minflow, maxflow;
  real_t lbavg, oldlbavg, ubavg, *lbvec;
  real_t *diff_flows, *sr_flows;
  real_t diff_lbavg, sr_lbavg, diff_cost, sr_cost;
  idx_t *rbuffer, *sbuffer; 
  idx_t *rcount, *rdispl;
  real_t *solution, *load, *workspace;
  matrix_t matrix;
  graph_t *egraph;

  if (graph->ncon > 3)
    return 0;

  WCOREPUSH;

  nvtxs  = graph->nvtxs;
  nedges = graph->nedges;
  ncon   = graph->ncon;

  nparts = ctrl->nparts;
  mype   = ctrl->mype;
  npes   = ctrl->npes;
  ubavg  = ravg(ncon, ctrl->ubvec);

  /* initialize variables and allocate memory */
  lbvec      = rwspacemalloc(ctrl, ncon);
  diff_flows = rwspacemalloc(ctrl, ncon);
  sr_flows   = rwspacemalloc(ctrl, ncon);

  load                       = rwspacemalloc(ctrl, nparts);
  solution                   = rwspacemalloc(ctrl, nparts);
  npwgts = graph->gnpwgts    = rwspacemalloc(ctrl, ncon*nparts);
  matrix.values              = rwspacemalloc(ctrl, nedges);
  transfer = matrix.transfer = rwspacemalloc(ctrl, ncon*nedges);

  proc2sub               = iwspacemalloc(ctrl, gk_max(nparts, npes*2));
  sub2proc               = iwspacemalloc(ctrl, nparts);
  match                  = iwspacemalloc(ctrl, nparts);
  rowptr = matrix.rowptr = iwspacemalloc(ctrl, nparts+1);
  colind = matrix.colind = iwspacemalloc(ctrl, nedges);

  rcount = iwspacemalloc(ctrl, npes);
  rdispl = iwspacemalloc(ctrl, npes+1);

  pack       = iwspacemalloc(ctrl, nvtxs);
  unpack     = iwspacemalloc(ctrl, nvtxs);
  rbuffer    = iwspacemalloc(ctrl, nvtxs);
  sbuffer    = iwspacemalloc(ctrl, nvtxs);
  map        = iwspacemalloc(ctrl, nvtxs);
  rmap       = iwspacemalloc(ctrl, nvtxs);
  diff_where = iwspacemalloc(ctrl, nvtxs);
  ehome      = iwspacemalloc(ctrl, nvtxs);


  wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "Mc_Diffusion: workspace");

  graph->ckrinfo = (ckrinfo_t *)gk_malloc(nvtxs*sizeof(ckrinfo_t), "Mc_Diffusion: rinfo");


  /* construct subdomain connectivity graph */
  matrix.nrows = nparts;
  SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);
  nlinks = (matrix.nnzs-nparts) / 2;

  visited  = iwspacemalloc(ctrl, matrix.nnzs);
  gvisited = iwspacemalloc(ctrl, matrix.nnzs);

  for (pass=0; pass<npasses; pass++) {
    rset(matrix.nnzs*ncon, 0.0, transfer);
    iset(matrix.nnzs, 0, gvisited);
    iset(matrix.nnzs, 0, visited);
    iter = nvisited = 0;

    /* compute ncon flow solutions */
    for (h=0; h<ncon; h++) {
      rset(nparts, 0.0, solution);
      ComputeLoad(graph, nparts, load, ctrl->tpwgts, h);

      lbvec[h] = (rmax(nparts, load)+1.0/nparts) * (real_t)nparts;

      ConjGrad2(&matrix, load, solution, 0.001, workspace);
      ComputeTransferVector(ncon, &matrix, solution, transfer, h);
    }

    oldlbavg = ravg(ncon, lbvec);
    tnswaps = 0;
    maxdiff = 0.0;
    for (i=0; i<nparts; i++) {
      for (j=rowptr[i]; j<rowptr[i+1]; j++) {
        maxflow = rmax(ncon, transfer+j*ncon);
        minflow = rmin(ncon, transfer+j*ncon);
        maxdiff = (maxflow - minflow > maxdiff) ? maxflow - minflow : maxdiff;
      }
    }

    while (nvisited < nlinks) {
      /* compute independent sets of subdomains */
      iset(gk_max(nparts, npes*2), UNMATCHED, proc2sub);
      CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon);

      /* set up the packing arrays */
      iset(nparts, UNMATCHED, sub2proc);
      for (i=0; i<npes*2; i++) {
        if (proc2sub[i] == UNMATCHED)
          break;

        sub2proc[proc2sub[i]] = i/2;
      }

      iset(npes, 0, rcount);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          rcount[processor]++;
      }

      rdispl[0] = 0;
      for (i=1; i<npes+1; i++)
        rdispl[i] = rdispl[i-1] + rcount[i-1];

      iset(nvtxs, UNMATCHED, unpack);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          unpack[rdispl[processor]++] = i;
      }

      SHIFTCSR(i, npes, rdispl);

      iset(nvtxs, UNMATCHED, pack);
      for (i=0; i<rdispl[npes]; i++) {
        ASSERT(unpack[i] != UNMATCHED);
        domain = where[unpack[i]];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          pack[unpack[i]] = i;
      }

      /* Compute the flows */
      if (proc2sub[mype*2] != UNMATCHED) {
        me  = proc2sub[2*mype];
        you = proc2sub[2*mype+1];
        ASSERT(me != you);

        for (j=rowptr[me]; j<rowptr[me+1]; j++) {
          if (colind[j] == you) {
            visited[j] = 1;
            rcopy(ncon, transfer+j*ncon, diff_flows);
            break;
          }
        }

        for (j=rowptr[you]; j<rowptr[you+1]; j++) {
          if (colind[j] == me) {
            visited[j] = 1;
            for (h=0; h<ncon; h++) {
              if (transfer[j*ncon+h] > 0.0)
                diff_flows[h] = -1.0 * transfer[j*ncon+h];
            }
            break;
          }
        } 

        nswaps = 1;
        rcopy(ncon, diff_flows, sr_flows);

        iset(nvtxs, 0, sbuffer);
        for (i=0; i<nvtxs; i++) {
          if (where[i] == me || where[i] == you)
            sbuffer[i] = 1;
        }

        egraph = ExtractGraph(ctrl, graph, sbuffer, map, rmap);

        if (egraph != NULL) {
          icopy(egraph->nvtxs, egraph->where, diff_where);
          for (j=0; j<egraph->nvtxs; j++)
            ehome[j] = home[map[j]];
 
          RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg);

          if (ncon <= 4) {
            sr_where      = egraph->where;
            egraph->where = diff_where;

            nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, 
                         &diff_cost, &diff_lbavg, 1.0/(real_t)nvtxs);

            if ((sr_lbavg < diff_lbavg &&
                (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) ||
                (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = sr_where[i];
            }
            else {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = diff_where[i];
            }
          }
          else {
            for (i=0; i<egraph->nvtxs; i++)
              where[map[i]] = egraph->where[i];
          }

          gk_free((void **)&egraph->xadj, &egraph->nvwgt, &egraph->adjncy, &egraph, LTERM);
        }

        /* Pack the flow data */
        iset(nvtxs, UNMATCHED, sbuffer);
        for (i=0; i<nvtxs; i++) {
          domain = where[i];
          if (domain == you || domain == me) 
            sbuffer[pack[i]] = where[i];
        }
      }

      /* Broadcast the flow data */
      gkMPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_T, 
          (void *)rbuffer, rcount, rdispl, IDX_T, ctrl->comm);

      /* Unpack the flow data */
      for (i=0; i<rdispl[npes]; i++) {
        if (rbuffer[i] != UNMATCHED) 
          where[unpack[i]] = rbuffer[i];
      }


      /* Update the global visited information and the total number of swaps */
      gkMPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs,
          IDX_T, MPI_MAX, ctrl->comm);
      nvisited = isum(matrix.nnzs, gvisited, 1)/2;
      tnswaps += GlobalSESum(ctrl, nswaps);

      if (iter++ == NGD_PASSES)
        break;
    }

    /* perform serial refinement */
    Mc_ComputeSerialPartitionParams(ctrl, graph, nparts);
    Mc_SerialKWayAdaptRefine(ctrl, graph, nparts, home, ctrl->ubvec, 10);

    /* check for early breakout */
    for (h=0; h<ncon; h++) {
      lbvec[h] = (real_t)(nparts) *
        npwgts[rargmax_strd(nparts,npwgts+h,ncon)*ncon+h];
    }
    lbavg = ravg(ncon, lbvec);

    done = 0;
    if (tnswaps == 0 || lbavg >= oldlbavg || lbavg <= ubavg + 0.035)
      done = 1;

    alldone = GlobalSEMax(ctrl, done);
    if (alldone == 1)
      break;
  }

  /* ensure that all subdomains have at least one vertex */
/*
  iset(nparts, 0, match);
  for (i=0; i<nvtxs; i++)
    match[where[i]]++;

  done = 0;
  while (done == 0) {
    done = 1;

    me = iargmin(nparts, match);  
    if (match[me] == 0) {
      if (ctrl->mype == PE) printf("WARNING: empty subdomain %"PRIDX" in Mc_Diffusion\n", me);
      you = iargmax(nparts, match);  
      for (i=0; i<nvtxs; i++) {
        if (where[i] == you) {
          where[i] = me;
          match[you]--;
          match[me]++;
          done = 0;
          break;
        }
      }
    }
  }
*/
 
  /* now free memory and return */
  gk_free((void **)&workspace, (void **)&graph->ckrinfo, LTERM);
  graph->gnpwgts = NULL;
  graph->ckrinfo = NULL;

  WCOREPOP;

  return 0;
}
Example #7
/******************************************************************************
* This function creates a coarse graph corresponding to the partitioning vector
*******************************************************************************/
GraphType *CreatePartitionGraphForContact(idxtype nvtxs, idxtype *xadj, idxtype *adjncy, 
                idxtype *vwgt, idxtype *adjwgt, idxtype cnvtxs, idxtype *part)
{
  idxtype i, ii, j, jj, k, cnedges;
  idxtype *cxadj, *cadjncy, *cvwgt, *cadjwgt;
  idxtype *ptr, *ind, *marker;
  GraphType *cgraph;

  ptr    = idxsmalloc(cnvtxs+1, 0, "CreatePartitionGraph: ptr");
  ind    = idxmalloc(nvtxs, "CreatePartitionGraph: ind");
  marker = idxsmalloc(cnvtxs, -1, "CreatePartitionGraph: marker");

  cgraph = CreateGraph();

  cgraph->ncon  = 2;
  cgraph->nvtxs = cnvtxs;
  cxadj   = cgraph->xadj   = idxsmalloc(cnvtxs+1, 0, "CreatePartitionGraph: cxadj");
  cadjncy = cgraph->adjncy = idxmalloc(xadj[nvtxs], "CreatePartitionGraph: cadjncy");
  cvwgt   = cgraph->vwgt   = idxmalloc(2*cnvtxs, "CreatePartitionGraph: cvwgt");
  cadjwgt = cgraph->adjwgt = idxmalloc(xadj[nvtxs], "CreatePartitionGraph: cadjwgt");


  for (i=0; i<nvtxs; i++) 
    ptr[part[i]]++;
  MAKECSR(i, cnvtxs, ptr);
  for (i=0; i<nvtxs; i++) 
    ind[ptr[part[i]]++] = i;
  SHIFTCSR(i, cnvtxs, ptr);

  
  /* Create the adjacency list of the coarse/partition graph */
  for (cxadj[0]=cnedges=0, i=0; i<cnvtxs; i++) {
    cvwgt[2*i+0] = cvwgt[2*i+1] = 0;
    for (j=ptr[i]; j<ptr[i+1]; j++) {
      ii = ind[j];
      cvwgt[2*i+0] += vwgt[2*ii+0];
      cvwgt[2*i+1] += vwgt[2*ii+1];

      for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
        if ((k = part[adjncy[jj]]) == i)
          continue;
        if (marker[k] == -1) {
          cadjncy[cnedges] = k;
          cadjwgt[cnedges] = adjwgt[jj];
          marker[k] = cnedges++;
        }
        else 
          cadjwgt[marker[k]] += adjwgt[jj];
      }
    }
    cxadj[i+1] = cnedges;

    /* reset the marker array */
    for (j=cxadj[i]; j<cnedges; j++)
      marker[cadjncy[j]] = -1;
  }

  gk_free((void **)&ptr, &ind, &marker, LTERM);

  return cgraph;
}
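A standalone sketch (invented data) of the coarse/partition graph the routine produces: parts become coarse vertices, and the weights of fine edges crossing between two parts are aggregated with the same marker[] trick. The loop below re-derives the idea in simplified form rather than reproducing the routine.

#include <stdio.h>

int main(void)
{
  /* A 4-vertex path 0-1-2-3 with unit edge weights, partitioned as {0,1} and {2,3}. */
  int nvtxs = 4, cnvtxs = 2;
  int xadj[]   = {0, 1, 3, 5, 6};
  int adjncy[] = {1,  0, 2,  1, 3,  2};
  int adjwgt[] = {1,  1, 1,  1, 1,  1};
  int part[]   = {0, 0, 1, 1};

  int cxadj[3] = {0}, cadjncy[8], cadjwgt[8], marker[2] = {-1, -1};
  int cnedges = 0;

  for (int ci=0; ci<cnvtxs; ci++) {
    for (int i=0; i<nvtxs; i++) {
      if (part[i] != ci) continue;
      for (int j=xadj[i]; j<xadj[i+1]; j++) {
        int k = part[adjncy[j]];
        if (k == ci) continue;                 /* skip intra-part edges */
        if (marker[k] == -1) {                 /* first time part k is seen */
          cadjncy[cnedges] = k;
          cadjwgt[cnedges] = adjwgt[j];
          marker[k] = cnedges++;
        } else {
          cadjwgt[marker[k]] += adjwgt[j];     /* aggregate parallel edges */
        }
      }
    }
    cxadj[ci+1] = cnedges;
    for (int j=cxadj[ci]; j<cnedges; j++)      /* reset marker for the next part */
      marker[cadjncy[j]] = -1;
  }

  for (int ci=0; ci<cnvtxs; ci++)
    for (int j=cxadj[ci]; j<cxadj[ci+1]; j++)
      printf("coarse edge %d -> %d (weight %d)\n", ci, cadjncy[j], cadjwgt[j]);
  return 0;
}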
Example #8
File: mesh.c Project: ADTG-VSSC/SU2
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind, 
                 idx_t *numflag, idx_t *ncommon, idx_t **r_xadj, 
		 idx_t **r_adjncy, MPI_Comm *comm)
{
  idx_t i, j, jj, k, kk, m;
  idx_t npes, mype, pe, count, mask, pass;
  idx_t nelms, lnns, my_nns, node;
  idx_t firstelm, firstnode, lnode, nrecv, nsend;
  idx_t *scounts, *rcounts, *sdispl, *rdispl;
  idx_t *nodedist, *nmap, *auxarray;
  idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL;
  idx_t *sbuffer, *rbuffer, *htable;
  ikv_t *nodelist, *recvbuffer;
  idx_t maxcount, *ind, *wgt;
  idx_t gmaxnode, gminnode;
  size_t curmem;

  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();
  
  /* Get basic comm info */
  gkMPI_Comm_size(*comm, &npes);
  gkMPI_Comm_rank(*comm, &mype);


  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag > 0) 
    ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind));
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMaxComm(*comm, imax(eptr[nelms], eind));


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERT(nelms > 0);

  /* construct node distribution array */
  nodedist = ismalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = ikvmalloc(eptr[nelms], "nodelist");
  auxarray = imalloc(eptr[nelms], "auxarray");
  htable   = ismalloc(gk_max(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(npes, "scounts");
  rcounts  = imalloc(npes, "rcounts");
  sdispl   = imalloc(npes+1, "sdispl");
  rdispl   = imalloc(npes+1, "rdispl");


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikvsorti(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = imalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  gkMPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERT(pe < npes);

  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERT(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer");

  gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer, 
      rcounts, rdispl, IDX_T, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = ismalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERT(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = imalloc(gk_max(1, nsend), "sbuffer");

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERT(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv   = rdispl[npes];
  rbuffer = imalloc(gk_max(1, nrecv), "rbuffer");

  gkMPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_T, (void *)rbuffer, 
      rcounts, rdispl, IDX_T, *comm);

  k = -1;
  nptr = ismalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERT(k+1 == lnns);
  ASSERT(nptr[lnns] == nrecv)

  myxadj = *r_xadj = (idx_t *)malloc(sizeof(idx_t)*(nelms+1));
  if (myxadj == NULL) 
    gk_errexit(SIGMEM, "Failed to allocate memory for the dual graph's xadj array.\n");
  iset(nelms+1, 0, myxadj);

  iset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  maxcount = 200;
  ind = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: ind");
  wgt = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: wgt");

  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i)
            continue;

          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
                }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
            }
          }

          /* Adjust the memory.
             This will be replaced by an idxrealloc() once GKlib is incorporated */
          if (count == maxcount-1) {
            maxcount *= 2;
            ind = irealloc(ind, maxcount, "ParMETIS_V3_Mesh2Dual: ind");
            wgt = irealloc(wgt, maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommon) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
        }
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]);
      if (myadjncy == NULL)
        gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts,
      &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer,
      &nptr, &ind, &wgt, LTERM);

  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return METIS_OK;
}
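A hedged MPI usage sketch, assuming the parmetis.h header and an MPI environment: each rank contributes two triangles of a strip-shaped mesh (all values invented). The returned xadj/adjncy arrays are allocated with plain malloc in the routine above, so they are released with free() here.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <parmetis.h>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  MPI_Comm comm = MPI_COMM_WORLD;

  int npes, mype;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* Toy distributed mesh: a triangle strip with two elements per rank.
     Rank p owns global elements 2p and 2p+1 over nodes 2p..2p+3. */
  idx_t *elmdist = malloc((npes+1)*sizeof(idx_t));
  for (int p=0; p<=npes; p++) elmdist[p] = 2*p;

  idx_t first = 2*mype;                    /* first global node of this rank's piece */
  idx_t eptr[] = {0, 3, 6};
  idx_t eind[] = {first, first+1, first+2,  first+1, first+3, first+2};

  idx_t numflag = 0, ncommon = 2;
  idx_t *xadj = NULL, *adjncy = NULL;

  ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, &numflag, &ncommon,
                        &xadj, &adjncy, &comm);

  for (idx_t i=0; i<2; i++) {
    printf("[rank %d] element %d neighbors:", mype, (int)(elmdist[mype]+i));
    for (idx_t j=xadj[i]; j<xadj[i+1]; j++) printf(" %d", (int)adjncy[j]);
    printf("\n");
  }

  free(xadj); free(adjncy); free(elmdist);
  MPI_Finalize();
  return 0;
}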
Example #9
graph_t *FixGraph(graph_t *graph)
{
  idx_t i, j, k, l, nvtxs, nedges;
  idx_t *xadj, *adjncy, *adjwgt;
  idx_t *nxadj, *nadjncy, *nadjwgt;
  graph_t *ngraph;
  uvw_t *edges;


  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  ASSERT(adjwgt != NULL);

  ngraph = CreateGraph();

  ngraph->nvtxs = nvtxs;

  /* deal with vertex weights/sizes */
  ngraph->ncon  = graph->ncon;
  ngraph->vwgt  = icopy(nvtxs*graph->ncon, graph->vwgt, 
                        imalloc(nvtxs*graph->ncon, "FixGraph: vwgt"));

  ngraph->vsize = ismalloc(nvtxs, 1, "FixGraph: vsize");
  if (graph->vsize)
    icopy(nvtxs, graph->vsize, ngraph->vsize);

  /* fix graph by sorting the "superset" of edges */
  edges = (uvw_t *)gk_malloc(sizeof(uvw_t)*2*xadj[nvtxs], "FixGraph: edges");

  for (nedges=0, i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      /* keep only the upper-triangular part of the adjacency matrix */
      if (i < adjncy[j]) {
        edges[nedges].u = i;
        edges[nedges].v = adjncy[j];
        edges[nedges].w = adjwgt[j];
        nedges++;
      }
      else if (i > adjncy[j]) {
        edges[nedges].u = adjncy[j];
        edges[nedges].v = i;
        edges[nedges].w = adjwgt[j];
        nedges++;
      }
    }
  }

  uvwsorti(nedges, edges);


  /* keep the unique subset */
  for (k=0, i=1; i<nedges; i++) {
    if (edges[k].v != edges[i].v || edges[k].u != edges[i].u) {
      edges[++k] = edges[i];
    }
  }
  nedges = k+1;

  /* allocate memory for the fixed graph */
  nxadj   = ngraph->xadj   = ismalloc(nvtxs+1, 0, "FixGraph: nxadj");
  nadjncy = ngraph->adjncy = imalloc(2*nedges, "FixGraph: nadjncy");
  nadjwgt = ngraph->adjwgt = imalloc(2*nedges, "FixGraph: nadjwgt");

  /* create the adjacency list of the fixed graph from the upper-triangular
     part of the adjacency matrix */
  for (k=0; k<nedges; k++) {
    nxadj[edges[k].u]++;
    nxadj[edges[k].v]++;
  }
  MAKECSR(i, nvtxs, nxadj);

  for (k=0; k<nedges; k++) {
    nadjncy[nxadj[edges[k].u]] = edges[k].v;
    nadjncy[nxadj[edges[k].v]] = edges[k].u;
    nadjwgt[nxadj[edges[k].u]] = edges[k].w;
    nadjwgt[nxadj[edges[k].v]] = edges[k].w;
    nxadj[edges[k].u]++;
    nxadj[edges[k].v]++;
  }
  SHIFTCSR(i, nvtxs, nxadj);

  gk_free((void **)&edges, LTERM);

  return ngraph;
}
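A standalone sketch (invented data) of the edge-fixing idea: gather every edge in upper-triangular (u<v) form, sort, and keep one copy of each, after which the symmetric CSR structure would be rebuilt exactly as in the counting/MAKECSR/scatter/SHIFTCSR steps above. qsort() stands in for uvwsorti().

#include <stdio.h>
#include <stdlib.h>

typedef struct { int u, v, w; } uvw;

static int cmp_uvw(const void *a, const void *b)
{
  const uvw *x = a, *y = b;
  if (x->u != y->u) return x->u - y->u;
  return x->v - y->v;
}

int main(void)
{
  /* Broken 3-vertex graph: edge (0,1) listed twice at vertex 0, and edge (1,2)
     listed only at vertex 1 (no reverse entry at vertex 2). */
  int nvtxs = 3;
  int xadj[]   = {0, 2, 4, 4};
  int adjncy[] = {1, 1,  0, 2};
  int adjwgt[] = {5, 5,  5, 7};

  uvw edges[8];
  int nedges = 0;
  for (int i=0; i<nvtxs; i++)
    for (int j=xadj[i]; j<xadj[i+1]; j++)
      if (i != adjncy[j]) {                      /* drop self-loops, keep u<v form */
        edges[nedges].u = i < adjncy[j] ? i : adjncy[j];
        edges[nedges].v = i < adjncy[j] ? adjncy[j] : i;
        edges[nedges].w = adjwgt[j];
        nedges++;
      }

  qsort(edges, nedges, sizeof(uvw), cmp_uvw);

  int k = 0;                                     /* keep the unique subset */
  for (int i=1; i<nedges; i++)
    if (edges[i].u != edges[k].u || edges[i].v != edges[k].v)
      edges[++k] = edges[i];
  nedges = k+1;

  for (int e=0; e<nedges; e++)
    printf("undirected edge (%d,%d) weight %d\n", edges[e].u, edges[e].v, edges[e].w);
  return 0;
}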