Exemple #1
0
/*************************************************************************
* Builds the dual graph of a mesh in CSR form: one graph vertex per mesh
* element, with the neighbors of each element being whatever
* FindCommonElements reports for it under the ncommon threshold.
*
*   ne, nn    - number of elements and number of nodes in the mesh
*   eptr/eind - CSR element->node lists; the nodes of element i are
*               eind[eptr[i]] .. eind[eptr[i+1]-1]
*   ncommon   - threshold forwarded to FindCommonElements; values below 1
*               are raised to 1 (a message is printed)
*   r_xadj    - receives the ne+1 entry row-pointer array
*   r_adjncy  - receives the adjacency array (xadj[ne] meaningful entries)
*
* Both output arrays are obtained with plain malloc() because they are
* handed back to the caller, who is responsible for releasing them.
**************************************************************************/
void CreateGraphDual(idx_t ne, idx_t nn, idx_t *eptr, idx_t *eind, idx_t ncommon, 
          idx_t **r_xadj, idx_t **r_adjncy)
{
  idx_t i, j, nnbrs, nadj;
  idx_t *nptr, *nind;
  idx_t *xadj, *adjncy;
  idx_t *marker, *nbrs;

  if (ncommon < 1) {
    printf("  Increased ncommon to 1, as it was initially %"PRIDX"\n", ncommon);
    ncommon = 1;
  }

  /* construct the node-element list first: count the elements touching
     each node, prefix-sum the counts, then scatter the element ids */
  nptr = ismalloc(nn+1, 0, "CreateGraphDual: nptr");
  nind = imalloc(eptr[ne], "CreateGraphDual: nind");

  for (i=0; i<ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nptr[eind[j]]++;
  }
  MAKECSR(i, nn, nptr);

  for (i=0; i<ne; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++)
      nind[nptr[eind[j]]++] = i;
  }
  SHIFTCSR(i, nn, nptr);


  /* Allocate memory for xadj, since you know its size.
     These are done using standard malloc as they are returned
     to the calling function */
  if ((xadj = (idx_t *)malloc((ne+1)*sizeof(idx_t))) == NULL) 
    gk_errexit(SIGMEM, "***Failed to allocate memory for xadj.\n");
  *r_xadj = xadj;
  iset(ne+1, 0, xadj);

  /* allocate memory for working arrays used by FindCommonElements */
  marker = ismalloc(ne, 0, "CreateGraphDual: marker");
  nbrs   = imalloc(ne, "CreateGraphDual: nbrs");

  /* first pass: only the neighbor counts are kept, to size adjncy */
  for (i=0; i<ne; i++) {
    xadj[i] = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, 
                  nind, eptr, ncommon, marker, nbrs);
  }
  MAKECSR(i, ne, xadj);

  /* Allocate memory for adjncy, since you now know its size.
     These are done using standard malloc as they are returned
     to the calling function.
     A dual graph without any edges has xadj[ne] == 0; malloc(0) is allowed
     to return NULL, which must not be mistaken for an out-of-memory
     condition, so at least one entry is always requested. */
  nadj = (xadj[ne] > 0 ? xadj[ne] : 1);
  if ((adjncy = (idx_t *)malloc(nadj*sizeof(idx_t))) == NULL) {
    free(xadj);
    *r_xadj = NULL;
    gk_errexit(SIGMEM, "***Failed to allocate memory for adjncy.\n");
  }
  *r_adjncy = adjncy;

  /* second pass: recompute the neighbor lists and store them; xadj[i] is
     used as the write cursor of row i and restored by SHIFTCSR below */
  for (i=0; i<ne; i++) {
    nnbrs = FindCommonElements(i, eptr[i+1]-eptr[i], eind+eptr[i], nptr, 
                nind, eptr, ncommon, marker, nbrs);
    for (j=0; j<nnbrs; j++)
      adjncy[xadj[i]++] = nbrs[j];
  }
  SHIFTCSR(i, ne, xadj);
  
  gk_free((void **)&nptr, &nind, &marker, &nbrs, LTERM);
}
Exemple #2
0
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph on all the processors and proceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, nvtxs, nedges, ncon;
  idx_t mype, npes, srnpes, srmype; 
  idx_t *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
  idx_t *part, *lwhere, *home;
  idx_t lnparts, fpart, fpe, lnpes, ngroups;
  idx_t *rcounts, *rdispls;
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  idx_t sr_pe, gd_pe, sr, gd, who_wins;
  real_t my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
  real_t rating, max_rating, your_cost = -1.0, your_balance = -1.0;
  real_t lbsum, min_lbsum, *lbvec, *tpwgts, *tpwgts2, buffer[2];
  graph_t *agraph, cgraph;
  ctrl_t *myctrl;
  MPI_Status status;
  MPI_Comm ipcomm, srcomm;
  struct {
    double cost;
    int rank;
  } lpecost, gpecost;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  WCOREPUSH;

  vtxdist = graph->vtxdist;
  agraph  = AssembleAdaptiveGraph(ctrl, graph);
  nvtxs   = cgraph.nvtxs  = agraph->nvtxs;
  nedges  = cgraph.nedges = agraph->nedges;
  ncon    = cgraph.ncon   = agraph->ncon;
  xadj    = cgraph.xadj   = icopy(nvtxs+1, agraph->xadj, iwspacemalloc(ctrl, nvtxs+1));
  vwgt    = cgraph.vwgt   = icopy(nvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, nvtxs*ncon));
  vsize   = cgraph.vsize  = icopy(nvtxs, agraph->vsize, iwspacemalloc(ctrl, nvtxs));
  adjncy  = cgraph.adjncy = icopy(nedges, agraph->adjncy, iwspacemalloc(ctrl, nedges));
  adjwgt  = cgraph.adjwgt = icopy(nedges, agraph->adjwgt, iwspacemalloc(ctrl, nedges));
  part    = cgraph.where  = agraph->where = iwspacemalloc(ctrl, nvtxs);

  lwhere = iwspacemalloc(ctrl, nvtxs);
  home   = iwspacemalloc(ctrl, nvtxs);
  lbvec  = rwspacemalloc(ctrl, graph->ncon);


  /****************************************/
  /****************************************/
  if (ctrl->ps_relation == PARMETIS_PSR_UNCOUPLED) {
    WCOREPUSH;
    rcounts = iwspacemalloc(ctrl, ctrl->npes);
    rdispls = iwspacemalloc(ctrl, ctrl->npes+1);

    for (i=0; i<ctrl->npes; i++) 
      rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
    MAKECSR(i, ctrl->npes, rdispls);

    gkMPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_T,
        (void *)part, rcounts, rdispls, IDX_T, ctrl->comm);

    for (i=0; i<agraph->nvtxs; i++)
      home[i] = part[i];

    WCOREPOP;  /* local frees */
  }
  else {
    for (i=0; i<ctrl->npes; i++) {
      for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
        part[j] = home[j] = i;
    }
  }

  /* Ensure that the initial partitioning is legal */
  for (i=0; i<agraph->nvtxs; i++) {
    if (part[i] >= ctrl->nparts)
      part[i] = home[i] = part[i] % ctrl->nparts;
    if (part[i] < 0)
      part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
  }
  /****************************************/
  /****************************************/

  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      rprintf(ctrl, "input cut: %"PRIDX", balance: ", ComputeSerialEdgeCut(agraph)));
  for (i=0; i<agraph->ncon; i++)
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

  /****************************************/
  /* Split the processors into two groups */
  /****************************************/
  sr = (ctrl->mype % 2 == 0) ? 1 : 0;
  gd = (ctrl->mype % 2 == 1) ? 1 : 0;

  if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
    sr = 1;
    gd = 0;
  }

  sr_pe = 0;
  gd_pe = 1;

  gkMPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);

  if (sr == 1) { /* Half of the processors do scratch-remap */
    ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, npes), 1);
    gkMPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
    gkMPI_Comm_rank(srcomm, &srmype);
    gkMPI_Comm_size(srcomm, &srnpes);

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_SEED] = ctrl->sync + (mype % ngroups) + 1;

    tpwgts  = ctrl->tpwgts;
    tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

    iset(nvtxs, 0, lwhere);
    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = srnpes;
    while (lnpes > 1 && lnparts > 1) {
      PASSERT(ctrl, agraph->nvtxs > 1);
      /* determine the weights of the two partitions as a function of the 
         weight of the target partition weights */
      for (j=(lnparts>>1), i=0; i<ncon; i++) {
        tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
        tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
        wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
        tpwgts2[i]      *= wsum;
        tpwgts2[ncon+i] *= wsum;
      }

      METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, 
            agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, 
            &twoparts, tpwgts2, NULL, moptions, &edgecut, part);

      /* pick one of the branches */
      if (srmype < fpe+lnpes/2) {
        KeepPart(ctrl, agraph, part, 0);
        lnpes   = lnpes/2;
        lnparts = lnparts/2;
      }
      else {
        KeepPart(ctrl, agraph, part, 1);
        fpart   = fpart + lnparts/2;
        fpe     = fpe + lnpes/2;
        lnpes   = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }

    if (lnparts == 1) { /* Case in which srnpes is greater or equal to nparts */
      /* Only the first process will assign labels (for the reduction to work) */
      if (srmype == fpe) {
        for (i=0; i<agraph->nvtxs; i++) 
          lwhere[agraph->label[i]] = fpart;
      }
    }
    else { /* Case in which srnpes is smaller than nparts */
      /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
      tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
      for (j=0; j<ncon; j++) {
        for (wsum=0.0, i=0; i<lnparts; i++) {
          tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
          wsum += tpwgts[i*ncon+j];
        }
        for (wsum=1.0/wsum, i=0; i<lnparts; i++)
          tpwgts[i*ncon+j] *= wsum;
      }

      METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
	    agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
            &edgecut, part);

      for (i=0; i<agraph->nvtxs; i++) 
        lwhere[agraph->label[i]] = fpart + part[i];
    }

    gkMPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_T, MPI_SUM, srcomm);

    edgecut = ComputeSerialEdgeCut(&cgraph);
    ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
    lbsum = rsum(ncon, lbvec, 1);
    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ipcomm);
    gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ipcomm);
    lpecost.rank = ctrl->mype;
    lpecost.cost = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon))
        lpecost.cost = (double)edgecut;
      else
        lpecost.cost = (double)max_cut + lbsum;
    }
    gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ipcomm);

    if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) 
      gkMPI_Send((void *)part, nvtxs, IDX_T, sr_pe, 1, ctrl->comm);

    if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) 
      gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status);

    if (ctrl->mype == sr_pe) {
      icopy(nvtxs, part, lwhere);
      SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
    }

    gkMPI_Comm_free(&srcomm);
  }
Exemple #3
0
/*****************************************************************************
* Builds the nodal graph of a mesh made of 8-node elements.
*
*   nelmnts - number of elements
*   nvtxs   - number of mesh nodes
*   elmnts  - 8*nelmnts node ids, the 8 corners of each element in sequence
*   dxadj   - output: nvtxs+1 row pointers of the nodal graph
*   dadjncy - output: neighbor lists, filled contiguously in node order
*
* Two nodes become adjacent when they occupy corners of the same element
* that the local `table` below lists as neighbors.
******************************************************************************/
void HEXNODALMETIS(int nelmnts, int nvtxs, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
    /* table[c] holds the three local corner indices adjacent to corner c */
    int table[8][3] = {
        {1, 3, 4}, {0, 2, 5}, {1, 3, 6}, {0, 2, 7},
        {0, 5, 7}, {1, 4, 6}, {2, 5, 7}, {3, 4, 6}
    };
    int elem, node, pos, slot, base, k, t, nbr, total, nedges;
    idxtype *nptr, *nind, *mark;

    /* Node-to-element list, step 1: count the elements around every node */
    nptr = idxsmalloc(nvtxs+1, 0, "HEXNODALMETIS: nptr");
    total = 8*nelmnts;
    for (pos=0; pos<total; pos++)
        nptr[elmnts[pos]]++;
    MAKECSR(pos, nvtxs, nptr);

    /* Step 2: scatter the element ids, using nptr[] as running cursors */
    nind = idxmalloc(nptr[nvtxs], "HEXNODALMETIS: nind");
    pos = 0;
    for (elem=0; elem<nelmnts; elem++) {
        for (k=0; k<8; k++) {
            nind[nptr[elmnts[pos]]++] = elem;
            pos++;
        }
    }

    /* Step 3: the cursors moved one row ahead; shift them back */
    node = nvtxs;
    while (node > 0) {
        nptr[node] = nptr[node-1];
        node--;
    }
    nptr[0] = 0;


    /* mark[x] == node means x is already recorded for the current node */
    mark = idxsmalloc(nvtxs, -1, "HEXNODALMETIS: mark");

    nedges = 0;
    dxadj[0] = 0;
    for (node=0; node<nvtxs; node++) {
        mark[node] = node;   /* never list a node as its own neighbor */

        for (slot=nptr[node]; slot<nptr[node+1]; slot++) {
            base = 8*nind[slot];

            /* locate which corner of this element the node occupies */
            k = 0;
            while (k < 8 && elmnts[base+k] != node)
                k++;
            ASSERT(k != 8);

            /* add the three corner neighbors that have not been seen yet */
            for (t=0; t<3; t++) {
                nbr = elmnts[base+table[k][t]];
                if (mark[nbr] == node)
                    continue;
                mark[nbr] = node;
                dadjncy[nedges++] = nbr;
            }
        }
        dxadj[node+1] = nedges;
    }

    free(nind);
    free(nptr);
    free(mark);

}
Exemple #4
0
/*****************************************************************************
* This function creates the dual of a finite element mesh
*
*   nelmnts - number of elements
*   nvtxs   - number of mesh nodes
*   etype   - element type; indexes the esizes/mgcnums tables below, so it
*             must be in 1..4 (nodes per element 3, 4, 8, 4 and number of
*             shared nodes that makes two elements adjacent 2, 3, 4, 2)
*   elmnts  - esize*nelmnts node ids, the nodes of each element in sequence
*   dxadj   - output: nelmnts+1 row pointers of the dual graph
*   dadjncy - output: neighbor lists. While the graph is being built every
*             element owns esize consecutive slots, so the array must hold
*             at least esize*nelmnts entries; it is compacted at the end.
*
* The candidate lists ind/wgt are sized from the mesh itself rather than
* being fixed-size stack buffers, so elements whose nodes are shared with a
* large number of other elements cannot overrun them.
******************************************************************************/
void GENDUALMETIS(int nelmnts, int nvtxs, int etype, idxtype *elmnts, idxtype *dxadj, idxtype *dadjncy)
{
    int i, j, jj, k, kk, kkk, l, m, n, mask, maxcand;
    idxtype *nptr, *nind;
    idxtype *mark, *ind, *wgt;
    int esize, esizes[] = {-1, 3, 4, 8, 4},
                          mgcnum, mgcnums[] = {-1, 2, 3, 4, 2};

    /* mark[] is a small hash table: (element id & mask) -> slot in ind/wgt */
    mask = (1<<11)-1;
    mark = idxsmalloc(mask+1, -1, "GENDUALMETIS: mark");

    /* Get the element size and magic number for the particular element */
    esize = esizes[etype];
    mgcnum = mgcnums[etype];

    /* Construct the node-element list first */
    nptr = idxsmalloc(nvtxs+1, 0, "GENDUALMETIS: nptr");
    for (j=esize*nelmnts, i=0; i<j; i++)
        nptr[elmnts[i]]++;
    MAKECSR(i, nvtxs, nptr);

    nind = idxmalloc(nptr[nvtxs], "GENDUALMETIS: nind");
    for (k=i=0; i<nelmnts; i++) {
        for (j=0; j<esize; j++, k++)
            nind[nptr[elmnts[k]]++] = i;
    }
    for (i=nvtxs; i>0; i--)
        nptr[i] = nptr[i-1];
    nptr[0] = 0;

    /* The number of distinct candidates of an element can never exceed the
       total number of elements incident on its nodes; use the largest such
       total as the capacity of the candidate lists (at least one entry). */
    for (maxcand=1, i=0; i<nelmnts; i++) {
        for (m=j=0; j<esize; j++) {
            n = elmnts[esize*i+j];
            m += nptr[n+1]-nptr[n];
        }
        if (m > maxcand)
            maxcand = m;
    }
    ind = idxmalloc(maxcand, "GENDUALMETIS: ind");
    wgt = idxmalloc(maxcand, "GENDUALMETIS: wgt");

    /* Every element initially owns esize consecutive slots of dadjncy;
       dxadj[i] is the write cursor into the slots of element i */
    for (i=0; i<nelmnts; i++)
        dxadj[i] = esize*i;

    for (i=0; i<nelmnts; i++) {
        /* Count, for every higher-numbered element, how many nodes it
           shares with element i */
        for (m=j=0; j<esize; j++) {
            n = elmnts[esize*i+j];
            /* the element list of a node is in increasing order, so walk it
               backwards and stop at the first element that is not above i */
            for (k=nptr[n+1]-1; k>=nptr[n]; k--) {
                if ((kk = nind[k]) <= i)
                    break;

                kkk = kk&mask;
                if ((l = mark[kkk]) == -1) {
                    /* first time this hash bucket is used: new candidate */
                    ind[m] = kk;
                    wgt[m] = 1;
                    mark[kkk] = m++;
                }
                else if (ind[l] == kk) {
                    wgt[l]++;
                }
                else {
                    /* hash collision: fall back to a linear search */
                    for (jj=0; jj<m; jj++) {
                        if (ind[jj] == kk) {
                            wgt[jj]++;
                            break;
                        }
                    }
                    if (jj == m) {
                        ind[m] = kk;
                        wgt[m++] = 1;
                    }
                }
            }
        }
        /* Candidates sharing exactly mgcnum nodes become neighbors; the
           edge is stored in both directions. Reset the hash on the way. */
        for (j=0; j<m; j++) {
            if (wgt[j] == mgcnum) {
                k = ind[j];
                dadjncy[dxadj[i]++] = k;
                dadjncy[dxadj[k]++] = i;
            }
            mark[ind[j]&mask] = -1;
        }
    }

    /* Go and consolidate the dxadj and dadjncy */
    for (j=i=0; i<nelmnts; i++) {
        for (k=esize*i; k<dxadj[i]; k++, j++)
            dadjncy[j] = dadjncy[k];
        dxadj[i] = j;
    }
    for (i=nelmnts; i>0; i--)
        dxadj[i] = dxadj[i-1];
    dxadj[0] = 0;

    free(mark);
    free(nptr);
    free(nind);
    free(ind);
    free(wgt);

}
Exemple #5
0
/*************************************************************************
* Returns a newly created copy of graph whose adjacency structure has been
* repaired: self-loops are dropped, repeated edges are collapsed into one,
* and every remaining edge is stored in both directions with one weight.
*
* Vertex weights are copied as-is; vertex sizes are copied when present
* and default to 1 otherwise. graph->adjwgt must not be NULL. The input
* graph is not modified.
**************************************************************************/
graph_t *FixGraph(graph_t *graph)
{
  idx_t i, j, k, nvtxs, nedges;
  idx_t *xadj, *adjncy, *adjwgt;
  idx_t *nxadj, *nadjncy, *nadjwgt;
  graph_t *ngraph;
  uvw_t *edges;


  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  ASSERT(adjwgt != NULL);

  ngraph = CreateGraph();

  ngraph->nvtxs = nvtxs;

  /* deal with vertex weights/sizes */
  ngraph->ncon  = graph->ncon;
  ngraph->vwgt  = icopy(nvtxs*graph->ncon, graph->vwgt, 
                        imalloc(nvtxs*graph->ncon, "FixGraph: vwgt"));

  ngraph->vsize = ismalloc(nvtxs, 1, "FixGraph: vsize");
  if (graph->vsize)
    icopy(nvtxs, graph->vsize, ngraph->vsize);

  /* fix graph by sorting the "superset" of edges */
  edges = (uvw_t *)gk_malloc(sizeof(uvw_t)*2*xadj[nvtxs], "FixGraph: edges");

  for (nedges=0, i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      /* keep only the upper-triangular part of the adjacency matrix:
         every edge is recorded as (smaller, larger); self-loops match
         neither branch and are therefore dropped */
      if (i < adjncy[j]) {
        edges[nedges].u = i;
        edges[nedges].v = adjncy[j];
        edges[nedges].w = adjwgt[j];
        nedges++;
      }
      else if (i > adjncy[j]) {
        edges[nedges].u = adjncy[j];
        edges[nedges].v = i;
        edges[nedges].w = adjwgt[j];
        nedges++;
      }
    }
  }

  /* NOTE(review): the pass below relies on uvwsorti placing records with
     the same (u, v) next to each other -- confirm against its definition */
  uvwsorti(nedges, edges);


  /* keep the unique subset (the first record of each run survives).
     This must be skipped when no edge was collected: otherwise nedges
     would become 1 and the uninitialized edges[0] would be inserted
     into the new graph. */
  if (nedges > 0) {
    for (k=0, i=1; i<nedges; i++) {
      if (edges[k].v != edges[i].v || edges[k].u != edges[i].u) {
        edges[++k] = edges[i];
      }
    }
    nedges = k+1;
  }

  /* allocate memory for the fixed graph */
  nxadj   = ngraph->xadj   = ismalloc(nvtxs+1, 0, "FixGraph: nxadj");
  nadjncy = ngraph->adjncy = imalloc(2*nedges, "FixGraph: nadjncy");
  nadjwgt = ngraph->adjwgt = imalloc(2*nedges, "FixGraph: nadjwgt");

  /* create the adjacency list of the fixed graph from the upper-triangular
     part of the adjacency matrix */
  for (k=0; k<nedges; k++) {
    nxadj[edges[k].u]++;
    nxadj[edges[k].v]++;
  }
  MAKECSR(i, nvtxs, nxadj);

  /* nxadj[] doubles as the per-row write cursor; SHIFTCSR restores it */
  for (k=0; k<nedges; k++) {
    nadjncy[nxadj[edges[k].u]] = edges[k].v;
    nadjncy[nxadj[edges[k].v]] = edges[k].u;
    nadjwgt[nxadj[edges[k].u]] = edges[k].w;
    nadjwgt[nxadj[edges[k].v]] = edges[k].w;
    nxadj[edges[k].u]++;
    nxadj[edges[k].v]++;
  }
  SHIFTCSR(i, nvtxs, nxadj);

  gk_free((void **)&edges, LTERM);

  return ngraph;
}