Example 1
/*************************************************************************
* This function implements a simple graph adaptation strategy.
**************************************************************************/
void Mc_AdaptGraph(graph_t *graph, idx_t *part, idx_t ncon, idx_t nparts, MPI_Comm comm)
{
  idx_t h, i;
  idx_t nvtxs;
  idx_t npes, mype;
  idx_t *vwgt, *pwgts;

  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  nvtxs = graph->nvtxs;
  vwgt = graph->vwgt;
  pwgts = ismalloc(nparts*ncon, 1, "pwgts");

  if (mype == 0) {
    for (i=0; i<nparts; i++)
      for (h=0; h<ncon; h++)
        pwgts[i*ncon+h] = RandomInRange(20)+1;
  }

  MPI_Bcast((void *)pwgts, nparts*ncon, IDX_T, 0, comm);

  for (i=0; i<nvtxs; i++)
    for (h=0; h<ncon; h++)
      vwgt[i*ncon+h] = pwgts[part[i]*ncon+h];

  free(pwgts);
  return;
}
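
The pattern above is worth isolating: rank 0 draws a random target weight for every (partition, constraint) pair, broadcasts the table, and each rank stamps its local vertices with the weight of the partition they belong to, so all ranks stay consistent without further communication. A minimal self-contained sketch of the same pattern, assuming plain int weights and standard MPI calls instead of the idx_t/gkMPI_* wrappers used above:

#include <mpi.h>
#include <stdlib.h>

/* Hypothetical helper: give every vertex the (randomly drawn) weight of its
   partition; rank 0 draws, all ranks apply the same table after the Bcast. */
void adapt_weights(int *vwgt, const int *part, int nvtxs, int nparts, MPI_Comm comm)
{
  int rank, i;
  int *pwgts = (int *)malloc(nparts * sizeof(int));

  MPI_Comm_rank(comm, &rank);
  if (rank == 0) {
    for (i = 0; i < nparts; i++)
      pwgts[i] = rand() % 20 + 1;      /* same range as RandomInRange(20)+1 */
  }
  MPI_Bcast(pwgts, nparts, MPI_INT, 0, comm);

  for (i = 0; i < nvtxs; i++)          /* vertex inherits its partition's weight */
    vwgt[i] = pwgts[part[i]];

  free(pwgts);
}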
Example 2
int main(int argc, char *argv[])
{
  idx_t mype, npes;
  MPI_Comm comm;

  MPI_Init(&argc, &argv);
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  if (argc != 2 && argc != 3) {
    if (mype == 0)
      printf("Usage: %s <graph-file> [coord-file]\n", argv[0]);

    MPI_Finalize();
    exit(0);
  }

  TestParMetis_GPart(argv[1], (argc == 3 ? argv[2] : NULL), comm); 

  gkMPI_Comm_free(&comm);

  MPI_Finalize();

  return 0;
}
Example 3
/*************************************************************************
* This function implements a simple graph adaptation strategy.
**************************************************************************/
void AdaptGraph(graph_t *graph, idx_t afactor, MPI_Comm comm)
{
  idx_t i, nvtxs, nadapt, firstvtx, lastvtx;
  idx_t npes, mype, mypwgt, max, min, sum;
  idx_t *vwgt, *xadj, *adjncy, *adjwgt, *perm;

  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  srand(mype*afactor);

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  if (graph->adjwgt == NULL)
    adjwgt = graph->adjwgt = ismalloc(graph->nedges, 1, "AdaptGraph: adjwgt");
  else
    adjwgt = graph->adjwgt;
  vwgt = graph->vwgt;

  firstvtx = graph->vtxdist[mype];
  lastvtx = graph->vtxdist[mype+1];

  perm = imalloc(nvtxs, "AdaptGraph: perm");
  FastRandomPermute(nvtxs, perm, 1);

  /* only the last value is kept; the first two calls merely advance the
     random sequence */
  nadapt = RandomInRange(nvtxs);
  nadapt = RandomInRange(nvtxs);
  nadapt = RandomInRange(nvtxs);

  for (i=0; i<nadapt; i++)
    vwgt[perm[i]] = afactor*vwgt[perm[i]];

/*
  for (i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) {
        adjwgt[j] = (int)pow(1.0*(gk_min(vwgt[i],vwgt[k-firstvtx])), .6667);
        if (adjwgt[j] == 0)
          adjwgt[j] = 1;
      }
    }
  }
*/

  mypwgt = isum(nvtxs, vwgt, 1);

  MPI_Allreduce((void *)&mypwgt, (void *)&max, 1, IDX_T, MPI_MAX, comm);
  MPI_Allreduce((void *)&mypwgt, (void *)&min, 1, IDX_T, MPI_MIN, comm);
  MPI_Allreduce((void *)&mypwgt, (void *)&sum, 1, IDX_T, MPI_SUM, comm);

  /* (max*npes)/sum is the heaviest processor's load over the average load */
  if (mype == 0)
    printf("Initial Load Imbalance: %5.4"PRREAL", [%5"PRIDX" %5"PRIDX" %5"PRIDX"] for afactor: %"PRIDX"\n", (1.0*max*npes)/(1.0*sum), min, max, sum, afactor);

  free(perm);
}
Example 4
idx_t ComputeRealCutFromMoved(idx_t *vtxdist, idx_t *mvtxdist, idx_t *part, 
    idx_t *mpart, char *filename, MPI_Comm comm)
{
  idx_t i, j, nvtxs, mype, npes, cut;
  idx_t *xadj, *adjncy, *gpart, *gmpart, *perm, *sizes;
  MPI_Status status;


  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  if (mype != 0) {
    gkMPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, 1, comm);
    gkMPI_Send((void *)mpart, mvtxdist[mype+1]-mvtxdist[mype], IDX_T, 0, 1, comm);
  }
  else {  /* Processor 0 does all the rest */
    gpart = imalloc(vtxdist[npes], "ComputeRealCut: gpart");
    icopy(vtxdist[1], part, gpart);
    gmpart = imalloc(mvtxdist[npes], "ComputeRealCut: gmpart");
    icopy(mvtxdist[1], mpart, gmpart);

    for (i=1; i<npes; i++) {
      gkMPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_T, i, 1, comm, &status);
      gkMPI_Recv((void *)(gmpart+mvtxdist[i]), mvtxdist[i+1]-mvtxdist[i], IDX_T, i, 1, comm, &status);
    }

    /* Now reconstruct the permutation that maps the original graph onto the moved graph (mgraph) */
    perm  = imalloc(vtxdist[npes], "ComputeRealCut: perm");
    sizes = ismalloc(npes+1, 0, "ComputeRealCut: sizes");

    for (i=0; i<vtxdist[npes]; i++)
      sizes[gpart[i]]++;
    MAKECSR(i, npes, sizes);
    for (i=0; i<vtxdist[npes]; i++)
      perm[i] = sizes[gpart[i]]++;

    /* Ok, now read the graph from the file */
    ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy);

    /* OK, now compute the cut */
    for (cut=0, i=0; i<nvtxs; i++) {
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        if (gmpart[perm[i]] != gmpart[perm[adjncy[j]]])
          cut++;
      }
    }
    cut = cut/2;

    gk_free((void **)&gpart, &gmpart, &perm, &sizes, &xadj, &adjncy, LTERM);

    return cut;
  }

  return 0;
}
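
The permutation above is built with the standard counting-sort/CSR idiom: histogram the part labels into sizes, convert the counts into starting offsets with MAKECSR, then hand out consecutive slots per part. A small self-contained illustration in plain C, with the MAKECSR macro (from GKlib) expanded by hand:

#include <stdio.h>

int main(void)
{
  int gpart[6] = {1, 0, 2, 0, 1, 0};  /* 6 vertices in 3 parts */
  int sizes[4] = {0, 0, 0, 0};        /* npes+1 entries, zeroed */
  int perm[6], i;

  for (i = 0; i < 6; i++)             /* histogram: vertices per part */
    sizes[gpart[i]]++;

  /* MAKECSR by hand: prefix-sum, shift right, zero the first entry,
     turning counts into the starting offset of each part's block */
  for (i = 1; i < 3; i++) sizes[i] += sizes[i-1];
  for (i = 3; i > 0; i--) sizes[i] = sizes[i-1];
  sizes[0] = 0;

  for (i = 0; i < 6; i++)             /* part p's vertices get slots sizes[p].. */
    perm[i] = sizes[gpart[i]]++;

  for (i = 0; i < 6; i++)
    printf("%d ", perm[i]);           /* prints: 3 0 5 1 4 2 */
  printf("\n");
  return 0;
}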
Example 5
/*************************************************************************
* This function implements a simple graph adaptation strategy.
**************************************************************************/
void AdaptGraph2(graph_t *graph, idx_t afactor, MPI_Comm comm)
{
  idx_t i, j, k, nvtxs, firstvtx, lastvtx;
  idx_t npes, mype, mypwgt, max, min, sum;
  idx_t *vwgt, *xadj, *adjncy, *adjwgt;

  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  srand(mype*afactor);

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  if (graph->adjwgt == NULL)
    adjwgt = graph->adjwgt = ismalloc(graph->nedges, 1, "AdaptGraph: adjwgt");
  else
    adjwgt = graph->adjwgt;
  vwgt = graph->vwgt;

  firstvtx = graph->vtxdist[mype];
  lastvtx = graph->vtxdist[mype+1];


/*  if (RandomInRange(npes+1) < .05*npes) { */ 
  if (RandomInRange(npes+1) < 2) { 
    printf("[%"PRIDX"] is adapting\n", mype);
    for (i=0; i<nvtxs; i++)
      vwgt[i] = afactor*vwgt[i];
  }

  for (i=0; i<nvtxs; i++) {
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) {
        adjwgt[j] = (int)pow(1.0*(gk_min(vwgt[i],vwgt[k-firstvtx])), .6667);
        if (adjwgt[j] == 0)
          adjwgt[j] = 1;
      }
    }
  }
      
  mypwgt = isum(nvtxs, vwgt, 1);

  gkMPI_Allreduce((void *)&mypwgt, (void *)&max, 1, IDX_T, MPI_MAX, comm);
  gkMPI_Allreduce((void *)&mypwgt, (void *)&min, 1, IDX_T, MPI_MIN, comm);
  gkMPI_Allreduce((void *)&mypwgt, (void *)&sum, 1, IDX_T, MPI_SUM, comm);

  if (mype == 0)
    printf("Initial Load Imbalance: %5.4"PRREAL", [%5"PRIDX" %5"PRIDX" %5"PRIDX"]\n", (1.0*max*npes)/(1.0*sum), min, max, sum);

}
Example 6
idx_t ComputeRealCut(idx_t *vtxdist, idx_t *part, char *filename, MPI_Comm comm)
{
  idx_t i, j, nvtxs, mype, npes, cut;
  idx_t *xadj, *adjncy, *gpart;
  MPI_Status status;

  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  if (mype != 0) {
    gkMPI_Send((void *)part, vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, 1, comm);
  }
  else {  /* Processor 0 does all the rest */
    gpart = imalloc(vtxdist[npes], "ComputeRealCut: gpart");
    icopy(vtxdist[1], part, gpart);

    for (i=1; i<npes; i++) 
      gkMPI_Recv((void *)(gpart+vtxdist[i]), vtxdist[i+1]-vtxdist[i], IDX_T, i, 1, comm, &status);

    ReadMetisGraph(filename, &nvtxs, &xadj, &adjncy);

    /* OK, now compute the cut */
    for (cut=0, i=0; i<nvtxs; i++) {
      for (j=xadj[i]; j<xadj[i+1]; j++) {
        if (gpart[i] != gpart[adjncy[j]])
          cut++;
      }
    }
    cut = cut/2;

    gk_free((void **)&gpart, &xadj, &adjncy, LTERM);

    return cut;
  }
  return 0;
}
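
Note the final division by two: the CSR adjacency stores each undirected edge in both directions, so every cut edge is counted once from each endpoint. A tiny worked instance in plain C:

#include <stdio.h>

int main(void)
{
  /* triangle 0-1-2 in CSR form; every undirected edge appears twice */
  int xadj[4]   = {0, 2, 4, 6};
  int adjncy[6] = {1, 2, 0, 2, 0, 1};
  int gpart[3]  = {0, 0, 1};          /* vertex 2 sits on the other side */
  int i, j, cut = 0;

  for (i = 0; i < 3; i++)
    for (j = xadj[i]; j < xadj[i+1]; j++)
      if (gpart[i] != gpart[adjncy[j]])
        cut++;                        /* edges (0,2) and (1,2), seen twice each */

  printf("cut = %d\n", cut/2);        /* prints: cut = 2 */
  return 0;
}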
Example 7
/*************************************************************************
* This function is the entry point of the initial partitioning algorithm
* that does recursive bisection.
* This algorithm assembles the graph on all the processors and proceeds
* by parallelizing the recursive bisection step.
**************************************************************************/
void InitPartition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, ncon, mype, npes, gnvtxs, ngroups;
  idx_t *xadj, *adjncy, *adjwgt, *vwgt;
  idx_t *part, *gwhere0, *gwhere1;
  idx_t *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
  graph_t *agraph;
  idx_t lnparts, fpart, fpe, lnpes; 
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  real_t *tpwgts, *tpwgts2, *lbvec, lbsum, min_lbsum, wsum;
  MPI_Comm ipcomm;
  struct {
    double sum;
    int rank;
  } lpesum, gpesum;

  WCOREPUSH;

  ncon = graph->ncon;

  ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, ctrl->npes), 1);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

  lbvec = rwspacemalloc(ctrl, ncon);

  /* assemble the graph to all the processors */
  agraph = AssembleAdaptiveGraph(ctrl, graph);
  gnvtxs = agraph->nvtxs;

  /* make a copy of the graph's structure for later */
  xadj   = icopy(gnvtxs+1, agraph->xadj, iwspacemalloc(ctrl, gnvtxs+1));
  vwgt   = icopy(gnvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, gnvtxs*ncon));
  adjncy = icopy(agraph->nedges, agraph->adjncy, iwspacemalloc(ctrl, agraph->nedges));
  adjwgt = icopy(agraph->nedges, agraph->adjwgt, iwspacemalloc(ctrl, agraph->nedges));
  part   = iwspacemalloc(ctrl, gnvtxs);

  /* create different processor groups */
  gkMPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);


  /* Go into the recursive bisection */
  METIS_SetDefaultOptions(moptions);
  moptions[METIS_OPTION_SEED] = ctrl->sync + (ctrl->mype % ngroups) + 1;

  tpwgts  = ctrl->tpwgts;
  tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

  lnparts = ctrl->nparts;
  fpart = fpe = 0;
  lnpes = npes;
  while (lnpes > 1 && lnparts > 1) {
    /* determine the weights of the two partitions as a function of the 
       weight of the target partition weights */
    for (j=(lnparts>>1), i=0; i<ncon; i++) {
      tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
      tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
      wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
      tpwgts2[i]      *= wsum;
      tpwgts2[ncon+i] *= wsum;
    }

    METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &twoparts, tpwgts2, NULL, moptions, 
          &edgecut, part);

    /* pick one of the branches */
    if (mype < fpe+lnpes/2) {
      KeepPart(ctrl, agraph, part, 0);
      lnpes   = lnpes/2;
      lnparts = lnparts/2;
    }
    else {
      KeepPart(ctrl, agraph, part, 1);
      fpart   = fpart + lnparts/2;
      fpe     = fpe + lnpes/2;
      lnpes   = lnpes - lnpes/2;
      lnparts = lnparts - lnparts/2;
    }
  }

  gwhere0 = iset(gnvtxs, 0, iwspacemalloc(ctrl, gnvtxs));
  gwhere1 = iwspacemalloc(ctrl, gnvtxs);

  if (lnparts == 1) { /* Case in which npes is greater than or equal to nparts */
    /* Only the first process will assign labels (for the reduction to work) */
    if (mype == fpe) {
      for (i=0; i<agraph->nvtxs; i++) 
        gwhere0[agraph->label[i]] = fpart;
    }
  }
  else { /* Case in which npes is smaller than nparts */
    /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
    tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
    for (j=0; j<ncon; j++) {
      for (wsum=0.0, i=0; i<lnparts; i++) {
        tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
        wsum += tpwgts[i*ncon+j];
      }
      for (wsum=1.0/wsum, i=0; i<lnparts; i++) 
        tpwgts[i*ncon+j] *= wsum;
    }

    METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
          agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
          &edgecut, part);

    for (i=0; i<agraph->nvtxs; i++) 
      gwhere0[agraph->label[i]] = fpart + part[i];
  }

  gkMPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_T, MPI_SUM, ipcomm);

  if (ngroups > 1) {
    tmpxadj   = agraph->xadj;
    tmpadjncy = agraph->adjncy;
    tmpadjwgt = agraph->adjwgt;
    tmpvwgt   = agraph->vwgt;
    tmpwhere  = agraph->where;

    agraph->xadj   = xadj;
    agraph->adjncy = adjncy;
    agraph->adjwgt = adjwgt;
    agraph->vwgt   = vwgt;
    agraph->where  = gwhere1;
    agraph->nvtxs  = gnvtxs;

    edgecut = ComputeSerialEdgeCut(agraph);
    ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
    lbsum = rsum(ncon, lbvec, 1);

    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut,   1, IDX_T,  MPI_MAX, ctrl->gcomm);
    gkMPI_Allreduce((void *)&lbsum,   (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ctrl->gcomm);

    lpesum.sum = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION*ncon) {
      if (lbsum < UNBALANCE_FRACTION*ncon)
        lpesum.sum = edgecut;
      else
        lpesum.sum = max_cut;
    } 
    lpesum.rank = ctrl->mype;
    
    gkMPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ctrl->gcomm);
    gkMPI_Bcast((void *)gwhere1, gnvtxs, IDX_T, gpesum.rank, ctrl->gcomm);

    agraph->xadj   = tmpxadj;
    agraph->adjncy = tmpadjncy;
    agraph->adjwgt = tmpadjwgt;
    agraph->vwgt   = tmpvwgt;
    agraph->where  = tmpwhere;
  }

  icopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

  FreeGraph(agraph);
  gkMPI_Comm_free(&ipcomm);

  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

  WCOREPOP;
}
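
The while loop above carries the whole recursive-bisection bookkeeping: at every level each process bisects its current subgraph, the lower half of the process range keeps part 0 and the upper half keeps part 1, and (fpart, lnparts, fpe, lnpes) track which slice of the final parts and of the processes the current branch owns. The index arithmetic can be traced on its own with a serial sketch (hypothetical standalone code, no partitioning involved):

#include <stdio.h>

int main(void)
{
  int npes = 8, nparts = 8, mype;

  for (mype = 0; mype < npes; mype++) {
    int lnparts = nparts, fpart = 0, fpe = 0, lnpes = npes;

    while (lnpes > 1 && lnparts > 1) {
      if (mype < fpe + lnpes/2) {     /* lower half keeps the left branch */
        lnpes   = lnpes/2;
        lnparts = lnparts/2;
      }
      else {                          /* upper half keeps the right branch */
        fpart  += lnparts/2;
        fpe    += lnpes/2;
        lnpes   = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }
    /* with npes == nparts every process ends up owning exactly one part */
    printf("pe %d -> parts [%d, %d)\n", mype, fpart, fpart + lnparts);
  }
  return 0;
}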
Example 8
/*************************************************************************
* Let the game begin
**************************************************************************/
int main(int argc, char *argv[])
{
  idx_t i, j, npes, mype, optype, nparts, adptf, options[10];
  idx_t *part=NULL, *sizes=NULL;
  graph_t graph;
  real_t ipc2redist, *xyz=NULL, *tpwgts=NULL, ubvec[MAXNCON];
  MPI_Comm comm;
  idx_t numflag=0, wgtflag=0, ndims, edgecut;
  char xyzfilename[8192];

  MPI_Init(&argc, &argv);
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);
  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  if (argc != 8) {
    if (mype == 0)
      printf("Usage: %s <graph-file> <op-type> <nparts> <adapth-factor> <ipc2redist> <dbglvl> <seed>\n", argv[0]);

    MPI_Finalize();
    exit(0);
  }

  optype     = atoi(argv[2]);
  nparts     = atoi(argv[3]);
  adptf      = atoi(argv[4]);
  ipc2redist = atof(argv[5]);

  options[0] = 1;
  options[PMV3_OPTION_DBGLVL] = atoi(argv[6]);
  options[PMV3_OPTION_SEED]   = atoi(argv[7]);

  if (mype == 0) 
    printf("reading file: %s\n", argv[1]);
  ParallelReadGraph(&graph, argv[1], comm);

  /* Remove the edges for testing */
  /*iset(graph.vtxdist[mype+1]-graph.vtxdist[mype]+1, 0, graph.xadj); */

  rset(graph.ncon, 1.05, ubvec);
  tpwgts = rmalloc(nparts*graph.ncon, "tpwgts");
  rset(nparts*graph.ncon, 1.0/(real_t)nparts, tpwgts);

  /*
  ChangeToFortranNumbering(graph.vtxdist, graph.xadj, graph.adjncy, mype, npes); 
  numflag = 1;

  nvtxs = graph.vtxdist[mype+1]-graph.vtxdist[mype];
  nedges = graph.xadj[nvtxs];
  printf("%"PRIDX" %"PRIDX"\n", isum(nvtxs, graph.xadj, 1), isum(nedges, graph.adjncy, 1));
  */


  if (optype >= 20) { 
    sprintf(xyzfilename, "%s.xyz", argv[1]);
    xyz = ReadTestCoordinates(&graph, xyzfilename, &ndims, comm);
  }

  if (mype == 0) 
    printf("finished reading file: %s\n", argv[1]);
  
  part  = ismalloc(graph.nvtxs, mype%nparts, "main: part");
  sizes = imalloc(2*npes, "main: sizes");

  switch (optype) {
    case 1: 
      wgtflag = 3;
      ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts, ubvec, 
          options, &edgecut, part, &comm);
      WritePVector(argv[1], graph.vtxdist, part, MPI_COMM_WORLD); 
      break;
    case 2:
      wgtflag = 3;
      options[PMV3_OPTION_PSR] = PARMETIS_PSR_COUPLED;
      ParMETIS_V3_RefineKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts, ubvec, 
          options, &edgecut, part, &comm);
      WritePVector(argv[1], graph.vtxdist, part, MPI_COMM_WORLD); 
      break;
    case 3:
      options[PMV3_OPTION_PSR] = PARMETIS_PSR_COUPLED;
      graph.vwgt = ismalloc(graph.nvtxs, 1, "main: vwgt");
      if (npes > 1) {
        AdaptGraph(&graph, adptf, comm);
      }
      else {
        wgtflag = 3;
        ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
            graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts, 
            ubvec, options, &edgecut, part, &comm);

        printf("Initial partitioning with edgecut of %"PRIDX"\n", edgecut);
        for (i=0; i<graph.ncon; i++) {
          for (j=0; j<graph.nvtxs; j++) {
            if (part[j] == i)
              graph.vwgt[j*graph.ncon+i] = adptf; 
            else
              graph.vwgt[j*graph.ncon+i] = 1; 
          }
        }
      }

      wgtflag = 3;
      ParMETIS_V3_AdaptiveRepart(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          NULL, graph.adjwgt, &wgtflag, &numflag, &graph.ncon, &nparts, tpwgts, ubvec, 
	  &ipc2redist, options, &edgecut, part, &comm);
      break;
    case 4: 
      ParMETIS_V3_NodeND(graph.vtxdist, graph.xadj, graph.adjncy, &numflag, options, 
          part, sizes, &comm);
      /* WriteOVector(argv[1], graph.vtxdist, part, comm);   */
      break;

    case 5: 
      ParMETIS_SerialNodeND(graph.vtxdist, graph.xadj, graph.adjncy, &numflag, options, 
          part, sizes, &comm);
      /* WriteOVector(argv[1], graph.vtxdist, part, comm);  */ 
      printf("%"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX" %"PRIDX"\n", sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], sizes[5], sizes[6]);
      break;
    case 11: 
      /* TestAdaptiveMETIS(graph.vtxdist, graph.xadj, graph.adjncy, part, options, adptf, comm); */
      break;
    case 20: 
      wgtflag = 3;
      ParMETIS_V3_PartGeomKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          graph.adjwgt, &wgtflag, &numflag, &ndims, xyz, &graph.ncon, &nparts, 
          tpwgts, ubvec, options, &edgecut, part, &comm);
      break;
    case 21: 
      ParMETIS_V3_PartGeom(graph.vtxdist, &ndims, xyz, part, &comm);
      break;
  }

  /* printf("%"PRIDX" %"PRIDX"\n", isum(nvtxs, graph.xadj, 1), isum(nedges, graph.adjncy, 1)); */

  gk_free((void **)&part, &sizes, &tpwgts, &graph.vtxdist, &graph.xadj, &graph.adjncy, 
         &graph.vwgt, &graph.adjwgt, &xyz, LTERM);

  MPI_Comm_free(&comm);

  MPI_Finalize();

  return 0;
}
Example 9
void TestParMetis_GPart(char *filename, char *xyzfile, MPI_Comm comm)
{
  idx_t ncon, nparts, npes, mype, opt2, realcut;
  graph_t graph, mgraph;
  idx_t *part, *mpart, *savepart, *order, *sizes;
  idx_t numflag=0, wgtflag=0, options[10], edgecut, ndims;
  real_t ipc2redist, *xyz=NULL, *tpwgts = NULL, ubvec[MAXNCON];

  gkMPI_Comm_size(comm, &npes);
  gkMPI_Comm_rank(comm, &mype);

  ParallelReadGraph(&graph, filename, comm);
  if (xyzfile)
    xyz = ReadTestCoordinates(&graph, xyzfile, &ndims, comm);
  gkMPI_Barrier(comm);

  part   = imalloc(graph.nvtxs, "TestParMetis_V3: part");
  tpwgts = rmalloc(MAXNCON*npes*2, "TestParMetis_V3: tpwgts");
  rset(MAXNCON, 1.05, ubvec);

  graph.vwgt = ismalloc(graph.nvtxs*5, 1, "TestParMetis_GPart: vwgt");


  /*======================================================================
  / ParMETIS_V3_PartKway
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[2] = 1;
  wgtflag = 2;
  numflag = 0;
  edgecut = 0;

  for (nparts=2*npes; nparts>=npes/2 && nparts > 0; nparts = nparts/2) {
    for (ncon=1; ncon<=NCON; ncon++) {
      if (ncon > 1 && nparts > 1)
        Mc_AdaptGraph(&graph, part, ncon, nparts, comm);
      else
        iset(graph.nvtxs, 1, graph.vwgt);

      if (mype == 0)
        printf("\nTesting ParMETIS_V3_PartKway with ncon: %"PRIDX", nparts: %"PRIDX"\n", ncon, nparts);

      rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);
      ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, 
          &edgecut, part, &comm);

      realcut = ComputeRealCut(graph.vtxdist, part, filename, comm);
      if (mype == 0) {
        printf("ParMETIS_V3_PartKway reported a cut of %"PRIDX" [%s:%"PRIDX"]\n", edgecut,
            (edgecut == realcut ? "OK" : "ERROR"), realcut);
      }

      if (mype == 0)
        printf("\nTesting ParMETIS_V3_RefineKway with ncon: %"PRIDX", nparts: %"PRIDX"\n", ncon, nparts);

      options[3] = PARMETIS_PSR_UNCOUPLED;
      ParMETIS_V3_RefineKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
          NULL, &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, 
          &edgecut, part, &comm);

      realcut = ComputeRealCut(graph.vtxdist, part, filename, comm);
      if (mype == 0) {
        printf("ParMETIS_V3_RefineKway reported a cut of %"PRIDX" [%s:%"PRIDX"]\n", edgecut,
            (edgecut == realcut ? "OK" : "ERROR"), realcut);
      }
    }
  }


  /*======================================================================
  / ParMETIS_V3_PartGeomKway 
  /=======================================================================*/
  if (xyzfile != NULL) {
    options[0] = 1;
    options[1] = 3;
    options[2] = 1;
    wgtflag = 2;
    numflag = 0;

    for (nparts=2*npes; nparts>=npes/2 && nparts > 0; nparts = nparts/2) {
      for (ncon=1; ncon<=NCON; ncon++) {
        if (ncon > 1)
          Mc_AdaptGraph(&graph, part, ncon, nparts, comm);
        else
          iset(graph.nvtxs, 1, graph.vwgt);
  
        if (mype == 0)
          printf("\nTesting ParMETIS_V3_PartGeomKway with ncon: %"PRIDX", nparts: %"PRIDX"\n", ncon, nparts);
  
        rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);
        ParMETIS_V3_PartGeomKway(graph.vtxdist, graph.xadj, graph.adjncy, graph.vwgt, 
            NULL, &wgtflag, &numflag, &ndims, xyz, &ncon, &nparts, tpwgts, ubvec, 
            options, &edgecut, part, &comm);
  
        realcut = ComputeRealCut(graph.vtxdist, part, filename, comm);
        if (mype == 0) 
          printf("ParMETIS_V3_PartGeomKway reported a cut of %"PRIDX" [%s:%"PRIDX"]\n", edgecut,
              (edgecut == realcut ? "OK" : "ERROR"), realcut);
      }
    }
  }



  /*======================================================================
  / ParMETIS_V3_PartGeom 
  /=======================================================================*/
  if (xyz != NULL) {
    wgtflag = 0;
    numflag = 0;
    if (mype == 0)
      printf("\nTesting ParMETIS_V3_PartGeom\n");

    ParMETIS_V3_PartGeom(graph.vtxdist, &ndims, xyz, part, &comm);

    realcut = ComputeRealCut(graph.vtxdist, part, filename, comm);
    if (mype == 0) 
      printf("ParMETIS_V3_PartGeom reported a cut of %"PRIDX"\n", realcut);
  }


  /*======================================================================
  / Coupled ParMETIS_V3_RefineKway 
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[2] = 1;
  options[3] = PARMETIS_PSR_COUPLED;
  nparts = npes;
  wgtflag = 0;
  numflag = 0;
  ncon = 1;
  rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);

  if (mype == 0)
    printf("\nTesting coupled ParMETIS_V3_RefineKway with default options (before move)\n");

  ParMETIS_V3_RefineKway(graph.vtxdist, graph.xadj, graph.adjncy, NULL, NULL, 
      &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, 
      part, &comm);

  /* Compute a good partition and move the graph. Do so quietly! */
  options[0] = 0;
  nparts = npes;
  wgtflag = 0;
  numflag = 0;
  ncon = 1;
  rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);
  ParMETIS_V3_PartKway(graph.vtxdist, graph.xadj, graph.adjncy, NULL, NULL, 
      &wgtflag, &numflag, &ncon, &npes, tpwgts, ubvec, options, &edgecut, 
      part, &comm);
  TestMoveGraph(&graph, &mgraph, part, comm);
  gk_free((void **)&(graph.vwgt), LTERM);
  mpart    = ismalloc(mgraph.nvtxs, mype, "TestParMetis_V3: mpart");
  savepart = imalloc(mgraph.nvtxs, "TestParMetis_V3: savepart");



  /*======================================================================
  / Coupled ParMETIS_V3_RefineKway 
  /=======================================================================*/
  options[0] = 1;
  options[1] = 3;
  options[2] = 1;
  options[3] = PARMETIS_PSR_COUPLED;
  nparts  = npes;
  wgtflag = 0;
  numflag = 0;

  for (ncon=1; ncon<=NCON; ncon++) {
    if (mype == 0)
      printf("\nTesting coupled ParMETIS_V3_RefineKway with ncon: %"PRIDX", nparts: %"PRIDX"\n", ncon, nparts);

    rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);
    ParMETIS_V3_RefineKway(mgraph.vtxdist, mgraph.xadj, mgraph.adjncy, NULL, NULL, 
        &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, 
        mpart, &comm);

    realcut = ComputeRealCutFromMoved(graph.vtxdist, mgraph.vtxdist, part, mpart, 
                  filename, comm);
    if (mype == 0) 
      printf("ParMETIS_V3_RefineKway reported a cut of %"PRIDX" [%s:%"PRIDX"]\n", edgecut,
          (edgecut == realcut ? "OK" : "ERROR"), realcut);
  }


/*ADAPTIVE:*/
  /*======================================================================
  / ParMETIS_V3_AdaptiveRepart
  /=======================================================================*/
  mgraph.vwgt  = ismalloc(mgraph.nvtxs*NCON, 1, "TestParMetis_V3: mgraph.vwgt");
  mgraph.vsize = ismalloc(mgraph.nvtxs, 1, "TestParMetis_V3: mgraph.vsize");
  AdaptGraph(&mgraph, 4, comm); 
  options[0] = 1;
  options[1] = 7;
  options[2] = 1;
  options[3] = PARMETIS_PSR_COUPLED;
  wgtflag = 2;
  numflag = 0;

  for (nparts=2*npes; nparts>=npes/2 && nparts > 0; nparts = nparts/2) {
    options[0] = 0;
    ncon    = 1;
    wgtflag = 0;
    rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);
    ParMETIS_V3_PartKway(mgraph.vtxdist, mgraph.xadj, mgraph.adjncy, NULL, NULL, 
        &wgtflag, &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, 
        savepart, &comm);

    options[0] = 1;
    wgtflag    = 2;

    for (ncon=1; ncon<=NCON; ncon++) {
      rset(nparts*ncon, 1.0/(real_t)nparts, tpwgts);

      if (ncon > 1)
        Mc_AdaptGraph(&mgraph, savepart, ncon, nparts, comm);
      else
        AdaptGraph(&mgraph, 4, comm); 

      for (ipc2redist=1000.0; ipc2redist>=0.001; ipc2redist/=1000.0) {
        icopy(mgraph.nvtxs, savepart, mpart);

        if (mype == 0)
          printf("\nTesting ParMETIS_V3_AdaptiveRepart with ipc2redist: %.3"PRREAL", ncon: %"PRIDX", nparts: %"PRIDX"\n", 
              ipc2redist, ncon, nparts);

        ParMETIS_V3_AdaptiveRepart(mgraph.vtxdist, mgraph.xadj, mgraph.adjncy, 
            mgraph.vwgt, mgraph.vsize, NULL, &wgtflag, &numflag, &ncon, &nparts, 
            tpwgts, ubvec, &ipc2redist, options, &edgecut, mpart, &comm);

        realcut = ComputeRealCutFromMoved(graph.vtxdist, mgraph.vtxdist, part, mpart, 
                      filename, comm);
        if (mype == 0) 
          printf("ParMETIS_V3_AdaptiveRepart reported a cut of %"PRIDX" [%s:%"PRIDX"]\n", edgecut,
              (edgecut == realcut ? "OK" : "ERROR"), realcut);
      }
    }
  }

  gk_free((void **)&tpwgts, &part, &mpart, &savepart, &xyz, &mgraph.vwgt, &mgraph.vsize, LTERM);
}
Example 10
File: mesh.c Project: ADTG-VSSC/SU2
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
int ParMETIS_V3_Mesh2Dual(idx_t *elmdist, idx_t *eptr, idx_t *eind, 
                 idx_t *numflag, idx_t *ncommon, idx_t **r_xadj, 
                 idx_t **r_adjncy, MPI_Comm *comm)
{
  idx_t i, j, jj, k, kk, m;
  idx_t npes, mype, pe, count, mask, pass;
  idx_t nelms, lnns, my_nns, node;
  idx_t firstelm, firstnode, lnode, nrecv, nsend;
  idx_t *scounts, *rcounts, *sdispl, *rdispl;
  idx_t *nodedist, *nmap, *auxarray;
  idx_t *gnptr, *gnind, *nptr, *nind, *myxadj=NULL, *myadjncy = NULL;
  idx_t *sbuffer, *rbuffer, *htable;
  ikv_t *nodelist, *recvbuffer;
  idx_t maxcount, *ind, *wgt;
  idx_t gmaxnode, gminnode;
  size_t curmem;

  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();
  
  /* Get basic comm info */
  gkMPI_Comm_size(*comm, &npes);
  gkMPI_Comm_rank(*comm, &mype);


  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag > 0) 
    ChangeNumberingMesh(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMinComm(*comm, imin(eptr[nelms], eind));
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMaxComm(*comm, imax(eptr[nelms], eind));


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERT(nelms > 0);

  /* construct the node distribution array: split the gmaxnode+1 global nodes
     as evenly as possible, giving the next pe j/(npes-i) of the j nodes that
     remain unassigned */
  nodedist = ismalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = ikvmalloc(eptr[nelms], "nodelist");
  auxarray = imalloc(eptr[nelms], "auxarray");
  htable   = ismalloc(gk_max(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(npes, "scounts");
  rcounts  = imalloc(npes, "rcounts");
  sdispl   = imalloc(npes+1, "sdispl");
  rdispl   = imalloc(npes+1, "rdispl");


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikvsorti(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = imalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  gkMPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERT(pe < npes);

  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERT(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = ikvmalloc(gk_max(1, nrecv), "recvbuffer");

  gkMPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_T, (void *)recvbuffer, 
      rcounts, rdispl, IDX_T, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = ismalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERT(lnode >= 0 && lnode < my_nns);

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = imalloc(gk_max(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  gkMPI_Alltoall((void *)scounts, 1, IDX_T, (void *)rcounts, 1, IDX_T, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = imalloc(gk_max(1, nsend), "sbuffer");

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERT(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv   = rdispl[npes];
  rbuffer = imalloc(gk_max(1, nrecv), "rbuffer");

  gkMPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_T, (void *)rbuffer, 
      rcounts, rdispl, IDX_T, *comm);

  k = -1;
  nptr = ismalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERT(k+1 == lnns);
  ASSERT(nptr[lnns] == nrecv);

  myxadj = *r_xadj = (idx_t *)malloc(sizeof(idx_t)*(nelms+1));
  if (myxadj == NULL) 
    gk_errexit(SIGMEM, "Failed to allocate memory for the dual graph's xadj array.\n");
  iset(nelms+1, 0, myxadj);

  iset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  maxcount = 200;
  ind = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: ind");
  wgt = imalloc(maxcount, "ParMETIS_V3_Mesh2Dual: wgt");

  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i) 
            continue;

          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
                }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
            }
          }
          }

          /* Adjust the memory. 
             This will be replaced by an idxrealloc() once GKlib is incorporated */
          if (count == maxcount-1) {
            maxcount *= 2;
            ind = irealloc(ind, maxcount, "ParMETIS_V3_Mesh2Dual: ind");
            wgt = irealloc(wgt, maxcount, "ParMETIS_V3_Mesh2Dual: wgt");
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommon) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
        }
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *r_adjncy = (idx_t *)malloc(sizeof(idx_t)*myxadj[nelms]);
      if (myadjncy == NULL)
        gk_errexit(SIGMEM, "Failed to allocate memory for dual graph's adjncy array.\n");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* nind just aliases rbuffer, so freeing rbuffer releases it as well */
  gk_free((void **)&nodedist, &nodelist, &auxarray, &htable, &scounts, &rcounts,
      &sdispl, &rdispl, &nmap, &recvbuffer, &gnptr, &gnind, &sbuffer, &rbuffer,
      &nptr, &ind, &wgt, LTERM);

  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return METIS_OK;
}
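
A minimal usage sketch for the routine above, run on a single process (mpirun -np 1): two triangles sharing the edge (1,2) become neighbors in the dual graph once ncommon = 2. The arrays are released with free() here because this particular implementation allocates them with malloc(); treat that as an assumption of this build.

#include <parmetis.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[])
{
  idx_t elmdist[2] = {0, 2};              /* one pe owns both elements */
  idx_t eptr[3]    = {0, 3, 6};
  idx_t eind[6]    = {0, 1, 2, 1, 3, 2};  /* triangles {0,1,2} and {1,3,2} */
  idx_t numflag = 0, ncommon = 2;         /* two shared nodes = one shared edge */
  idx_t *xadj, *adjncy;
  MPI_Comm comm;

  MPI_Init(&argc, &argv);
  MPI_Comm_dup(MPI_COMM_WORLD, &comm);

  ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, &numflag, &ncommon,
      &xadj, &adjncy, &comm);

  /* each triangle has exactly one dual neighbor: the other triangle */
  printf("deg(0) = %d, deg(1) = %d\n",
      (int)(xadj[1]-xadj[0]), (int)(xadj[2]-xadj[1]));

  free(xadj);
  free(adjncy);
  MPI_Comm_free(&comm);
  MPI_Finalize();
  return 0;
}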
Example 11
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph on all the processors and proceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i, j, nvtxs, nedges, ncon;
  idx_t mype, npes, srnpes, srmype; 
  idx_t *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
  idx_t *part, *lwhere, *home;
  idx_t lnparts, fpart, fpe, lnpes, ngroups;
  idx_t *rcounts, *rdispls;
  idx_t twoparts=2, moptions[METIS_NOPTIONS], edgecut, max_cut;
  idx_t sr_pe, gd_pe, sr, gd, who_wins;
  real_t my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
  real_t rating, max_rating, your_cost = -1.0, your_balance = -1.0;
  real_t lbsum, min_lbsum, *lbvec, *tpwgts, *tpwgts2, buffer[2];
  graph_t *agraph, cgraph;
  ctrl_t *myctrl;
  MPI_Status status;
  MPI_Comm ipcomm, srcomm;
  struct {
    double cost;
    int rank;
  } lpecost, gpecost;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));
  WCOREPUSH;

  vtxdist = graph->vtxdist;
  agraph  = AssembleAdaptiveGraph(ctrl, graph);
  nvtxs   = cgraph.nvtxs  = agraph->nvtxs;
  nedges  = cgraph.nedges = agraph->nedges;
  ncon    = cgraph.ncon   = agraph->ncon;
  xadj    = cgraph.xadj   = icopy(nvtxs+1, agraph->xadj, iwspacemalloc(ctrl, nvtxs+1));
  vwgt    = cgraph.vwgt   = icopy(nvtxs*ncon, agraph->vwgt, iwspacemalloc(ctrl, nvtxs*ncon));
  vsize   = cgraph.vsize  = icopy(nvtxs, agraph->vsize, iwspacemalloc(ctrl, nvtxs));
  adjncy  = cgraph.adjncy = icopy(nedges, agraph->adjncy, iwspacemalloc(ctrl, nedges));
  adjwgt  = cgraph.adjwgt = icopy(nedges, agraph->adjwgt, iwspacemalloc(ctrl, nedges));
  part    = cgraph.where  = agraph->where = iwspacemalloc(ctrl, nvtxs);

  lwhere = iwspacemalloc(ctrl, nvtxs);
  home   = iwspacemalloc(ctrl, nvtxs);
  lbvec  = rwspacemalloc(ctrl, graph->ncon);


  /****************************************/
  /****************************************/
  if (ctrl->ps_relation == PARMETIS_PSR_UNCOUPLED) {
    WCOREPUSH;
    rcounts = iwspacemalloc(ctrl, ctrl->npes);
    rdispls = iwspacemalloc(ctrl, ctrl->npes+1);

    for (i=0; i<ctrl->npes; i++) 
      rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
    MAKECSR(i, ctrl->npes, rdispls);

    gkMPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_T,
        (void *)part, rcounts, rdispls, IDX_T, ctrl->comm);

    for (i=0; i<agraph->nvtxs; i++)
      home[i] = part[i];

    WCOREPOP;  /* local frees */
  }
  else {
    for (i=0; i<ctrl->npes; i++) {
      for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
        part[j] = home[j] = i;
    }
  }

  /* Ensure that the initial partitioning is legal */
  for (i=0; i<agraph->nvtxs; i++) {
    if (part[i] >= ctrl->nparts)
      part[i] = home[i] = part[i] % ctrl->nparts;
    if (part[i] < 0)
      part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
  }
  /****************************************/
  /****************************************/

  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      rprintf(ctrl, "input cut: %"PRIDX", balance: ", ComputeSerialEdgeCut(agraph)));
  for (i=0; i<agraph->ncon; i++)
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]));
  IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

  /****************************************/
  /* Split the processors into two groups */
  /****************************************/
  sr = (ctrl->mype % 2 == 0) ? 1 : 0;
  gd = (ctrl->mype % 2 == 1) ? 1 : 0;

  if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
    sr = 1;
    gd = 0;
  }

  sr_pe = 0;
  gd_pe = 1;

  gkMPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
  gkMPI_Comm_rank(ipcomm, &mype);
  gkMPI_Comm_size(ipcomm, &npes);

  if (sr == 1) { /* Half of the processors do scratch-remap */
    ngroups = gk_max(gk_min(RIP_SPLIT_FACTOR, npes), 1);
    gkMPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
    gkMPI_Comm_rank(srcomm, &srmype);
    gkMPI_Comm_size(srcomm, &srnpes);

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_SEED] = ctrl->sync + (mype % ngroups) + 1;

    tpwgts  = ctrl->tpwgts;
    tpwgts2 = rwspacemalloc(ctrl, 2*ncon);

    iset(nvtxs, 0, lwhere);
    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = srnpes;
    while (lnpes > 1 && lnparts > 1) {
      PASSERT(ctrl, agraph->nvtxs > 1);
      /* determine the weights of the two partitions as a function of the 
         weight of the target partition weights */
      for (j=(lnparts>>1), i=0; i<ncon; i++) {
        tpwgts2[i]      = rsum(j, tpwgts+fpart*ncon+i, ncon);
        tpwgts2[ncon+i] = rsum(lnparts-j, tpwgts+(fpart+j)*ncon+i, ncon);
        wsum            = 1.0/(tpwgts2[i] + tpwgts2[ncon+i]);
        tpwgts2[i]      *= wsum;
        tpwgts2[ncon+i] *= wsum;
      }

      METIS_PartGraphRecursive(&agraph->nvtxs, &ncon, agraph->xadj, 
            agraph->adjncy, agraph->vwgt, NULL, agraph->adjwgt, 
            &twoparts, tpwgts2, NULL, moptions, &edgecut, part);

      /* pick one of the branches */
      if (srmype < fpe+lnpes/2) {
        KeepPart(ctrl, agraph, part, 0);
        lnpes   = lnpes/2;
        lnparts = lnparts/2;
      }
      else {
        KeepPart(ctrl, agraph, part, 1);
        fpart   = fpart + lnparts/2;
        fpe     = fpe + lnpes/2;
        lnpes   = lnpes - lnpes/2;
        lnparts = lnparts - lnparts/2;
      }
    }

    if (lnparts == 1) { /* Case in which srnpes is greater or equal to nparts */
      /* Only the first process will assign labels (for the reduction to work) */
      if (srmype == fpe) {
        for (i=0; i<agraph->nvtxs; i++) 
          lwhere[agraph->label[i]] = fpart;
      }
    }
    else { /* Case in which srnpes is smaller than nparts */
      /* create the normalized tpwgts for the lnparts from ctrl->tpwgts */
      tpwgts = rwspacemalloc(ctrl, lnparts*ncon);
      for (j=0; j<ncon; j++) {
        for (wsum=0.0, i=0; i<lnparts; i++) {
          tpwgts[i*ncon+j] = ctrl->tpwgts[(fpart+i)*ncon+j];
          wsum += tpwgts[i*ncon+j];
        }
        for (wsum=1.0/wsum, i=0; i<lnparts; i++)
          tpwgts[i*ncon+j] *= wsum;
      }

      METIS_PartGraphKway(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy, 
            agraph->vwgt, NULL, agraph->adjwgt, &lnparts, tpwgts, NULL, moptions, 
            &edgecut, part);

      for (i=0; i<agraph->nvtxs; i++) 
        lwhere[agraph->label[i]] = fpart + part[i];
    }

    gkMPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_T, MPI_SUM, srcomm);

    edgecut = ComputeSerialEdgeCut(&cgraph);
    ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
    lbsum = rsum(ncon, lbvec, 1);
    gkMPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, IDX_T, MPI_MAX, ipcomm);
    gkMPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, REAL_T, MPI_MIN, ipcomm);
    lpecost.rank = ctrl->mype;
    lpecost.cost = lbsum;
    if (min_lbsum < UNBALANCE_FRACTION * (real_t)(ncon)) {
      if (lbsum < UNBALANCE_FRACTION * (real_t)(ncon))
        lpecost.cost = (double)edgecut;
      else
        lpecost.cost = (double)max_cut + lbsum;
    }
    gkMPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_DOUBLE_INT,
        MPI_MINLOC, ipcomm);

    if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) 
      gkMPI_Send((void *)part, nvtxs, IDX_T, sr_pe, 1, ctrl->comm);

    if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) 
      gkMPI_Recv((void *)part, nvtxs, IDX_T, gpecost.rank, 1, ctrl->comm, &status);

    if (ctrl->mype == sr_pe) {
      icopy(nvtxs, part, lwhere);
      SerialRemap(ctrl, &cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
    }

    gkMPI_Comm_free(&srcomm);
  }
Example 12
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void RedoMyLink(ctrl_t *ctrl, graph_t *graph, idx_t *home, idx_t me,
                idx_t you, real_t *flows, real_t *sr_cost, real_t *sr_lbavg)
{
    idx_t h, i, r;
    idx_t nvtxs, nedges, ncon;
    idx_t  pass, lastseed, totalv;
    idx_t *xadj, *adjncy, *adjwgt, *where, *vsize;
    idx_t *costwhere, *lbwhere, *selectwhere;
    idx_t *ed, *id, *bndptr, *bndind, *perm;
    real_t *nvwgt, mycost;
    real_t lbavg, *lbvec;
    real_t best_lbavg, other_lbavg = -1.0, bestcost, othercost = -1.0;
    real_t *npwgts, *pwgts, *tpwgts;
    real_t ipc_factor, redist_factor, ftmp;
    idx_t mype;
    gkMPI_Comm_rank(MPI_COMM_WORLD, &mype);


    WCOREPUSH;

    nvtxs  = graph->nvtxs;
    nedges = graph->nedges;
    ncon   = graph->ncon;
    xadj   = graph->xadj;
    nvwgt  = graph->nvwgt;
    vsize  = graph->vsize;
    adjncy = graph->adjncy;
    adjwgt = graph->adjwgt;
    where  = graph->where;

    ipc_factor    = ctrl->ipc_factor;
    redist_factor = ctrl->redist_factor;

    /* set up data structures */
    id     = graph->sendind = iwspacemalloc(ctrl, nvtxs);
    ed     = graph->recvind = iwspacemalloc(ctrl, nvtxs);
    bndptr = graph->sendptr = iwspacemalloc(ctrl, nvtxs);
    bndind = graph->recvptr = iwspacemalloc(ctrl, nvtxs);

    costwhere = iwspacemalloc(ctrl, nvtxs);
    lbwhere   = iwspacemalloc(ctrl, nvtxs);
    perm      = iwspacemalloc(ctrl, nvtxs);

    lbvec  = rwspacemalloc(ctrl, ncon);
    pwgts  = rset(2*ncon, 0.0, rwspacemalloc(ctrl, 2*ncon));
    npwgts = rwspacemalloc(ctrl, 2*ncon);
    tpwgts = rwspacemalloc(ctrl, 2*ncon);

    graph->gnpwgts = npwgts;

    RandomPermute(nvtxs, perm, 1);
    icopy(nvtxs, where, costwhere);
    icopy(nvtxs, where, lbwhere);

    /* compute target pwgts */
    for (h=0; h<ncon; h++) {
        tpwgts[h]      = -1.0*flows[h];
        tpwgts[ncon+h] = flows[h];
    }

    for (i=0; i<nvtxs; i++) {
        if (where[i] == me) {
            for (h=0; h<ncon; h++) {
                tpwgts[h] += nvwgt[i*ncon+h];
                pwgts[h]  += nvwgt[i*ncon+h];
            }
        }
        else {
            ASSERT(where[i] == you);
            for (h=0; h<ncon; h++) {
                tpwgts[ncon+h] += nvwgt[i*ncon+h];
                pwgts[ncon+h]  += nvwgt[i*ncon+h];
            }
        }
    }

    /* we don't want any weights to be less than zero */
    for (h=0; h<ncon; h++) {
        if (tpwgts[h] < 0.0) {
            tpwgts[ncon+h] += tpwgts[h];
            tpwgts[h] = 0.0;
        }

        if (tpwgts[ncon+h] < 0.0) {
            tpwgts[h] += tpwgts[ncon+h];
            tpwgts[ncon+h] = 0.0;
        }
    }


    /* now compute new bisection */
    bestcost = (real_t)isum(nedges, adjwgt, 1)*ipc_factor +
               (real_t)isum(nvtxs, vsize, 1)*redist_factor;
    best_lbavg = 10.0;

    lastseed = 0;
    for (pass=N_MOC_REDO_PASSES; pass>0; pass--) {
        iset(nvtxs, 1, where);

        /* find seed vertices */
        r = perm[lastseed] % nvtxs;
        lastseed = (lastseed+1) % nvtxs;
        where[r] = 0;

        Mc_Serial_Compute2WayPartitionParams(ctrl, graph);
        Mc_Serial_Init2WayBalance(ctrl, graph, tpwgts);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);
        Mc_Serial_Balance2Way(ctrl, graph, tpwgts, 1.02);
        Mc_Serial_FM_2WayRefine(ctrl, graph, tpwgts, 4);

        for (i=0; i<nvtxs; i++)
            where[i] = (where[i] == 0) ? me : you;

        for (i=0; i<ncon; i++) {
            ftmp = (pwgts[i]+pwgts[ncon+i])/2.0;
            if (ftmp != 0.0)
                lbvec[i] = fabs(npwgts[i]-tpwgts[i])/ftmp;
            else
                lbvec[i] = 0.0;
        }
        lbavg = ravg(ncon, lbvec);

        totalv = 0;
        for (i=0; i<nvtxs; i++)
            if (where[i] != home[i])
                totalv += vsize[i];

        mycost = (real_t)(graph->mincut)*ipc_factor + (real_t)totalv*redist_factor;

        if (bestcost >= mycost) {
            bestcost = mycost;
            other_lbavg = lbavg;
            icopy(nvtxs, where, costwhere);
        }

        if (best_lbavg >= lbavg) {
            best_lbavg = lbavg;
            othercost = mycost;
            icopy(nvtxs, where, lbwhere);
        }
    }

    if (other_lbavg <= .05) {
        selectwhere = costwhere;
        *sr_cost = bestcost;
        *sr_lbavg = other_lbavg;
    }
    else {
        selectwhere = lbwhere;
        *sr_cost = othercost;
        *sr_lbavg = best_lbavg;
    }

    icopy(nvtxs, selectwhere, where);

    WCOREPOP;
}
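
The target weights computed near the top of this refinement are just the current side weights shifted by the requested flow, clamped so neither side goes negative (with any clamped amount pushed back to the other side, preserving the total). A tiny numeric illustration for a single constraint:

#include <stdio.h>

int main(void)
{
  double pwgt_me = 6.0, pwgt_you = 4.0;   /* current side weights */
  double flow = 1.5;                      /* weight to move from me to you */
  double t_me  = pwgt_me  - flow;         /* 4.5 */
  double t_you = pwgt_you + flow;         /* 5.5 */

  if (t_me < 0.0)  { t_you += t_me;  t_me  = 0.0; }  /* clamp, keep the total */
  if (t_you < 0.0) { t_me  += t_you; t_you = 0.0; }

  printf("targets: %.1f %.1f\n", t_me, t_you);       /* prints: targets: 4.5 5.5 */
  return 0;
}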