/*
 * Example #1
 * This function is the driver for the partition refinement mode of ParMETIS
 */
/*
 * Recursive multilevel driver built around InitMultisection() and
 * KWayNodeRefine().
 *
 * At each level the graph is set up and forced to a single constraint.  If
 * the graph has fewer than 1.3*ctrl->CoarsenTo global vertices, or the last
 * coarsening step left it with more than COARSEN_FRACTION of the finer
 * graph's vertices, the initial multisection is computed here.  Otherwise
 * the graph is coarsened one more level, the routine recurses on
 * graph->coarser, and the result is projected back and refined.
 *
 *   ctrl   - control structure (dbglvl and CoarsenTo are read here)
 *   graph  - graph at the current level; graph->ncon is overwritten with 1
 *   wspace - workspace passed through to every helper
 */
void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  SetUp(ctrl, graph, wspace);
  graph->ncon = 1;

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d][%d]\n",
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs),
        GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo,
        GlobalSEMax(ctrl, graph->vwgt[idxamax(graph->nvtxs, graph->vwgt)])));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
      (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {
    /* Compute the initial npart-way multisection */
    InitMultisection(ctrl, graph, wspace);

    /* Refine here only if no coarsening took place; otherwise the caller
       one level up refines after projecting. */
    if (graph->finer == NULL) {
      ComputeNodePartitionParams(ctrl, graph, wspace);
      KWayNodeRefine(ctrl, graph, wspace, 2*NGR_PASSES, ORDER_UNBALANCE_FRACTION);
    }
  }
  else {
    /* Coarsen it and then partition it */
    Mc_LocalMatch_HEM(ctrl, graph, wspace);

    Order_Partition(ctrl, graph->coarser, wspace);

    Moc_ProjectPartition(ctrl, graph, wspace);
    ComputeNodePartitionParams(ctrl, graph, wspace);
    KWayNodeRefine(ctrl, graph, wspace, 2*NGR_PASSES, ORDER_UNBALANCE_FRACTION);
  }
}
/*
 * Example #2
 * This function computes movement statistics for adaptive refinement
 * schemes
 */
/*
 * Computes vertex-movement statistics implied by graph->where.
 *
 * Each local vertex i is destined for partition where[i]; a vertex whose
 * destination differs from ctrl->mype is counted as leaving this processor.
 *
 *   nmoved - out: GlobalSESum of the per-processor leaving counts
 *   maxout - out: GlobalSEMax of the per-processor leaving counts
 *   maxin  - out: GlobalSEMax of (vertices destined for this processor
 *            across all processors) minus (local vertices that stay)
 *
 * GlobalSESum/GlobalSEMax are presumably collective reductions over
 * ctrl->comm - confirm against their definitions.
 */
void ComputeMoveStatistics(CtrlType *ctrl, GraphType *graph, int *nmoved, int *maxin, int *maxout)
{
    int i, j, nvtxs;
    idxtype *where;
    idxtype *lpvtxs, *gpvtxs;

    nvtxs = graph->nvtxs;
    where = graph->where;

    lpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: lpvtxs");
    gpvtxs = idxsmalloc(ctrl->nparts, 0, "ComputeMoveStatistics: gpvtxs");

    /* Tally local vertices per destination partition and count the ones
       leaving this processor.  Without this loop body lpvtxs stays all-zero
       and j stays 0, which would make every statistic below meaningless. */
    for (j=i=0; i<nvtxs; i++) {
        lpvtxs[where[i]]++;
        if (where[i] != ctrl->mype)
            j++;
    }

    /* PrintVector(ctrl, ctrl->npes, 0, lpvtxs, "Lpvtxs: "); */

    MPI_Allreduce((void *)lpvtxs, (void *)gpvtxs, ctrl->nparts, IDX_DATATYPE, MPI_SUM, ctrl->comm);

    *nmoved = GlobalSESum(ctrl, j);
    *maxout = GlobalSEMax(ctrl, j);
    /* (nvtxs-j) vertices stay put, so the remainder of gpvtxs[mype] arrives
       from other processors. */
    *maxin = GlobalSEMax(ctrl, gpvtxs[ctrl->mype]-(nvtxs-j));

    GKfree((void **)&lpvtxs, (void **)&gpvtxs, LTERM);
}
/*
 * Example #3
 * This function sets up the GraphType structure (and fills ctrl->tvwgts)
 */
/*
 * Creates a GraphType that wraps the caller's distributed CSR arrays and
 * computes the normalised vertex weights.
 *
 *   ctrl     - control structure; npes, mype are read, tvwgts[0..ncon-1] is
 *              written with the global per-constraint weight sums
 *   ncon     - number of weights per vertex (must not exceed MAXNCON, which
 *              bounds the local accumulator; not checked here)
 *   vtxdist  - vertex distribution; vtxdist[npes] is the global vertex count
 *   xadj, adjncy, vwgt, adjwgt - caller-owned arrays, stored by pointer
 *   wgtflag  - bit 2 clear: vwgt is ignored and a fresh array is allocated
 *              with idxsmalloc(..., 1, ...); bit 1 clear: same for adjwgt
 *              (presumably filled with unit weights - confirm against
 *              idxsmalloc)
 *
 * Returns the new graph.  graph->nvwgt is allocated here.
 * Aborts the MPI job if the total weight of any constraint is zero, because
 * the normalisation below would otherwise divide by zero.
 */
GraphType *Moc_SetUpGraph(CtrlType *ctrl, int ncon, idxtype *vtxdist, idxtype *xadj,
                          idxtype *vwgt, idxtype *adjncy, idxtype *adjwgt, int *wgtflag)
{
    int i, j;
    GraphType *graph;
    int ltvwgts[MAXNCON];

    graph = CreateGraph();
    graph->level   = 0;
    graph->gnvtxs  = vtxdist[ctrl->npes];
    graph->nvtxs   = vtxdist[ctrl->mype+1]-vtxdist[ctrl->mype];
    graph->ncon    = ncon;
    graph->nedges  = xadj[graph->nvtxs];
    graph->xadj    = xadj;
    graph->vwgt    = vwgt;
    graph->adjncy  = adjncy;
    graph->adjwgt  = adjwgt;
    graph->vtxdist = vtxdist;

    if (((*wgtflag)&2) == 0)
        graph->vwgt = idxsmalloc(graph->nvtxs*ncon, 1, "Par_KMetis: vwgt");

    if (((*wgtflag)&1) == 0)
        graph->adjwgt = idxsmalloc(graph->nedges, 1, "Par_KMetis: adjwgt");

    /* compute tvwgts: local sums first, then a global reduction */
    for (j=0; j<ncon; j++)
        ltvwgts[j] = 0;

    for (i=0; i<graph->nvtxs; i++) {
        for (j=0; j<ncon; j++)
            ltvwgts[j] += graph->vwgt[i*ncon+j];
    }

    for (j=0; j<ncon; j++)
        ctrl->tvwgts[j] = GlobalSESum(ctrl, ltvwgts[j]);

    /* check for zero wgt constraints */
    for (i=0; i<ncon; i++) {
        /* ADD: take care of the case in which tvwgts is zero */
        if (ctrl->tvwgts[i] == 0) {
            rprintf(ctrl, "ERROR: sum weight for constraint %d is zero\n", i);
            /* tvwgts is a global sum, so every rank reaches this point;
               stop instead of dividing by zero below. */
            MPI_Abort(ctrl->comm, 1);
        }
    }

    /* compute nvwgts: each weight as a fraction of its constraint's total */
    graph->nvwgt = fmalloc(graph->nvtxs*ncon, "graph->nvwgt");
    for (i=0; i<graph->nvtxs; i++) {
        for (j=0; j<ncon; j++)
            graph->nvwgt[i*ncon+j] = (floattype)(graph->vwgt[i*ncon+j]) / (floattype)(ctrl->tvwgts[j]);
    }

    return graph;
}
/*
 * Example #4
 * This function writes a distributed graph to a file
 */
/*
 * Writes a distributed graph to "<filename>.<testset>.<ncon>.<nparts>".
 *
 * Rank 0 creates the file and writes the header line
 *   <gnvtxs> <gnedges/2> 11 <ncon> 1
 * Then every rank, in rank order, appends one line per local vertex holding
 * its ncon weights followed by (neighbour+1, edge weight) pairs.
 *
 * The ranks take turns on the same path, so each one closes the file before
 * the next opens it, and barriers on ctrl->comm enforce the ordering.  This
 * assumes all ranks see the same file system.
 *
 *   ctrl     - supplies the communicator
 *   graph    - local part of the distributed graph
 *   filename - base name of the output file
 *   nparts, testset - only used to build the file name
 */
void Moc_ParallelWriteGraph(CtrlType *ctrl, GraphType *graph, char *filename,
     int nparts, int testset)
{
  int h, i, j;
  int npes, mype, penum, gnedges;
  char partfile[256];
  FILE *fpin;
  MPI_Comm comm;

  comm = ctrl->comm;
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  gnedges = GlobalSESum(ctrl, graph->nedges);
  /* Bounded formatting: a long filename must not overrun partfile. */
  snprintf(partfile, sizeof(partfile), "%s.%d.%d.%d", filename, testset, graph->ncon, nparts);

  if (mype == 0) {
    if ((fpin = fopen(partfile, "w")) == NULL)
      errexit("Failed to open file %s", partfile);

    fprintf(fpin, "%d %d %d %d %d\n", graph->gnvtxs, gnedges/2, 11, graph->ncon, 1);
    fclose(fpin);
  }

  /* Nobody appends until the header has been written and flushed. */
  MPI_Barrier(comm);

  for (penum=0; penum<npes; penum++) {
    if (mype == penum) {

      if ((fpin = fopen(partfile, "a")) == NULL)
        errexit("Failed to open file %s", partfile);

      for (i=0; i<graph->nvtxs; i++) {
        for (h=0; h<graph->ncon; h++)
          fprintf(fpin, "%d ", graph->vwgt[i*graph->ncon+h]);

        for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
          fprintf(fpin, "%d ", graph->adjncy[j]+1);
          fprintf(fpin, "%d ", graph->adjwgt[j]);
        }
        fprintf(fpin, "\n");
      }
      fclose(fpin);
    }

    /* Rank penum+1 may only start once rank penum has closed the file. */
    MPI_Barrier(comm);
  }
}
/*
 * Example #5
 * This function is the entry point of the parallel ordering algorithm.
 * This function assumes that the graph is already nicely partitioned among
 * the processors and then proceeds to perform recursive bisection.
 */
/*
 * Entry point of the parallel ordering algorithm.
 *
 * Steps, as visible in this routine:
 *   1. validate the processor count and the vertex distribution;
 *   2. compute an npes-way partitioning (Moc_Global_Partition);
 *   3. move the graph according to it (Moc_MoveGraph);
 *   4. order the moved graph (MultilevelOrder);
 *   5. project the ordering back onto the original distribution.
 *
 *   vtxdist, xadj, adjncy - distributed CSR graph (renumbered in place and
 *                           restored when *numflag == 1)
 *   numflag - 1 if the caller's arrays use 1-based numbering
 *   options - options[PMV3_OPTION_DBGLVL] is used as the debug level
 *   order   - out: the computed ordering for the local vertices
 *   sizes   - out: filled by MultilevelOrder
 *   comm    - communicator the graph is distributed over
 *
 * On an invalid configuration an error is printed on rank 0 and the routine
 * returns without touching order/sizes.
 */
void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag,
              int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm)
{
  int i, j;
  int ltvwgts[MAXNCON];
  int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  idxtype *morder;
  int minnvtxs;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);
  nparts = npes;

  /* All checks depend only on npes and vtxdist, so every rank takes the
     same branch and returns together. */
  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    return;
  }

  if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) {
    if (mype == 0)
      printf("Error: Too many processors for this many vertices.\n");
    return;
  }

  minnvtxs = vtxdist[1]-vtxdist[0];
  for (i=1; i<npes; i++)
    minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i];

  if (minnvtxs < (int)((float)npes*1.1)) {
    if (mype == 0)
      printf("Error: vertices are not distributed equally.\n");
    return;
  }

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm);
  /* nparts == npes here, so amax(npes, nparts) is simply npes. */
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts));
  ctrl.seed = mype;
  ctrl.sync = seed;
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts");
  ctrl.ubvec[0] = 1.03;

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /* Compute the initial k-way partitioning */
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Moc_Global_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /* Move the graph according to the partitioning */
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  graph->ncon = 1;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  /* Now compute an ordering of the moved graph */
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  PreAllocateMemory(&ctrl, mgraph, &wspace);

  ctrl.ipart = ISEP_NODE;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000));

  /* compute tvwgts */
  for (j=0; j<mgraph->ncon; j++)
    ltvwgts[j] = 0;

  for (i=0; i<mgraph->nvtxs; i++) {
    for (j=0; j<mgraph->ncon; j++)
      ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j];
  }

  for (j=0; j<mgraph->ncon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  /* normalised vertex weights of the moved graph */
  mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++) {
    for (j=0; j<mgraph->ncon; j++)
      mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]);
  }

  morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder");
  MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace);

  /* Invert the ordering back to the original graph */
  ProjectInfoBack(&ctrl, graph, order, morder, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  /* Release the buffers this routine allocated directly. */
  GKfree((void **)&ctrl.tpwgts, (void **)&morder, LTERM);
  /* NOTE(review): mgraph, wspace and the remaining ctrl internals are not
     released here; confirm which project helpers own their cleanup. */
  FreeInitialGraphAndRemap(graph, 0);

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);
}

/*
 * Example #6
 * This function is the entry point of the parallel kmetis algorithm that uses
 * coordinates to compute an initial graph distribution.
 */
/*
 * Entry point of the parallel k-way partitioner that uses vertex
 * coordinates to compute an initial graph distribution.
 *
 * Steps, as visible in this routine:
 *   1. sanitise the inputs through CheckInputs (i* copies are used below);
 *   2. handle the trivial cases (one partition, or one processor, which
 *      delegates to serial METIS) and return;
 *   3. compute an npes-way geometric partitioning with unit vertex weights
 *      (Coordinate_Partition) and move the graph accordingly;
 *   4. recompute tvwgts/nvwgt for the moved graph;
 *   5. partition the moved graph (serially when it is small or has no
 *      edges, otherwise Moc_Global_Partition), remap, and project the
 *      result back into part[].
 *
 *   edgecut - out: edge cut of the computed partitioning
 *   part    - out: partition number of each local vertex
 */
void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, 
              float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, 
              int *options, int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int h, i, j;
  int nvtxs = -1, npes, mype;
  int uwgtflag, cut, gcut, maxnvtxs;
  int ltvwgts[MAXNCON];
  int moptions[10];
  CtrlType ctrl;
  idxtype *uvwgt;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  float avg, maximb, balance, *mytpwgts;
  int seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /* Try and take care bad inputs */
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];

  CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
              NULL, NULL, options, ioptions, part, comm);

  /* NOTE(review): itpwgts is released only at the end of the routine; the
     two early returns below do not free it.  Confirm whether CheckInputs
     always allocates it before adding a free on those paths. */

  /* Take care the nparts = 1 case */
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    *edgecut = 0;
    return;
  }

  /* Take care of npes = 1 case */
  if (npes == 1 && inparts > 1) {
    moptions[0] = 0;
    nvtxs = vtxdist[1];

    if (incon == 1) {
      METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, 
            &inparts, itpwgts, moptions, edgecut, part);
    }
    else {
      /* ADD: this is because METIS does not support tpwgts for all constraints */
      mytpwgts = fmalloc(inparts, "mytpwgts");
      for (i=0; i<inparts; i++)
        mytpwgts[i] = itpwgts[i*incon];

      moptions[7] = -1;
      METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, 
            &inumflag, &inparts, mytpwgts, moptions, edgecut, part);

      /* mytpwgts is a local scratch copy; nothing else references it. */
      GKfree((void **)&mytpwgts, LTERM);
    }

    return;
  }

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /* Set up control structures */
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
  }
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts));
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = itpwgts;
  scopy(incon, iubvec, ctrl.ubvec);

  /* The geometric phase uses a single unit weight per vertex, so force the
     "vertex weights present" bit and pass a temporary array. */
  uwgtflag = iwgtflag|2;
  uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt");
  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag);
  free(graph->nvwgt); graph->nvwgt = NULL;

  PreAllocateMemory(&ctrl, graph, &wspace);

  /* Compute the initial npes-way partitioning geometric partitioning */
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /* Move the graph according to the partitioning */
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  /* graph->vwgt still points at uvwgt (Moc_SetUpGraph stores the pointer);
     release it before the pointer is overwritten with the real weights. */
  free(uvwgt);
  uvwgt = NULL;
  graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt");
  graph->ncon = incon;
  /* The move is npes-way regardless of the requested number of parts. */
  j = ctrl.nparts;
  ctrl.nparts = ctrl.npes;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  ctrl.nparts = j;

  /* Do the same functionality as Moc_SetUpGraph for mgraph */
  /* compute tvwgts */
  for (j=0; j<incon; j++)
    ltvwgts[j] = 0;

  /* Iterate over the moved graph's own vertex count: mgraph->vwgt has
     mgraph->nvtxs*incon entries, which need not match graph->nvtxs. */
  for (i=0; i<mgraph->nvtxs; i++) {
    for (j=0; j<incon; j++)
      ltvwgts[j] += mgraph->vwgt[i*incon+j];
  }

  for (j=0; j<incon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  /* check for zero wgt constraints */
  for (i=0; i<incon; i++) {
    /* ADD: take care of the case in which tvwgts is zero */
    if (ctrl.tvwgts[i] == 0) {
      if (ctrl.mype == 0) {
        printf("ERROR: sum weight for constraint %d is zero\n", i);
        fflush(stdout);
      }
      /* tvwgts is a global sum, so all ranks get here; let rank 0 finish
         printing, then stop instead of dividing by zero below. */
      MPI_Barrier(*comm);
      MPI_Abort(*comm, 1);
    }
  }

  /* compute nvwgt */
  mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++) {
    for (j=0; j<incon; j++)
      mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]);
  }

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  if (ctrl.dbglvl&DBG_INFO) {
    /* Report cut and vertex balance of the geometric partitioning. */
    cut = 0;
    for (i=0; i<graph->nvtxs; i++) {
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
      }
    }
    gcut = GlobalSESum(&ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs);
    balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes));
    rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n",
      gcut, balance, maxnvtxs, graph->gnvtxs, npes);
  }

  /* Set up the newly moved graph */
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  ctrl.nparts = inparts;
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  /* Now compute the partition of the moved graph */
  if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) {
    IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes]));
    PartitionSmallGraph(&ctrl, mgraph, &wspace);
  }
  else {
    Moc_Global_Partition(&ctrl, mgraph, &wspace);
  }
  ParallelReMapGraph(&ctrl, mgraph, &wspace);

  /* Invert the ordering back to the original graph */
  ctrl.nparts = npes;
  ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace);

  *edgecut = mgraph->mincut;

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  /* Print out stats */
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      /* worst ratio of achieved to target weight for constraint h */
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "  avg: %.3f\n", avg/(float)incon);
  }

  GKfree((void **)&itpwgts, LTERM);
  /* NOTE(review): mgraph, wspace and the remaining ctrl internals are not
     released here; confirm which project helpers own their cleanup. */
  FreeInitialGraphAndRemap(graph, iwgtflag);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}

/* Example #7 */
/*
 * Refines a node separator for up to npasses passes.
 *
 * Each pass runs two sweeps (c = 0, 1).  In a sweep, separator vertices
 * whose edegrees[(c+1)%2] does not exceed their own weight are moved to
 * partition from%nparts+c, provided the target stays below badmaxpwgt.
 * Neighbours that are affected are collected (locally in update[], remotely
 * in supdate[] and exchanged with the neighbouring processors), pulled into
 * the separator where needed, and their edegrees are recomputed.  Partition
 * weights are then summed globally and graph->mincut is taken from
 * gpwgts[2*nparts-1].
 *
 *   ctrl       - control structure (npes, mype, nparts, comm, requests)
 *   graph      - graph with where/nrinfo/lpwgts/gpwgts/sepind already set
 *   wspace     - workspace; pairs, indices, pv1, pv2 and pv4 are used
 *   npasses    - maximum number of refinement passes
 *   ubfraction - imbalance factor used to derive badminpwgt/badmaxpwgt
 *
 * NOTE(review): this listing has lost lines during extraction: closing
 * braces, at least one `else`, a counter increment, the last argument of two
 * MPI calls and the body of the final `if`.  The individual gaps are flagged
 * below; restore them from the upstream source before compiling.
 */
void KWayNodeRefine(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, 
         int npasses, float ubfraction)
  int i, ii, iii, j, k, pass, nvtxs, firstvtx, lastvtx, otherlastvtx, c, nmoves, 
      nlupd, nsupd, nnbrs, nchanged, nsep;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  idxtype *xadj, *adjncy, *adjwgt, *vtxdist, *vwgt;
  idxtype *where, *lpwgts, *gpwgts, *sepind;
  idxtype *peind, *recvptr, *sendptr;
  idxtype *update, *supdate, *rupdate, *pe_updates, *htable, *changed;
  idxtype *badminpwgt, *badmaxpwgt;
  KeyValueType *swchanges, *rwchanges;
  int *nupds_pe;
  NRInfoType *rinfo, *myrinfo;
  int from, to, me, other, oldcut;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  nvtxs = graph->nvtxs;

  vtxdist = graph->vtxdist;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;
  adjwgt  = graph->adjwgt;
  vwgt    = graph->vwgt;

  /* global index range [firstvtx, lastvtx) of the vertices owned locally */
  firstvtx = vtxdist[mype];
  lastvtx  = vtxdist[mype+1];

  where  = graph->where;
  rinfo  = graph->nrinfo;
  lpwgts = graph->lpwgts;
  gpwgts = graph->gpwgts;

  nsep   = graph->nsep;
  sepind = graph->sepind;

  nnbrs   = graph->nnbrs;
  peind   = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  /* send/receive buffers are carved out of the shared workspace arrays */
  changed   = idxmalloc(nvtxs, "KWayRefine: changed");
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  update   = idxmalloc(nvtxs, "KWayRefine: update");
  supdate  = wspace->indices;
  rupdate  = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWayRefine: nupds_pe");

  /* htable[v] != 0 marks v as already queued in update[]/supdate[] */
  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWayRefine: lhtable");

  badminpwgt = wspace->pv1;
  badmaxpwgt = wspace->pv2;

  /* partitions are handled in pairs (i, i+1); both share the same bounds */
  for (i=0; i<nparts; i+=2) {
    badminpwgt[i] = badminpwgt[i+1] = (1.0/ubfraction)*(gpwgts[i]+gpwgts[i+1])/2;
    badmaxpwgt[i] = badmaxpwgt[i+1] = ubfraction*(gpwgts[i]+gpwgts[i+1])/2;
  /* NOTE(review): the closing brace of the loop above is missing here. */
  //myprintf(ctrl, "%6d %6d %6d %6d %6d %6d %6d\n", lpwgts[0], lpwgts[1], lpwgts[2], gpwgts[0], gpwgts[1], gpwgts[2], badmaxpwgt[0]);

  IFSET(ctrl->dbglvl, DBG_REFINEINFO, 
      PrintNodeBalanceInfo(ctrl, nparts, gpwgts, badminpwgt, badmaxpwgt, 1));

  for (pass=0; pass<npasses; pass++) {
    oldcut = graph->mincut;

    for (c=0; c<2; c++) {
      /* bounds are recomputed because gpwgts changes after every sweep */
      for (i=0; i<nparts; i+=2) {
        badminpwgt[i] = badminpwgt[i+1] = (1.0/ubfraction)*(gpwgts[i]+gpwgts[i+1])/2;
        badmaxpwgt[i] = badmaxpwgt[i+1] = ubfraction*(gpwgts[i]+gpwgts[i+1])/2;

      nlupd = nsupd = nmoves = nchanged = 0;
      for (ii=0; ii<nsep; ii++) {
        i = sepind[ii];
        from = where[i];

        ASSERT(ctrl, from >= nparts);

        /* Go through the loop to see if gain is possible for the separator vertex */
        if (rinfo[i].edegrees[(c+1)%2] <= vwgt[i]) {
          /* It is a one-sided move so it will go to the other partition. 
             Look at the comments in InitMultisection to understand the meaning 
             of from%nparts */
          to = from%nparts+c;  

          if (gpwgts[to]+vwgt[i] > badmaxpwgt[to]) {
            /* printf("Skip because of weight! %d\n", vwgt[i]-rinfo[i].edegrees[(c+1)%2]); */
            continue;   /* We cannot move it there because it gets too heavy */

          /* Update the where information of the vertex you moved */
          where[i] = to;

          /* Remove this vertex from the sepind. Note the trick for looking at 
             the sepind[ii] again */
          sepind[ii--] = sepind[--nsep]; 

          /* myprintf(ctrl, "Vertex %d [%d %d] is moving to %d from %d [%d]\n", 
                  i+firstvtx, vwgt[i], rinfo[i].edegrees[(c+1)%2], to, from, where[i]); */

          /* NOTE(review): nmoves is reported below but never incremented in
             this listing; an increment appears to have been lost. */
          lpwgts[from]       -= vwgt[i];
          lpwgts[2*nparts-1] -= vwgt[i];
          lpwgts[to]         += vwgt[i];
          gpwgts[to]         += vwgt[i];

          /* Put the vertices adjacent to i that belong to either the separator or
             the (c+1)%2 partition into the update array */
          for (j=xadj[i]; j<xadj[i+1]; j++) {
            k = adjncy[j];
            if (htable[k] == 0 && where[k] != to) {
              htable[k] = 1;
              /* NOTE(review): an `else` between the next two statements
                 appears to be missing (local vs. remote neighbour). */
              if (k<nvtxs)
                update[nlupd++] = k;
                supdate[nsupd++] = k;
          if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
            changed[nchanged++] = i;

      /* myprintf(ctrl, "nmoves: %d, nlupd: %d, nsupd: %d\n", nmoves, nlupd, nsupd); */

      /* Tell everybody interested what the new where[] info is for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, 
          rwchanges, wspace->pv4); 

      IFSET(ctrl->dbglvl, DBG_RMOVEINFO, rprintf(ctrl, "\t[%d %d], [%d %d %d]\n", 
                pass, c, GlobalSESum(ctrl, nmoves), GlobalSESum(ctrl, nsupd), 
                GlobalSESum(ctrl, nlupd)));

      /* NOTE(review): the lines starting with a single '/' below are the
         interior of a banner comment whose opening and closing lines were
         lost; they are not valid C as they stand. */
      / Time to communicate with processors to send the vertices whose degrees 
      / need to be updated.
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
                  peind[i], 1, ctrl->comm, ctrl->rreq+i);

      /* Issue the sends next. This needs some preprocessing */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      iidxsort(nsupd, supdate);

      /* supdate is sorted, so each neighbour's share is the contiguous run
         of entries below that neighbour's upper vertex bound */
      for (j=i=0; i<nnbrs; i++) {
        otherlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < otherlastvtx; k++); 
        /* NOTE(review): the final argument (request handle) and closing
           parenthesis of this MPI_Isend call are missing. */
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, 
        j = k;

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++) 
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

      / Place the received to-be updated vertices into update[] 
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          /* received ids are global; subtract firstvtx to get the local id */
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;

      / Update the where information of the vertices that are pulled
      / into the separator.
      nchanged = 0;
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        me = where[i];
        if (me < nparts && me%2 == (c+1)%2) { /* This vertex is pulled into the separator */
          lpwgts[me] -= vwgt[i];
          where[i] = nparts+me-(me%2); 
          sepind[nsep++] = i;  /* Put the vertex into the sepind array */
          if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
            changed[nchanged++] = i;

          lpwgts[where[i]]   += vwgt[i];
          lpwgts[2*nparts-1] += vwgt[i];
          /* myprintf(ctrl, "Vertex %d moves into the separator from %d to %d\n", 
                 i+firstvtx, me, where[i]); */

      /* Tell everybody interested what the new where[] info is for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where, swchanges, 
          rwchanges, wspace->pv4); 

      / Update the rinfo of the vertices in the update[] array
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        /* clear the mark so htable is all-zero again for the next sweep */
        htable[i] = 0;

        me = where[i];
        if (me >= nparts) {  /* If it is a separator vertex */
          /* myprintf(ctrl, "Updating %d %d\n", i+firstvtx, me); */

          myrinfo = rinfo+i;
          myrinfo->edegrees[0] = myrinfo->edegrees[1] = 0;

          for (j=xadj[i]; j<xadj[i+1]; j++) {
            other = where[adjncy[j]];
            if (me != other)
              myrinfo->edegrees[other%2] += vwgt[adjncy[j]];

      /* Finally, sum-up the partition weights */
      /* NOTE(review): the final argument (communicator) and closing
         parenthesis of this MPI_Allreduce call are missing. */
      MPI_Allreduce((void *)lpwgts, (void *)gpwgts, 2*nparts, IDX_DATATYPE, MPI_SUM, 
      graph->mincut = gpwgts[2*nparts-1];

      IFSET(ctrl->dbglvl, DBG_REFINEINFO, PrintNodeBalanceInfo(ctrl, nparts, gpwgts, 
            badminpwgt, badmaxpwgt, 0));

    /* NOTE(review): the body of this `if` is missing; presumably the pass
       loop stops once a pass no longer changes the cut - confirm. */
    if (graph->mincut == oldcut)

  GKfree((void **)&update, &nupds_pe, &htable, &changed, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
/*
 * Example #8
 * This function performs a k-way directed diffusion
 */
/*
 * Performs a k-way directed diffusion on a serial graph.
 *
 * For up to min(nparts/2, NGD_PASSES) passes the routine builds the
 * subdomain connectivity matrix, solves for a flow solution (ConjGrad2),
 * turns it into per-edge transfer amounts, and then visits the first
 * nvtxs/3 vertices of a candidate ordering, moving a vertex to a
 * neighbouring subdomain when the pending transfer towards that subdomain
 * exceeds flowFactor times the vertex weight.
 *
 *   ctrl  - control structure (nparts, ubvec, tpwgts, mype, npes,
 *           ipc_factor, redist_factor are read)
 *   graph - graph whose where[] is modified in place; mincut is updated
 *   home  - original subdomain of each vertex; a vertex with
 *           where[i] != home[i] is treated as "dirty"
 *
 * Returns ipc_factor*mincut + redist_factor*totalv, or (real_t)mype when a
 * disconnected subdomain is detected.
 *
 * NOTE(review): this listing has lost lines during extraction: closing
 * braces, several loop/if bodies, `else` keywords, `continue`/`break`
 * statements and the CleanUpAndExit label that the goto refers to.  The
 * gaps are flagged below; restore them from the upstream source before
 * compiling.
 */
real_t WavefrontDiffusion(ctrl_t *ctrl, graph_t *graph, idx_t *home)
  idx_t ii, i, j, k, l, nvtxs, nedges, nparts;
  idx_t from, to, edge, done, nswaps, noswaps, totalv, wsize;
  idx_t npasses, first, second, third, mind, maxd;
  idx_t *xadj, *adjncy, *adjwgt, *where, *perm;
  idx_t *rowptr, *colind, *ed, *psize;
  real_t *transfer, *tmpvec;
  real_t balance = -1.0, *load, *solution, *workspace;
  real_t *nvwgt, *npwgts, flowFactor, cost, ubfactor;
  matrix_t matrix;
  ikv_t *cand;
  idx_t ndirty, nclean, dptr, clean;

  nvtxs        = graph->nvtxs;
  nedges       = graph->nedges;
  xadj         = graph->xadj;
  nvwgt        = graph->nvwgt;
  adjncy       = graph->adjncy;
  adjwgt       = graph->adjwgt;
  where        = graph->where;
  nparts       = ctrl->nparts;
  ubfactor     = ctrl->ubvec[0];
  matrix.nrows = nparts;

  /* the move threshold depends on the rank: 0.35 by default, and
     0.50/0.75/1.00 on ranks 2/3/4 */
  flowFactor = 0.35;
  flowFactor = (ctrl->mype == 2) ? 0.50 : flowFactor;
  flowFactor = (ctrl->mype == 3) ? 0.75 : flowFactor;
  flowFactor = (ctrl->mype == 4) ? 1.00 : flowFactor;

  /* allocate memory */
  /* one real_t buffer, carved into solution | tmpvec | npwgts | load |
     matrix.values | transfer */
  solution                   = rmalloc(4*nparts+2*nedges, "WavefrontDiffusion: solution");
  tmpvec                     = solution + nparts;
  npwgts                     = solution + 2*nparts;
  load                       = solution + 3*nparts;
  matrix.values              = solution + 4*nparts;
  transfer = matrix.transfer = solution + 4*nparts + nedges;

  /* one idx_t buffer, carved into perm | ed | psize | rowptr | colind */
  perm                   = imalloc(2*nvtxs+2*nparts+nedges+1, "WavefrontDiffusion: perm");
  ed                     = perm + nvtxs;
  psize                  = perm + 2*nvtxs;
  rowptr = matrix.rowptr = perm + 2*nvtxs + nparts;
  colind = matrix.colind = perm + 2*nvtxs + 2*nparts + 1;

  /*GKTODO - Potential problem with this malloc */
  /* workspace is sized in bytes and later reused both as real_t scratch
     (ConjGrad2) and as idx_t scratch (SetUpConnectGraph) */
  wsize     = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "WavefrontDiffusion: workspace");
  cand      = ikvmalloc(nvtxs, "WavefrontDiffusion: cand");

  /* Populate empty subdomains */
  iset(nparts, 0, psize);
  /* NOTE(review): the body of this loop is missing; psize is used below as
     the per-subdomain vertex count, so it is presumably tallied here. */
  for (i=0; i<nvtxs; i++) 

  mind = iargmin(nparts, psize);
  maxd = iargmax(nparts, psize);
  if (psize[mind] == 0) {
    /* start from a random offset and look for a vertex of the largest
       subdomain to hand over to the empty one */
    for (i=0; i<nvtxs; i++) {
      k = (RandomInRange(nvtxs)+i)%nvtxs; 
      if (where[k] == maxd) {
        where[k] = mind;
        /* NOTE(review): the rest of this branch (psize bookkeeping and loop
           exit, presumably) is missing. */

  /* ed[i]: total weight of edges from i to other subdomains;
     npwgts[p]: summed nvwgt of subdomain p */
  iset(nvtxs, 0, ed);
  rset(nparts, 0.0, npwgts);
  for (i=0; i<nvtxs; i++) {
    npwgts[where[i]] += nvwgt[i];
    for (j=xadj[i]; j<xadj[i+1]; j++)
      ed[i] += (where[i] != where[adjncy[j]] ? adjwgt[j] : 0);

  ComputeLoad(graph, nparts, load, ctrl->tpwgts, 0);
  done = 0;

  /* zero out the tmpvec array */
  rset(nparts, 0.0, tmpvec);

  npasses = gk_min(nparts/2, NGD_PASSES);
  for (l=0; l<npasses; l++) {
    /* Set-up and solve the diffusion equation */
    nswaps = 0;

    /* Solve flow equations */
    SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);

    /* check for disconnected subdomains */
    /* a row holding a single entry has no off-diagonal neighbour */
    for(i=0; i<matrix.nrows; i++) {
      if (matrix.rowptr[i]+1 == matrix.rowptr[i+1]) {
        cost = (real_t)(ctrl->mype); 
        /* NOTE(review): the CleanUpAndExit label is not present in this
           listing; it presumably sits just before the final gk_free. */
	goto CleanUpAndExit;

    ConjGrad2(&matrix, load, solution, 0.001, workspace);
    ComputeTransferVector(1, &matrix, solution, transfer, 0);

    GetThreeMax(nparts, load, &first, &second, &third);

    if (l%3 == 0) {
      FastRandomPermute(nvtxs, perm, 1);
    else {
      /* move dirty vertices first */
      ndirty = 0;
      for (i=0; i<nvtxs; i++) {
        /* NOTE(review): the body of this `if` is missing; ndirty is
           presumably counted here so that clean vertices can be written
           after the dirty ones below. */
        if (where[i] != home[i])

      dptr = 0;
      for (i=0; i<nvtxs; i++) {
        /* NOTE(review): an `else` between the next two statements appears
           to be missing (dirty vertices first, clean ones after). */
        if (where[i] != home[i])
          perm[dptr++] = i;
          perm[ndirty++] = i;

      PASSERT(ctrl, ndirty == nvtxs);
      ndirty = dptr;
      nclean = nvtxs-dptr;
      /* shuffle the dirty and the clean range independently */
      FastRandomPermute(ndirty, perm, 0);
      FastRandomPermute(nclean, perm+ndirty, 0);

    /* rank 0 additionally orders candidates by decreasing external degree;
       vertices with ed == 0 are parked at the tail of cand[] */
    if (ctrl->mype == 0) {
      for (j=nvtxs, k=0, ii=0; ii<nvtxs; ii++) {
        i = perm[ii];
        if (ed[i] != 0) {
          cand[k].key = -ed[i];
          cand[k++].val = i;
        else {
          cand[--j].key = 0;
          cand[j].val = i;
      ikvsorti(k, cand);

    for (ii=0; ii<nvtxs/3; ii++) {
      i = (ctrl->mype == 0) ? cand[ii].val : perm[ii];
      from = where[i];

      /* don't move out the last vertex in a subdomain */
      /* NOTE(review): the statement controlled by this `if` is missing. */
      if (psize[from] == 1)

      clean = (from == home[i]) ? 1 : 0;

      /* only move from top three or dirty vertices */
      /* NOTE(review): the statement controlled by this `if` is missing. */
      if (from != first && from != second && from != third && clean)

      /* Scatter the sparse transfer row into the dense tmpvec row */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++)
        tmpvec[colind[j]] = transfer[j];

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        to = where[adjncy[j]];
        if (from != to) {
          if (tmpvec[to] > (flowFactor * nvwgt[i])) {
            tmpvec[to] -= nvwgt[i];
            INC_DEC(psize[to], psize[from], 1);
            INC_DEC(npwgts[to], npwgts[from], nvwgt[i]);
            INC_DEC(load[to], load[from], nvwgt[i]);
            where[i] = to;
            /* NOTE(review): nswaps is tested below but never incremented in
               this listing; an increment appears to have been lost here. */

            /* Update external degrees */
            ed[i] = 0;
            for (k=xadj[i]; k<xadj[i+1]; k++) {
              edge = adjncy[k];
              ed[i] += (to != where[edge] ? adjwgt[k] : 0);

              if (where[edge] == from)
                ed[edge] += adjwgt[k];
              if (where[edge] == to)
                ed[edge] -= adjwgt[k];

      /* Gather the dense tmpvec row into the sparse transfer row */
      for (j=rowptr[from]+1; j<rowptr[from+1]; j++) {
        transfer[j] = tmpvec[colind[j]];
        tmpvec[colind[j]] = 0.0;
      ASSERT(fabs(rsum(nparts, tmpvec, 1)) < .0001)

    /* every second pass: check the stopping criteria */
    if (l % 2 == 1) {
      balance = rmax(nparts, npwgts)*nparts;
      if (balance < ubfactor + 0.035)
        done = 1;

      /* NOTE(review): the statements controlled by the next two `if`s are
         missing; presumably they leave the pass loop. */
      if (GlobalSESum(ctrl, done) > 0)

      noswaps = (nswaps > 0) ? 0 : 1;
      if (GlobalSESum(ctrl, noswaps) > ctrl->npes/2)


  graph->mincut = ComputeSerialEdgeCut(graph);
  totalv        = Mc_ComputeSerialTotalV(graph, home);
  cost          = ctrl->ipc_factor * (real_t)graph->mincut + ctrl->redist_factor * (real_t)totalv;

  gk_free((void **)&solution, (void **)&perm, (void **)&workspace, (void **)&cand, LTERM);

  return cost;
/*
 * Example #9
 * This function is the driver for the adaptive refinement mode of ParMETIS
 */
/*
 * Recursive multilevel driver for adaptive repartitioning.
 *
 * At each level the graph is set up and ctrl->redist_factor is recomputed
 * from the global edge-weight to vertex-size ratio.  If the graph has fewer
 * than 1.3*ctrl->CoarsenTo global vertices, or the last coarsening step
 * left it with more than COARSEN_FRACTION of the finer graph's vertices,
 * the partition is initialised from graph->home and balanced if needed.
 * Otherwise the graph is coarsened (matching scheme chosen by
 * ctrl->ps_relation), the routine recurses on graph->coarser, and the
 * result is projected back, optionally rebalanced, and refined.
 *
 *   ctrl   - control structure; redist_factor is written
 *   graph  - graph at the current level; graph->where is allocated at the
 *            coarsest level
 *   wspace - workspace passed through to every helper
 */
void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i;
  int tewgt, tvsize;
  floattype gtewgt, gtvsize;
  floattype ubavg, lbavg, lbvec[MAXNCON];

  /* Set up important data structures */
  SetUp(ctrl, graph, wspace);

  ubavg   = savg(graph->ncon, ctrl->ubvec);
  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (floattype) GlobalSESum(ctrl, tewgt) + 1.0;  /* The +1 were added to remove any FPE */
  gtvsize = (floattype) GlobalSESum(ctrl, tvsize) + 1.0;
  ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio);

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d]\n", 
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
     (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    /* Balance the partition on the coarsest graph */
    graph->where = idxsmalloc(graph->nvtxs+graph->nrecv, -1, "graph->where");
    idxcopy(graph->nvtxs, graph->home, graph->where);

    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    lbavg = savg(graph->ncon, lbvec);

    if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION)
      Balance_Partition(ctrl, graph, wspace);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }

    /* check if no coarsening took place */
    if (graph->finer == NULL) {
      Moc_ComputePartitionParams(ctrl, graph, wspace);
      Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);
      Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES);
    }
  }
  else {
    /* Coarsen it and partition it */
    switch (ctrl->ps_relation) {
      case COUPLED:
        Mc_LocalMatch_HEM(ctrl, graph, wspace);
        break;  /* without this both matching schemes would run */
      case DISCOUPLED:
      default:
        /* Any other value also uses global matching, so that
           graph->coarser is always created before the recursion below. */
        Moc_GlobalMatch_Balance(ctrl, graph, wspace);
        break;
    }

    Adaptive_Partition(ctrl, graph->coarser, wspace);

    /* project partition and refine */
    Moc_ProjectPartition(ctrl, graph, wspace);
    Moc_ComputePartitionParams(ctrl, graph, wspace);

    /* multi-constraint graphs get an extra balancing step on the four
       finest levels when the average imbalance is too high */
    if (graph->ncon > 1 && graph->level < 4) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      lbavg = savg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.025) {
        Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);
      }
    }

    Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }
  }
}
/*
 * Example #10
 * This function computes the initial id/ed degrees
 */
void Moc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
  int h, i, j, k;
  int nvtxs, ncon;
  int firstvtx, lastvtx;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  floattype *lnpwgts, *gnpwgts;
  idxtype *where, *swhere, *rwhere;
  RInfoType *rinfo, *myrinfo;
  EdgeType *edegrees;
  int me, other;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr));

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  where = graph->where;
  rinfo = graph->rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "CPP: rinfo");
  lnpwgts = graph->lnpwgts = fmalloc(ctrl->nparts*ncon, "CPP: lnpwgts");
  gnpwgts = graph->gnpwgts = fmalloc(ctrl->nparts*ncon, "CPP: gnpwgts");

  sset(ctrl->nparts*ncon, 0, lnpwgts);

  firstvtx = vtxdist[ctrl->mype];
  lastvtx = vtxdist[ctrl->mype+1];

  / Send/Receive the where information of interface vertices
  swhere = wspace->indices;
  rwhere = where + nvtxs;

  CommInterfaceData(ctrl, graph, where, swhere, rwhere); 

  PrintVector(ctrl, nvtxs, firstvtx, where, "where");

  ASSERT(ctrl, wspace->nlarge >= xadj[nvtxs]);

  / Compute now the id/ed degrees
  graph->lmincut = 0;
  for (i=0; i<nvtxs; i++) {
    me = where[i];
    myrinfo = rinfo+i;

    for (h=0; h<ncon; h++)
      lnpwgts[me*ncon+h] += graph->nvwgt[i*ncon+h];

    myrinfo->degrees = wspace->degrees + xadj[i];
    myrinfo->ndegrees = myrinfo->id = myrinfo->ed = 0;

    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (me == where[ladjncy[j]])
        myrinfo->id += adjwgt[j];
        myrinfo->ed += adjwgt[j];

    if (myrinfo->ed > 0) {  /* Time to do some serious work */
      graph->lmincut += myrinfo->ed;
      edegrees = myrinfo->degrees;

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        other = where[ladjncy[j]];
        if (me != other) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (edegrees[k].edge == other) {
              edegrees[k].ewgt += adjwgt[j];
          if (k == myrinfo->ndegrees) {
            edegrees[k].edge = other;
            edegrees[k].ewgt = adjwgt[j];
          ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);

  PrintVector(ctrl, ctrl->nparts*ncon, 0, lnpwgts, "lnpwgts");

  /* Finally, sum-up the partition weights */
  MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, ctrl->nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm);

  graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

  PrintVector(ctrl, ctrl->nparts*ncon, 0, gnpwgts, "gnpwgts");

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr));
Beispiel #11
/*
 * This function is the entry point of the parallel kmetis algorithm that uses
 * coordinates to compute an initial graph distribution.
 */
/*
 * ParMETIS_V3_PartGeomKway
 *
 * Flow of the visible code:
 *   1. Validate the arguments with CheckInputsPartGeomKway; return
 *      METIS_ERROR when GlobalSEMinComm() of the status is 0.
 *   2. nparts == 1: fill `part` with 0 (or 1 when *numflag != 0), set
 *      *edgecut to 0 and jump to DONE.
 *   3. npes == 1: hand the graph to METIS_PartGraphKway and jump to DONE.
 *   4. Otherwise: renumber if *numflag > 0, build the graph, compute a
 *      coordinate-based npes-way partition (Coordinate_Partition), move the
 *      graph accordingly (MoveGraph), partition the moved graph either with
 *      PartitionSmallGraph or Global_Partition, remap it, and project the
 *      result back into `part` (ProjectInfoBack). *edgecut receives
 *      mgraph->mincut.
 *
 * Returns METIS_ERROR on bad input, otherwise `status` cast to int.
 *
 * NOTE(review): this listing appears to be missing lines -- closing braces,
 * the DONE label targeted by the gotos below, and presumably the cleanup
 * code around it. Confirm against the upstream file before relying on it.
 */
int ParMETIS_V3_PartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
        idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, 
	real_t *xyz, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, 
	idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm)
  idx_t h, i, j, npes, mype, status, nvtxs, seed, dbglvl;
  idx_t cut, gcut, maxnvtxs;
  idx_t moptions[METIS_NOPTIONS];
  ctrl_t *ctrl;
  graph_t *graph, *mgraph;
  real_t balance;
  size_t curmem;

  /* Check the input parameters and return if an error */
  status = CheckInputsPartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag,
                numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec, options, 
                edgecut, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  /* remember the current memory usage for the leak check at the end */
  curmem = gk_GetCurMemoryUsed();

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_GKMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  /* Take care the nparts == 1 case */
  if (*nparts == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part);
    *edgecut = 0;
    goto DONE;

  /* Take care of npes == 1 case */
  if (npes == 1) {
    nvtxs = vtxdist[1] - vtxdist[0];  /* subtraction is required when numflag==1 */

    /* NOTE(review): only the NUMBERING entry of moptions is assigned in the
       visible code; verify that the remaining entries are given defaults */
    moptions[METIS_OPTION_NUMBERING] = *numflag;

    status = METIS_PartGraphKway(&nvtxs, ncon, xadj, adjncy, vwgt, NULL, adjwgt, 
                 nparts, tpwgts, ubvec, moptions, edgecut, part);

    goto DONE;

  /* Setup the graph */
  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag);
  /* nvwgt is released here; it is computed for the moved graph further down */
  gk_free((void **)&graph->nvwgt, LTERM); 

  /* Allocate the workspace */
  AllocateWSpace(ctrl, 10*graph->nvtxs);

  /* Compute the initial npes-way partitioning geometric partitioning */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  Coordinate_Partition(ctrl, graph, *ndims, xyz, 1);

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Move the graph according to the partitioning */
  STARTTIMER(ctrl, ctrl->MoveTmr);

  /* ctrl->nparts is temporarily set to npes for the move, then restored */
  ctrl->nparts = npes;
  mgraph = MoveGraph(ctrl, graph);
  ctrl->nparts = *nparts;

  SetupGraph_nvwgts(ctrl, mgraph); /* compute nvwgts for the moved graph */

  /* Debug only: report the cut and balance of the coordinate-based partition */
  if (ctrl->dbglvl&DBG_INFO) {
    CommInterfaceData(ctrl, graph, graph->where, graph->where+graph->nvtxs);
    for (cut=0, i=0; i<graph->nvtxs; i++) {
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
    gcut     = GlobalSESum(ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(ctrl, mgraph->nvtxs);
    balance  = (real_t)(maxnvtxs)/((real_t)(graph->gnvtxs)/(real_t)(npes));
    rprintf(ctrl, "XYZ Cut: %6"PRIDX" \tBalance: %6.3"PRREAL" [%"PRIDX" %"PRIDX" %"PRIDX"]\n",
       gcut, balance, maxnvtxs, graph->gnvtxs, npes);

  STOPTIMER(ctrl, ctrl->MoveTmr);

  /* Compute the partition of the moved graph */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts));

  /* small graphs, graphs with fewer than 20 vertices per processor, and
     graphs without any edges are partitioned with PartitionSmallGraph */
  if (vtxdist[npes] < SMALLGRAPH 
      || vtxdist[npes] < npes*20 
      || GlobalSESum(ctrl, mgraph->nedges) == 0) { /* serially */
    IFSET(ctrl->dbglvl, DBG_INFO, 
        rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes]));
    PartitionSmallGraph(ctrl, mgraph);
  else { /* in parallel */
    Global_Partition(ctrl, mgraph);

  ParallelReMapGraph(ctrl, mgraph);

  /* Invert the ordering back to the original graph */
  ctrl->nparts = npes;
  ProjectInfoBack(ctrl, graph, part, mgraph->where);
  ctrl->nparts = *nparts;

  *edgecut = mgraph->mincut;

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Print some stats */
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));
  IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, mgraph, 0));


  /* restore the caller's numbering */
  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

  /* NOTE(review): curmem is a size_t; if gk_GetCurMemoryUsed() also returns
     an unsigned type, this test is true for any difference, including a
     decrease -- confirm */
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));

  return (int)status;
Beispiel #12
/*
 * Adaptive_Partition
 *
 * Recursive multilevel driver. At each level it sets up communication
 * (CommSetup), recomputes ctrl->redist_factor from the global edge-weight to
 * vertex-size ratio, and then either
 *   - treats the graph as the coarsest one (when gnvtxs < 1.3*CoarsenTo, or
 *     when it is more than COARSEN_FRACTION of the size of graph->finer):
 *     graph->where is initialised from graph->home and, if the imbalance is
 *     too high and the run is not a REFINE_PARTITION one, Balance_Partition
 *     is called; or
 *   - coarsens the graph (Match_Local / Match_Global), recurses on
 *     graph->coarser, projects the partition back and refines it.
 *
 * NOTE(review): this listing appears to be missing lines (closing braces,
 * the `case` labels and `break`s of the switch, and possibly more); confirm
 * against the upstream file.
 */
void Adaptive_Partition(ctrl_t *ctrl, graph_t *graph)
  idx_t i;
  idx_t tewgt, tvsize;
  real_t gtewgt, gtvsize;
  real_t ubavg, lbavg, *lbvec;


  /* per-constraint load-imbalance vector, taken from the ctrl workspace */
  lbvec = rwspacemalloc(ctrl, graph->ncon);

  /* Set up important data structures */
  CommSetup(ctrl, graph);

  ubavg   = ravg(graph->ncon, ctrl->ubvec);
  tewgt   = isum(graph->nedges, graph->adjwgt, 1);
  tvsize  = isum(graph->nvtxs, graph->vsize, 1);
  gtewgt  = (real_t) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs;  /* The +1/graph->gnvtxs were added to remove any FPE */
  gtvsize = (real_t) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio);

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"]\n", 
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo));

  /* stop coarsening when the graph is small enough or the last coarsening
     step did not shrink it below COARSEN_FRACTION of the finer graph */
  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
     (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    AllocateRefinementWorkSpace(ctrl, 2*graph->nedges);

    /* Balance the partition on the coarsest graph */
    /* where has room for nvtxs local plus nrecv received entries; the local
       part starts out as a copy of home */
    graph->where = ismalloc(graph->nvtxs+graph->nrecv, -1, "graph->where");
    icopy(graph->nvtxs, graph->home, graph->where);

    ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    lbavg = ravg(graph->ncon, lbvec);

    if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION)
      Balance_Partition(ctrl, graph);

    /* progress output only; the partition parameters computed for it are
       freed again right below */
    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputePartitionParams(ctrl, graph);
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");

      /* free memory allocated by ComputePartitionParams */
      gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM);

    /* check if no coarsening took place */
    if (graph->finer == NULL) {
      ComputePartitionParams(ctrl, graph);
      KWayBalance(ctrl, graph, graph->ncon);
      KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);
  else {
    /* Coarsen it and partition it */
    /* NOTE(review): the case labels selecting between the two matching
       routines are not present in this listing */
    switch (ctrl->ps_relation) {
        Match_Local(ctrl, graph);
        Match_Global(ctrl, graph);

    Adaptive_Partition(ctrl, graph->coarser);

    /* project partition and refine */
    ProjectPartition(ctrl, graph);
    ComputePartitionParams(ctrl, graph);

    /* multi-constraint graphs get an explicit balancing step on the first
       four levels (level < 4) when the average imbalance exceeds ubavg+0.025 */
    if (graph->ncon > 1 && graph->level < 4) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      lbavg = ravg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.025) {
        KWayBalance(ctrl, graph, graph->ncon);

    KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");


Beispiel #13
/*
 * This function is the entry point of the initial partitioning algorithm.
 * This algorithm assembles the graph to all the processors and proceeds
 * serially.
 */
/*
 * Mc_Diffusion
 *
 * Modifies the partition vector `where` of `graph` over at most `npasses`
 * passes. As far as the visible code shows, each pass
 *   1. computes, for every constraint h, a load vector (ComputeLoad), a flow
 *      solution on the subdomain connectivity matrix (ConjGrad2) and the
 *      resulting transfer amounts (ComputeTransferVector);
 *   2. repeatedly matches subdomains (CSR_Match_SHEM); the processor that is
 *      assigned a matched pair (me, you) extracts the subgraph of the two
 *      subdomains and rebalances it with RedoMyLink and BalanceMyLink,
 *      keeping one of the two results; the new assignments are exchanged
 *      with gkMPI_Allgatherv;
 *   3. runs serial k-way adaptive refinement and tests for early termination.
 * Afterwards empty subdomains are given vertices taken from the subdomain
 * reported by iargmax(nparts, match).
 *
 * Returns 0, both on the immediate exit for graph->ncon > 3 and at the end.
 *
 * NOTE(review): this listing appears to be missing many lines (closing
 * braces, the statements controlled by several `if`s, loop exits, and the
 * second half of the lbvec[h] expression near the end of the pass loop).
 * The comments below describe only what is visible; confirm against the
 * upstream file.
 */
idx_t Mc_Diffusion(ctrl_t *ctrl, graph_t *graph, idx_t *vtxdist, idx_t *where, 
          idx_t *home, idx_t npasses)
  idx_t h, i, j;
  idx_t nvtxs, nedges, ncon, pass, iter, domain, processor;
  idx_t nparts, mype, npes, nlinks, me, you, wsize;
  idx_t nvisited, nswaps = -1, tnswaps, done, alldone = -1;
  idx_t *rowptr, *colind, *diff_where, *sr_where, *ehome, *map, *rmap;
  idx_t *pack, *unpack, *match, *proc2sub, *sub2proc;
  idx_t *visited, *gvisited;
  real_t *transfer, *npwgts, maxdiff, minflow, maxflow;
  real_t lbavg, oldlbavg, ubavg, *lbvec;
  real_t *diff_flows, *sr_flows;
  real_t diff_lbavg, sr_lbavg, diff_cost, sr_cost;
  idx_t *rbuffer, *sbuffer; 
  idx_t *rcount, *rdispl;
  real_t *solution, *load, *workspace;
  matrix_t matrix;
  graph_t *egraph;

  /* nothing is done for more than three constraints */
  if (graph->ncon > 3)
    return 0;


  nvtxs  = graph->nvtxs;
  nedges = graph->nedges;
  ncon   = graph->ncon;

  nparts = ctrl->nparts;
  mype   = ctrl->mype;
  npes   = ctrl->npes;
  ubavg  = ravg(ncon, ctrl->ubvec);

  /* initialize variables and allocate memory */
  lbvec      = rwspacemalloc(ctrl, ncon);
  diff_flows = rwspacemalloc(ctrl, ncon);
  sr_flows   = rwspacemalloc(ctrl, ncon);

  load                       = rwspacemalloc(ctrl, nparts);
  solution                   = rwspacemalloc(ctrl, nparts);
  npwgts = graph->gnpwgts    = rwspacemalloc(ctrl, ncon*nparts);
  matrix.values              = rwspacemalloc(ctrl, nedges);
  transfer = matrix.transfer = rwspacemalloc(ctrl, ncon*nedges);

  proc2sub               = iwspacemalloc(ctrl, gk_max(nparts, npes*2));
  sub2proc               = iwspacemalloc(ctrl, nparts);
  match                  = iwspacemalloc(ctrl, nparts);
  rowptr = matrix.rowptr = iwspacemalloc(ctrl, nparts+1);
  colind = matrix.colind = iwspacemalloc(ctrl, nedges);

  rcount = iwspacemalloc(ctrl, npes);
  rdispl = iwspacemalloc(ctrl, npes+1);

  pack       = iwspacemalloc(ctrl, nvtxs);
  unpack     = iwspacemalloc(ctrl, nvtxs);
  rbuffer    = iwspacemalloc(ctrl, nvtxs);
  sbuffer    = iwspacemalloc(ctrl, nvtxs);
  map        = iwspacemalloc(ctrl, nvtxs);
  rmap       = iwspacemalloc(ctrl, nvtxs);
  diff_where = iwspacemalloc(ctrl, nvtxs);
  ehome      = iwspacemalloc(ctrl, nvtxs);

  /* one heap buffer sized in bytes for the larger of its two uses: as real_t
     scratch for ConjGrad2 and as idx_t scratch for SetUpConnectGraph */
  wsize = gk_max(sizeof(real_t)*nparts*6, sizeof(idx_t)*(nvtxs+nparts*2+1));
  workspace = (real_t *)gk_malloc(wsize, "Mc_Diffusion: workspace");

  graph->ckrinfo = (ckrinfo_t *)gk_malloc(nvtxs*sizeof(ckrinfo_t), "Mc_Diffusion: rinfo");

  /* construct subdomain connectivity graph */
  matrix.nrows = nparts;
  SetUpConnectGraph(graph, &matrix, (idx_t *)workspace);
  /* presumably each row has one diagonal entry and every link is stored in
     both directions -- TODO confirm against SetUpConnectGraph */
  nlinks = (matrix.nnzs-nparts) / 2;

  visited  = iwspacemalloc(ctrl, matrix.nnzs);
  gvisited = iwspacemalloc(ctrl, matrix.nnzs);

  for (pass=0; pass<npasses; pass++) {
    rset(matrix.nnzs*ncon, 0.0, transfer);
    iset(matrix.nnzs, 0, gvisited);
    iset(matrix.nnzs, 0, visited);
    iter = nvisited = 0;

    /* compute ncon flow solutions */
    for (h=0; h<ncon; h++) {
      rset(nparts, 0.0, solution);
      ComputeLoad(graph, nparts, load, ctrl->tpwgts, h);

      lbvec[h] = (rmax(nparts, load)+1.0/nparts) * (real_t)nparts;

      ConjGrad2(&matrix, load, solution, 0.001, workspace);
      ComputeTransferVector(ncon, &matrix, solution, transfer, h);

    oldlbavg = ravg(ncon, lbvec);
    tnswaps = 0;
    /* maxdiff: largest spread (max - min over the constraints) of the
       transfer amounts over all matrix entries */
    maxdiff = 0.0;
    for (i=0; i<nparts; i++) {
      for (j=rowptr[i]; j<rowptr[i+1]; j++) {
        maxflow = rmax(ncon, transfer+j*ncon);
        minflow = rmin(ncon, transfer+j*ncon);
        maxdiff = (maxflow - minflow > maxdiff) ? maxflow - minflow : maxdiff;

    while (nvisited < nlinks) {
      /* compute independent sets of subdomains */
      iset(gk_max(nparts, npes*2), UNMATCHED, proc2sub);
      CSR_Match_SHEM(&matrix, match, proc2sub, gvisited, ncon);

      /* set up the packing arrays */
      /* proc2sub holds two subdomain slots per processor; sub2proc is the
         inverse map from subdomain to processor */
      iset(nparts, UNMATCHED, sub2proc);
      for (i=0; i<npes*2; i++) {
        if (proc2sub[i] == UNMATCHED)

        sub2proc[proc2sub[i]] = i/2;

      /* NOTE(review): the statement controlled by the `if` below is not
         visible in this listing */
      iset(npes, 0, rcount);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 

      /* prefix sum of rcount into rdispl */
      rdispl[0] = 0;
      for (i=1; i<npes+1; i++)
        rdispl[i] = rdispl[i-1] + rcount[i-1];

      /* unpack[p] = vertex stored at packed position p; rdispl is advanced
         while filling and shifted back by SHIFTCSR afterwards */
      iset(nvtxs, UNMATCHED, unpack);
      for (i=0; i<nvtxs; i++) {
        domain = where[i];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          unpack[rdispl[processor]++] = i;

      SHIFTCSR(i, npes, rdispl);

      /* pack is the inverse of unpack: vertex -> packed position */
      iset(nvtxs, UNMATCHED, pack);
      for (i=0; i<rdispl[npes]; i++) {
        ASSERT(unpack[i] != UNMATCHED);
        domain = where[unpack[i]];
        processor = sub2proc[domain];
        if (processor != UNMATCHED) 
          pack[unpack[i]] = i;

      /* Compute the flows */
      if (proc2sub[mype*2] != UNMATCHED) {
        me  = proc2sub[2*mype];
        you = proc2sub[2*mype+1];
        ASSERT(me != you);

        /* mark the (me,you) entry as visited and take its transfer amounts */
        for (j=rowptr[me]; j<rowptr[me+1]; j++) {
          if (colind[j] == you) {
            visited[j] = 1;
            rcopy(ncon, transfer+j*ncon, diff_flows);

        /* positive transfers on the reverse (you,me) entry are stored negated */
        for (j=rowptr[you]; j<rowptr[you+1]; j++) {
          if (colind[j] == me) {
            visited[j] = 1;
            for (h=0; h<ncon; h++) {
              if (transfer[j*ncon+h] > 0.0)
                diff_flows[h] = -1.0 * transfer[j*ncon+h];

        nswaps = 1;
        rcopy(ncon, diff_flows, sr_flows);

        /* flag the vertices of the two subdomains for extraction */
        iset(nvtxs, 0, sbuffer);
        for (i=0; i<nvtxs; i++) {
          if (where[i] == me || where[i] == you)
            sbuffer[i] = 1;

        egraph = ExtractGraph(ctrl, graph, sbuffer, map, rmap);

        if (egraph != NULL) {
          icopy(egraph->nvtxs, egraph->where, diff_where);
          for (j=0; j<egraph->nvtxs; j++)
            ehome[j] = home[map[j]];
          RedoMyLink(ctrl, egraph, ehome, me, you, sr_flows, &sr_cost, &sr_lbavg);

          /* always true here: the function returned early for ncon > 3 */
          if (ncon <= 4) {
            /* keep the RedoMyLink result in sr_where and let BalanceMyLink
               work on the saved copy of the original assignment */
            sr_where      = egraph->where;
            egraph->where = diff_where;

            nswaps = BalanceMyLink(ctrl, egraph, ehome, me, you, diff_flows, maxdiff, 
                         &diff_cost, &diff_lbavg, 1.0/(real_t)nvtxs);

            /* choose between the two candidate assignments based on their
               balance and cost */
            if ((sr_lbavg < diff_lbavg &&
                (diff_lbavg >= ubavg-1.0 || sr_cost == diff_cost)) ||
                (sr_lbavg < ubavg-1.0 && sr_cost < diff_cost)) {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = sr_where[i];
            else {
              for (i=0; i<egraph->nvtxs; i++)
                where[map[i]] = diff_where[i];
          else {
            for (i=0; i<egraph->nvtxs; i++)
              where[map[i]] = egraph->where[i];

          gk_free((void **)&egraph->xadj, &egraph->nvwgt, &egraph->adjncy, &egraph, LTERM);

        /* Pack the flow data */
        iset(nvtxs, UNMATCHED, sbuffer);
        for (i=0; i<nvtxs; i++) {
          domain = where[i];
          if (domain == you || domain == me) 
            sbuffer[pack[i]] = where[i];

      /* Broadcast the flow data */
      gkMPI_Allgatherv((void *)&sbuffer[rdispl[mype]], rcount[mype], IDX_T, 
          (void *)rbuffer, rcount, rdispl, IDX_T, ctrl->comm);

      /* Unpack the flow data */
      for (i=0; i<rdispl[npes]; i++) {
        if (rbuffer[i] != UNMATCHED) 
          where[unpack[i]] = rbuffer[i];

      /* Do other stuff */
      gkMPI_Allreduce((void *)visited, (void *)gvisited, matrix.nnzs,
          IDX_T, MPI_MAX, ctrl->comm);
      /* halved: both directions of a link are marked when it is processed */
      nvisited = isum(matrix.nnzs, gvisited, 1)/2;
      tnswaps += GlobalSESum(ctrl, nswaps);

      /* NOTE(review): the statement controlled by this test is not visible */
      if (iter++ == NGD_PASSES)

    /* perform serial refinement */
    Mc_ComputeSerialPartitionParams(ctrl, graph, nparts);
    Mc_SerialKWayAdaptRefine(ctrl, graph, nparts, home, ctrl->ubvec, 10);

    /* check for early breakout */
    /* NOTE(review): the right-hand side of the lbvec[h] assignment is
       truncated in this listing */
    for (h=0; h<ncon; h++) {
      lbvec[h] = (real_t)(nparts) *
    lbavg = ravg(ncon, lbvec);

    /* done when nothing was swapped, the balance did not improve, or it is
       within 0.035 of the allowed average */
    done = 0;
    if (tnswaps == 0 || lbavg >= oldlbavg || lbavg <= ubavg + 0.035)
      done = 1;

    alldone = GlobalSEMax(ctrl, done);
    if (alldone == 1)

  /* ensure that all subdomains have at least one vertex */
  /* NOTE(review): the body of the counting loop below is not visible;
     presumably match[] ends up holding per-subdomain vertex counts */
  iset(nparts, 0, match);
  for (i=0; i<nvtxs; i++)

  done = 0;
  while (done == 0) {
    done = 1;

    me = iargmin(nparts, match);  
    if (match[me] == 0) {
      if (ctrl->mype == PE) printf("WARNING: empty subdomain %"PRIDX" in Mc_Diffusion\n", me);
      you = iargmax(nparts, match);  
      for (i=0; i<nvtxs; i++) {
        if (where[i] == you) {
          where[i] = me;
          done = 0;
  /* now free memory and return */
  gk_free((void **)&workspace, (void **)&graph->ckrinfo, LTERM);
  /* gnpwgts pointed into the ctrl workspace, so it is only reset, not freed */
  graph->gnpwgts = NULL;
  graph->ckrinfo = NULL;


  return 0;
Beispiel #14
/* This function is the driver to the multi-constraint partitioning algorithm. */
/*
 * Moc_Global_Partition
 *
 * Recursive multilevel driver. After SetUp() it either
 *   - treats the graph as the coarsest one (gnvtxs < 1.3*CoarsenTo, or more
 *     than COARSEN_FRACTION of the size of graph->finer), allocates
 *     graph->where and computes an initial partition with
 *     Moc_InitPartition_RB; or
 *   - coarsens with Moc_GlobalMatch_Balance, recurses on graph->coarser,
 *     projects the partition back, optionally balances it, and refines it
 *     with Moc_KWayFM.
 *
 * NOTE(review): this listing appears to be missing lines (closing braces and
 * at least one `else`); confirm against the upstream file.
 */
void Moc_Global_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
  int i, ncon, nparts;
  floattype ftmp, ubavg, lbavg, lbvec[MAXNCON];
  ncon = graph->ncon;
  nparts = ctrl->nparts;
  ubavg = savg(graph->ncon, ctrl->ubvec);

  SetUp(ctrl, graph, wspace);

  /* progress line: global sizes followed by the global per-constraint
     minimum and maximum normalised vertex weights */
  if (ctrl->dbglvl&DBG_PROGRESS) {
    rprintf(ctrl, "[%6d %8d %5d %5d] [%d] [", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges),
	    GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo);
    for (i=0; i<ncon; i++)
      rprintf(ctrl, " %.3f", GlobalSEMinFloat(ctrl,graph->nvwgt[samin_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i]));  
    rprintf(ctrl, "] [");
    for (i=0; i<ncon; i++)
      rprintf(ctrl, " %.3f", GlobalSEMaxFloat(ctrl, graph->nvwgt[samax_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i]));  
    rprintf(ctrl, "]\n");

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
	(graph->finer != NULL &&
	graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    /* Done with coarsening. Find a partition */
    /* room for the nvtxs local entries plus nrecv received ones */
    graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "graph->where");
    Moc_InitPartition_RB(ctrl, graph, wspace);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");

    /* In case no coarsening took place */
    if (graph->finer == NULL) {
      Moc_ComputePartitionParams(ctrl, graph, wspace);
      Moc_KWayFM(ctrl, graph, wspace, NGR_PASSES);
  else {
    Moc_GlobalMatch_Balance(ctrl, graph, wspace);

    Moc_Global_Partition(ctrl, graph->coarser, wspace);

    Moc_ProjectPartition(ctrl, graph, wspace);
    Moc_ComputePartitionParams(ctrl, graph, wspace);

    /* multi-constraint graphs get an explicit balancing step on the first
       three levels (level < 3) */
    if (graph->ncon > 1 && graph->level < 3) {
      /* per-constraint imbalance: nparts * (heaviest partition weight) /
         (total weight), taken from gnpwgts */
      /* NOTE(review): the two consecutive lbvec[i] assignments below look
         like the branches of an if/else whose `else` line is missing here */
      for (i=0; i<ncon; i++) {
        ftmp = ssum_strd(nparts, graph->gnpwgts+i, ncon);
        if (ftmp != 0.0)
          lbvec[i] = (floattype)(nparts) *
          graph->gnpwgts[samax_strd(nparts, graph->gnpwgts+i, ncon)*ncon+i]/ftmp;
          lbvec[i] = 1.0;
      lbavg = savg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.035) {
        if (ctrl->dbglvl&DBG_PROGRESS) {
          Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
          rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
          for (i=0; i<graph->ncon; i++) 
            rprintf(ctrl, "%.3f ", lbvec[i]);
          rprintf(ctrl, "\n");

        Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);

    Moc_KWayFM(ctrl, graph, wspace, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");

    /* the partition weights are kept only for level 0 */
    if (graph->level != 0)
      GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, LTERM);


Beispiel #15
/*
 * This function is the entry point of the parallel multilevel local diffusion
 * algorithm. It uses parallel undirected diffusion followed by adaptive k-way
 * refinement. This function utilizes local coarsening.
 */
/*
 * ParMETIS_V3_RefineKway
 *
 * Flow of the visible code:
 *   1. CheckInputs() produces sanitised copies of the scalar arguments
 *      (iwgtflag, inumflag, incon, inparts, ioptions, itpwgts, iubvec).
 *   2. inparts <= 1: `part` is filled with 0 and *edgecut set to 0.
 *   3. Otherwise the control structure and the graph are set up with
 *      partType = REFINE_PARTITION; graph->home is filled with mype when
 *      ps_relation is COUPLED (the listing also shows a copy of `part` into
 *      home), then Adaptive_Partition and ParallelReMapGraph are run.
 *   4. graph->where is copied to `part`; *edgecut (if non-NULL) receives
 *      graph->mincut.
 *
 * NOTE(review): this listing appears to be missing lines (closing braces,
 * `else` lines, and presumably the early return of the inparts <= 1 case);
 * confirm against the upstream file.
 */
void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, 
	      int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, 
	      idxtype *part, MPI_Comm *comm)
  int h, i;
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int tewgt, tvsize, nmoved, maxin, maxout;
  float gtewgt, gtvsize, avg, maximb;
  int ps_relation, seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /* Try and take care bad inputs */
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
              NULL, NULL, options, ioptions, part, comm);

  /* ADD: take care of disconnected graph */
  /* ADD: take care of highly unbalanced vtxdist */
  /* Take care the nparts = 1 case */
  /* NOTE(review): *edgecut is written here without the NULL check that is
     applied to it further down */
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); 
    *edgecut = 0;

  /* Set up data structures */
  if (inumflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /* Set up control structures */
  /* user options are honoured when ioptions[0] == 1; the other assignments
     use the GLOBAL_* defaults. A user-selected ps_relation is used only when
     npes == inparts, otherwise DISCOUPLED is forced. */
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
    ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED;
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
    ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED;

  SetUpCtrl(&ctrl, inparts, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts));
  ctrl.ipc_factor = 1000.0;
  ctrl.redist_factor = 1.0;
  ctrl.redist_base = 1.0;
  /* per-processor seed: mype when seed is 0, otherwise seed*mype */
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = REFINE_PARTITION;
  ctrl.ps_relation = ps_relation;
  ctrl.tpwgts = itpwgts;

  graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag);
  /* every vertex gets size 1 */
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize");

  /* NOTE(review): the two statements after the `if` look like the branches
     of an if/else whose `else` line is missing here */
  graph->home = idxmalloc(graph->nvtxs, "home");
  if (ctrl.ps_relation == COUPLED)
    idxset(graph->nvtxs, mype, graph->home);
    idxcopy(graph->nvtxs, part, graph->home);

  /* global edge-weight to vertex-size ratio; 1.0/gnvtxs is added to both
     sums before dividing */
  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs;
  gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl.edge_size_ratio = gtewgt/gtvsize;
  scopy(incon, iubvec, ctrl.ubvec);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /* Partition and Remap */
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Adaptive_Partition(&ctrl, graph, &wspace);
  ParallelReMapGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  /* hand the result back to the caller */
  idxcopy(graph->nvtxs, graph->where, part);
  if (edgecut != NULL)
    *edgecut = graph->mincut;

  /* Take care of output */
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  /* debug summary: per-constraint maximum of gnpwgts/tpwgts over all
     partitions, followed by the movement statistics */
  /* NOTE(review): `avg` is accumulated but not used in the visible code */
  if (ctrl.dbglvl&DBG_INFO) {
    Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout);
    rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout);

  /* Free memory, renumber, and return */
  GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM);

  GKfree((void **)&itpwgts, LTERM);
  FreeInitialGraphAndRemap(graph, iwgtflag);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);


Beispiel #16
* This function computes the assignment using the the objective the 
* minimization of the total volume of data that needs to move
void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map,
     WorkSpaceType *wspace, int npasses, int ncon)
  int i, ii, j, k, nparts, mype;
  int pass, maxipwgt, nmapped, oldwgt, newwgt, done;
  idxtype *rowmap, *mylpwgts;
  KeyValueType *recv, send;
  int nsaved, gnsaved;

  mype   = ctrl->mype;
  nparts = ctrl->nparts;

  recv     = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv");
  mylpwgts = idxmalloc(nparts, "mylpwgts");

  done = nmapped = 0;
  idxset(nparts, -1, map);
  rowmap = idxset(nparts, -1, wspace->pv3);
  idxcopy(nparts, lpwgts, mylpwgts);
  for (pass=0; pass<npasses; pass++) {
    maxipwgt = idxamax(nparts, mylpwgts);

    if (mylpwgts[maxipwgt] > 0 && !done) {
      send.key = -mylpwgts[maxipwgt];
      send.val = mype*nparts+maxipwgt;
    else {
      send.key = 0;
      send.val = -1;

    /* each processor sends its selection */
    MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm); 

    ikeysort(nparts, recv);
    if (recv[0].key == 0)

    /* now make as many assignments as possible */
    for (ii=0; ii<nparts; ii++) {
      i = recv[ii].val;

      if (i == -1)

      j = i % nparts;
      k = i / nparts;
      if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) {
        map[j] = k;
        rowmap[k] = j;
        mylpwgts[j] = 0;
        if (mype == k)
          done = 1;

      if (nmapped == nparts)

    if (nmapped == nparts)

  /* Map unmapped partitions */
  if (nmapped < nparts) {
    for (i=j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (; i<nparts; i++) {
          if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts) {
    for (i=0; i<nparts; i++)
      map[i] = i; 
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %0\n")); 
  else {
    /* check for a savings */
    oldwgt  = lpwgts[mype];
    newwgt  = lpwgts[rowmap[mype]];
    nsaved  = newwgt - oldwgt;
    gnsaved = GlobalSESum(ctrl, nsaved);

    /* undo everything if we don't see a savings */
    if (gnsaved <= 0) {
      for (i=0; i<nparts; i++)
        map[i] = i;
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved))); 

  GKfree((void **)&recv, (void **)&mylpwgts, LTERM);

Beispiel #17
* This function performs k-way refinement
/*
 * Moc_KWayFM -- multi-constraint k-way refinement of graph->where, run for
 * at most npasses passes.
 *
 * Every pass draws a random permutation of the partitions (pperm, shared by
 * all processors) and of the local vertices (perm), then runs two sub-phases
 * (c = 0, 1) in which ProperSide(c, pperm[from], pperm[to]) restricts the
 * allowed move directions.  Each sub-phase:
 *   1. PASS ONE: tentatively moves vertices on scratch copies
 *      (tmp_where / tmp_rinfo) and accumulates the weight changes;
 *   2. reduces the tentative partition weights and undoes moves into
 *      partitions that would end up overfilled;
 *   3. PASS TWO: commits the surviving moves to where[], tells neighbouring
 *      processors about changed interface vertices and about remote vertices
 *      whose degrees must be refreshed;
 *   4. rebuilds rinfo (and tmp_rinfo) for every affected vertex and
 *      re-reduces the partition weights.
 *
 * ctrl    - control structure; npes, mype, nparts, ubvec, tpwgts, comm,
 *           rreq/sreq/statuses, dbglvl and KWayTmr are read here.
 * graph   - distributed graph; where, rinfo, lnpwgts, gnpwgts, lmincut and
 *           mincut are updated in place.
 * wspace  - workspace providing the pairs, indices and pv4 buffers.
 * npasses - upper bound on the number of refinement passes.
 *
 * NOTE(review): this listing has lost its braces and a number of statements
 * (several `if`/`for` lines below have no visible body), so the exact control
 * flow cannot be confirmed from the text alone.
 */
void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
  int h, i, ii, iii, j, k, c;
  int pass, nvtxs, nedges, ncon;
  int nmoves, nmoved, nswaps, nzgswaps;
  /* NOTE(review): gnswaps and gnzgswaps are assigned and printed near the end
   * of this function although their declaration is commented out here. */
/*  int gnswaps, gnzgswaps; */
  int me, firstvtx, lastvtx, yourlastvtx;
  int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  int nlupd, nsupd, nnbrs, nchanged;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  idxtype *where, *tmp_where, *moved;
  floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
  idxtype *update, *supdate, *rupdate, *pe_updates;
  idxtype *changed, *perm, *pperm, *htable;
  idxtype *peind, *recvptr, *sendptr;
  KeyValueType *swchanges, *rwchanges;
  RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
  EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
  floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
  int *nupds_pe;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  /* set up common aliases */
  nvtxs = graph->nvtxs;
  nedges = graph->nedges;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  /* global vertex-number range [firstvtx, lastvtx) taken from vtxdist for this processor */
  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  where   = graph->where;
  rinfo   = graph->rinfo;
  lnpwgts = graph->lnpwgts;
  gnpwgts = graph->gnpwgts;
  ubvec   = ctrl->ubvec;
  tpwgts  = ctrl->tpwgts;

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  changed = idxmalloc(nvtxs, "KWR: changed");
  /* rwchanges and swchanges share wspace->pairs; swchanges starts
   * recvptr[nnbrs] entries after rwchanges */
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  /* set up important data structures */
  perm = idxmalloc(nvtxs, "KWR: perm");
  pperm = idxmalloc(nparts, "KWR: pperm");

  update = idxmalloc(nvtxs, "KWR: update");
  /* supdate and rupdate share wspace->indices in the same way */
  supdate = wspace->indices;
  rupdate = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWR: nupds_pe");
  /* htable is a 0/1 marker per local or received vertex; it keeps a vertex
   * from being queued twice in update[] / supdate[] */
  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
  badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");

  /* badmaxpwgt[i*ncon+h]: weight ceiling of partition i in constraint h,
   * i.e. the target weight scaled by the tolerance ubvec[h] */
  for (i=0; i<nparts; i++) {
    for (h=0; h<ncon; h++) {
      badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];

  movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
  ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
  pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
  overfill = fmalloc(nparts*ncon, "KWR: overfill");
  moved = idxmalloc(nvtxs, "KWR: moved");
  tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
  tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
  tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");

  /* Build scratch copies of where[] and rinfo[]; the degree list of vertex i
   * lives at offset xadj[i] inside tmp_edegrees */
  idxcopy(nvtxs+graph->nrecv, where, tmp_where);
  for (i=0; i<nvtxs; i++) {
    tmp_rinfo[i].id = rinfo[i].id;
    tmp_rinfo[i].ed = rinfo[i].ed;
    tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
    tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];

    for (j=0; j<rinfo[i].ndegrees; j++) {
      tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
      tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;

  nswaps = nzgswaps = 0;
  /* perform a small number of passes through the vertices */
  for (pass=0; pass<npasses; pass++) {
    /* processor 0 draws the partition permutation and broadcasts it, so all
     * processors evaluate ProperSide() on the same pperm */
    if (mype == 0)
      RandomPermute(nparts, pperm, 1);
    MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
    FastRandomPermute(nvtxs, perm, 1);
    oldcut = graph->mincut;

    /* check to see if the partitioning is imbalanced */
    /* imbalanced is set when savg() of the measured balance vector exceeds
     * savg() of the tolerances (savg presumably averages ncon entries --
     * TODO confirm) */
    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    ubavg = savg(ncon, ubvec);
    lbavg = savg(ncon, lbvec);
    imbalanced = (lbavg > ubavg) ? 1 : 0;

    for (c=0; c<2; c++) {
      /* snapshot the global weights before this sub-phase and reset the
       * per-partition net weight movement */
      scopy(ncon*nparts, gnpwgts, ognpwgts);
      sset(ncon*nparts, 0.0, movewgts);
      nmoved = 0;

      /* PASS ONE -- record stats for desired moves */
      for (iii=0; iii<nvtxs; iii++) {
        i = perm[iii];
        from = tmp_where[i];
        nvwgt = graph->nvwgt+i*ncon;

        /* Look for a constraint in which this vertex's weight equals (within
         * SMALLFLOAT) the whole weight of its partition.
         * NOTE(review): the statements controlled by the two tests below are
         * missing from this listing. */
        for (h=0; h<ncon; h++)
          if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)

        if (h < ncon) {

        /* check for a potential improvement */
        if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
          my_edegrees = tmp_rinfo[i].degrees;

          /* scan the adjacent partitions for one on the proper side whose
           * weight would stay within badmaxpwgt in every constraint where
           * the vertex has positive weight.
           * NOTE(review): the loop exits appear to be missing here. */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
            to = my_edegrees[k].edge;
            if (ProperSide(c, pperm[from], pperm[to])) {
              for (h=0; h<ncon; h++)
                if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)

              if (h == ncon)
          oldto = to;

          /* check if a subdomain was found that fits */
          if (k < tmp_rinfo[i].ndegrees) {
            /* compare the remaining candidates against the current choice k,
             * preferring a larger edge weight towards the target partition;
             * the tie-breaking condition is truncated in this listing */
            for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
              to = my_edegrees[j].edge;
              if (ProperSide(c, pperm[from], pperm[to])) {
                for (h=0; h<ncon; h++)
                  if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)

                if (h == ncon) {
                  if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
                   (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
                    k = j;
                    oldto = my_edegrees[k].edge;
            to = oldto;

            /* Move when the weight towards `to` exceeds the internal degree;
             * an equal-weight move is additionally gated by the balance
             * state, the graph level, or every 8th vertex of the permutation.
             * NOTE(review): the tail of this condition is missing. */
            if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
            (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
            (imbalanced ||  graph->level > 3  || iii % 8 == 0) &&

              /* Update tmp arrays of the moved vertex */
              tmp_where[i] = to;
              moved[nmoved++] = i;
              for (h=0; h<ncon; h++) {
                lnpwgts[to*ncon+h] += nvwgt[h];
                lnpwgts[from*ncon+h] -= nvwgt[h];
                gnpwgts[to*ncon+h] += nvwgt[h];
                gnpwgts[from*ncon+h] -= nvwgt[h];
                movewgts[to*ncon+h] += nvwgt[h];
                movewgts[from*ncon+h] -= nvwgt[h];

              /* The old internal degree becomes the weight towards `from`
               * and the old weight towards `to` becomes the internal degree;
               * slot k is relabelled `from`, or overwritten from slot
               * [ndegrees] when its weight is zero. */
              tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
              SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
              if (my_edegrees[k].ewgt == 0) {
                my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
                my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
              else {
                my_edegrees[k].edge = from;

              /* Update the degrees of adjacent vertices */
              for (j=xadj[i]; j<xadj[i+1]; j++) {
                /* no need to bother about vertices on different pe's */
                /* (adjacency indices >= nvtxs refer to non-local vertices) */
                if (ladjncy[j] >= nvtxs)

                me = ladjncy[j];
                mydomain = tmp_where[me];

                myrinfo = tmp_rinfo+me;
                your_edegrees = myrinfo->degrees;

                if (mydomain == from) {
                  INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
                else {
                  if (mydomain == to) {
                    INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

                /* Remove contribution from the .ed of 'from' */
                if (mydomain != from) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == from) {
                      if (your_edegrees[k].ewgt == adjwgt[j]) {
                        your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                        your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                      else {
                        your_edegrees[k].ewgt -= adjwgt[j];

                /* Add contribution to the .ed of 'to' */
                if (mydomain != to) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == to) {
                      your_edegrees[k].ewgt += adjwgt[j];
                  /* `to` was not in the list yet: append a new entry */
                  if (k == myrinfo->ndegrees) {
                    your_edegrees[myrinfo->ndegrees].edge = to;
                    your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];

      /* Let processors know the subdomain wgts */
      /* if all proposed moves commit.          */
      /* NOTE(review): the buffers are floattype but are reduced as
       * MPI_DOUBLE -- assumes floattype is double; TODO confirm */
      MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);

      /* compute overfill array */
      /* Only entries whose proposed weight grew over the snapshot get a
       * non-zero overfill; the value is clamped at zero and scaled by this
       * processor's own net movement (the divisor of the ratio is missing
       * from this listing). */
      overweight = 0;
      for (j=0; j<nparts; j++) {
        for (h=0; h<ncon; h++) {
          if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
            overfill[j*ncon+h] =
            (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) /
          else {
            overfill[j*ncon+h] = 0.0;

          overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
          overfill[j*ncon+h] *= movewgts[j*ncon+h];

          if (overfill[j*ncon+h] > 0.0)
            overweight = 1;

          ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] ||
          pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h],
          (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h],
          badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));

      /* select moves to undo according to overfill array */
      if (overweight == 1) {
        for (iii=0; iii<nmoved; iii++) {
          i = moved[iii];
          oldto = tmp_where[i];
          nvwgt = graph->nvwgt+i*ncon;
          my_edegrees = tmp_rinfo[i].degrees;

          /* find the slot k that holds the vertex's committed partition
           * where[i], and a constraint h in which the destination's overfill
           * exceeds a quarter of the vertex weight.
           * NOTE(review): the loop exits are missing from this listing. */
          for (k=0; k<tmp_rinfo[i].ndegrees; k++)
            if (my_edegrees[k].edge == where[i])

          for (h=0; h<ncon; h++)
            if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)

          /* nullify this move if necessary */
          if (k != tmp_rinfo[i].ndegrees && h != ncon) {
            /* moved[iii] = -1 marks the move as cancelled for PASS TWO; the
             * undo is the reverse move oldto -> where[i] on the scratch data */
            moved[iii] = -1;
            from = oldto;
            to = where[i];

            for (h=0; h<ncon; h++) {
              overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);

            tmp_where[i] = to;
            tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
            SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
            if (my_edegrees[k].ewgt == 0) {
              my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
              my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
            else {
              my_edegrees[k].edge = from;

            /* only the local weights are restored here; gnpwgts is rebuilt
             * by the Allreduce at the end of the sub-phase */
            for (h=0; h<ncon; h++) {
              lnpwgts[to*ncon+h] += nvwgt[h];
              lnpwgts[from*ncon+h] -= nvwgt[h];

            /* Update the degrees of adjacent vertices */
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              /* no need to bother about vertices on different pe's */
              if (ladjncy[j] >= nvtxs)

              me = ladjncy[j];
              mydomain = tmp_where[me];

              myrinfo = tmp_rinfo+me;
              your_edegrees = myrinfo->degrees;

              if (mydomain == from) {
                INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
              else {
                if (mydomain == to) {
                  INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

              /* Remove contribution from the .ed of 'from' */
              if (mydomain != from) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == from) {
                    if (your_edegrees[k].ewgt == adjwgt[j]) {
                      your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                      your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                    else {
                      your_edegrees[k].ewgt -= adjwgt[j];

              /* Add contribution to the .ed of 'to' */
              if (mydomain != to) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == to) {
                    your_edegrees[k].ewgt += adjwgt[j];
                if (k == myrinfo->ndegrees) {
                  your_edegrees[myrinfo->ndegrees].edge = to;
                  your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];

      /* PASS TWO -- commit the remainder of the moves */
      nlupd = nsupd = nmoves = nchanged = 0;
      for (iii=0; iii<nmoved; iii++) {
        i = moved[iii];
        /* cancelled moves carry -1; NOTE(review): the skip statement is
         * missing from this listing */
        if (i == -1)

        where[i] = tmp_where[i];

        /* Make sure to update the vertex information */
        if (htable[i] == 0) {
          /* make sure you do the update */
          htable[i] = 1;
          update[nlupd++] = i;

        /* Put the vertices adjacent to i into the update array */
        /* local neighbours (k < nvtxs) go to update[]; the supdate[] line is
         * presumably the else-branch for remote neighbours -- the `else`
         * itself is not visible here */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (htable[k] == 0) {
            htable[k] = 1;
            if (k<nvtxs)
              update[nlupd++] = k;
              supdate[nsupd++] = k;

        /* check number of zero-gain moves */
        /* NOTE(review): the statements controlled by these tests (presumably
         * the nswaps/nzgswaps bookkeeping) are missing from this listing */
        for (k=0; k<rinfo[i].ndegrees; k++)
          if (rinfo[i].degrees[k].edge == to)
        if (rinfo[i].id == rinfo[i].degrees[k].ewgt)

        /* vertices with a non-empty pexadj range are the ones reported to
         * other processors through CommChangedInterfaceData below */
        if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
          changed[nchanged++] = i;

      /* Tell interested pe's the new where[] info for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
      swchanges, rwchanges, wspace->pv4); 

      IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
      rprintf(ctrl, "\t[%d %d], [%.4f],  [%d %d %d]\n",
      pass, c, badmaxpwgt[0],
      GlobalSESum(ctrl, nmoves),
      GlobalSESum(ctrl, nsupd),
      GlobalSESum(ctrl, nlupd)));

      /* NOTE(review): the next two lines are the remains of a block comment
       * whose opening and closing delimiters were lost */
      / Time to communicate with processors to send the vertices
      / whose degrees need to be update.
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
                  peind[i], 1, ctrl->comm, ctrl->rreq+i);

      /* Issue the sends next. This needs some preprocessing */
      /* clear the markers of the remote vertices, translate them through
       * graph->imap and sort them, so that each neighbour's vertices form
       * one contiguous run */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      iidxsort(nsupd, supdate);

      /* neighbour i receives the run of sorted entries that lie below
       * vtxdist[peind[i]+1]; j tracks where the next run starts */
      for (j=i=0; i<nnbrs; i++) {
        yourlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); 
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
        j = k;

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      /* nupds_pe[i] = number of entries actually received from neighbour i */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++) 
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

      /* NOTE(review): the next line is the remains of a block comment whose
       * delimiters were lost.  Received entries are converted to local
       * indices by subtracting firstvtx. */
      / Place the recieved to-be updated vertices into update[] 
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;

      /* NOTE(review): the next line is the remains of a block comment whose
       * delimiters were lost.  Each queued vertex has its id/ed/degree list
       * rebuilt from where[], with tmp_rinfo kept in step and graph->lmincut
       * adjusted by the change in ed. */
      / Update the rinfo of the vertices in the update[] array
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        htable[i] = 0;

        mydomain = where[i];
        myrinfo = rinfo+i;
        tmp_myrinfo = tmp_rinfo+i;
        my_edegrees = myrinfo->degrees;
        your_edegrees = tmp_myrinfo->degrees;

        graph->lmincut -= myrinfo->ed;
        myrinfo->ndegrees = 0;
        myrinfo->id = 0;
        myrinfo->ed = 0;

        for (j=xadj[i]; j<xadj[i+1]; j++) {
          yourdomain = where[ladjncy[j]];
          if (mydomain != yourdomain) {
            myrinfo->ed += adjwgt[j];

            /* accumulate into the existing entry for yourdomain, or start a
             * new one at slot k == ndegrees (the ndegrees increment is not
             * visible in this listing) */
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (my_edegrees[k].edge == yourdomain) {
                my_edegrees[k].ewgt += adjwgt[j];
                your_edegrees[k].ewgt += adjwgt[j];
            if (k == myrinfo->ndegrees) {
              my_edegrees[k].edge = yourdomain;
              my_edegrees[k].ewgt = adjwgt[j];
              your_edegrees[k].edge = yourdomain;
              your_edegrees[k].ewgt = adjwgt[j];
            ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
            ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);

          else {
            myrinfo->id += adjwgt[j];
        graph->lmincut += myrinfo->ed;

        tmp_myrinfo->id = myrinfo->id;
        tmp_myrinfo->ed = myrinfo->ed;
        tmp_myrinfo->ndegrees = myrinfo->ndegrees;

      /* finally, sum-up the partition weights */
      MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);
    /* the global cut is half the sum of the local cuts */
    graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

    /* NOTE(review): the statement controlled by this test (presumably the
     * early exit when a pass leaves the cut unchanged) is missing */
    if (graph->mincut == oldcut)

  /* NOTE(review): gnswaps/gnzgswaps have no live declaration in this
   * function; this reporting code was presumably guarded by a
   * conditional-compilation block that was lost */
  gnswaps = GlobalSESum(ctrl, nswaps);
  gnzgswaps = GlobalSESum(ctrl, nzgswaps);
  if (mype == 0)
    printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps);

  /* release every buffer allocated in this function (the wspace-backed
   * buffers are not freed here) */
  GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
  GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
  GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
  GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
Beispiel #18
/* This function finds a matching */
/*
 * Moc_GlobalMatch_Balance -- computes a matching of the vertices of the
 * distributed graph, stores it in graph->match and then calls
 * Moc_Global_CreateCoarseGraph() with the local coarse-vertex count.
 *
 * Vertices with any normalised weight above maxnvwgt are first marked
 * TOO_HEAVY.  Then, for NMATCH_PASSES rounds, every unmatched vertex picks
 * its heaviest-edge unmatched neighbour with the same myhome value (ties
 * between local candidates are broken with BetterVBalance).  Local pairs are
 * matched at once; pairs that cross a processor boundary go through a
 * request / grant message exchange with the neighbouring processors.  The
 * vertex of a pair whose match[] value carries KEEP_BIT is the one that
 * keeps the coarse vertex.  Vertices left UNMATCHED or TOO_HEAVY are finally
 * matched with themselves.
 *
 * ctrl   - control structure (mype, npes, nparts, partType, comm,
 *          rreq/sreq/statuses, dbglvl, timers).
 * graph  - distributed graph; match_type and match are set here.
 * wspace - workspace providing the indices, pairs, degrees and pv4 buffers.
 *
 * NOTE(review): this listing has lost its braces, its block-comment
 * delimiters and a number of statements, so the exact control flow cannot
 * be confirmed from the text alone.
 */
void Moc_GlobalMatch_Balance(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
  int h, i, ii, j, k;
  int nnbrs, nvtxs, ncon, cnvtxs, firstvtx, lastvtx, maxi, maxidx, nkept;
  int otherlastvtx, nrequests, nchanged, pass, nmatched, wside;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist, *home, *myhome, *shome, *rhome;
  idxtype *match, *rmatch, *smatch;
  idxtype *peind, *sendptr, *recvptr;
  idxtype *perm, *iperm, *nperm, *changed;
  floattype *nvwgt, maxnvwgt;
  int *nreqs_pe;
  KeyValueType *match_requests, *match_granted, *pe_requests;

  /* weight threshold above which a vertex is excluded from matching */
  maxnvwgt = 1.0/((floattype)(ctrl->nparts)*MAXNVWGT_FACTOR);

  graph->match_type = MATCH_GLOBAL;

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->MatchTmr));

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  home = graph->home;
  nvwgt = graph->nvwgt;

  vtxdist = graph->vtxdist;
  firstvtx = vtxdist[ctrl->mype];
  lastvtx = vtxdist[ctrl->mype+1];

  /* both arrays cover the local vertices followed by the graph->nrecv
   * received interface vertices */
  match = graph->match = idxsmalloc(nvtxs+graph->nrecv, UNMATCHED, "HEM_Match: match");
  myhome = idxsmalloc(nvtxs+graph->nrecv, UNMATCHED, "HEM_Match: myhome");

  /* NOTE(review): the next line is the remains of a block comment whose
   * delimiters were lost.  For the other partitioning types myhome keeps
   * its initial fill value everywhere, so the myhome[k] == myhome[i] test
   * in the matching loop never rejects a candidate. */
  / Send/Receive the home information of interface vertices
  if (ctrl->partType == ADAPTIVE_PARTITION || ctrl->partType == REFINE_PARTITION) {
    idxcopy(nvtxs, home, myhome);
    shome = wspace->indices;
    rhome = myhome + nvtxs;
    CommInterfaceData(ctrl, graph, myhome, shome, rhome);

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  sendptr = graph->sendptr;
  recvptr = graph->recvptr;

  /* Use wspace->indices as the tmp space for matching info of the boundary
   * vertices that are sent and received */
  rmatch = match + nvtxs;
  smatch = wspace->indices;
  changed = smatch+graph->nsend;

  /* Use wspace->pairs as the tmp space for match requests of the boundary
   * vertices that are sent and received */
  match_requests = wspace->pairs;
  match_granted = match_requests + graph->nsend;

  nreqs_pe = ismalloc(nnbrs, 0, "Match_HEM: nreqs_pe");

  /* average number of vertices per processor minus the local count; its
   * sign decides below which side keeps a vertex matched across processors */
  nkept = graph->gnvtxs/ctrl->npes - nvtxs;

  /* perm, iperm (inverse of perm) and nperm (neighbour order) are carved
   * consecutively out of wspace->degrees */
  perm = (idxtype *)wspace->degrees;
  iperm = perm + nvtxs;
  FastRandomPermute(nvtxs, perm, 1);
  for (i=0; i<nvtxs; i++)
    iperm[perm[i]] = i;

  nperm = iperm + nvtxs;
  for (i=0; i<nnbrs; i++)
    nperm[i] = i;

  /* NOTE(review): the next line is the remains of a block comment whose
   * delimiters were lost */
   * Go now and find a matching by doing multiple iterations
  /* First nullify the heavy vertices */
  /* NOTE(review): the body of the inner `if` and the nchanged increment are
   * missing from this listing */
  for (nchanged=i=0; i<nvtxs; i++) {
    for (h=0; h<ncon; h++)
      if (nvwgt[i*ncon+h] > maxnvwgt) {

    if (h != ncon) {
      match[i] = TOO_HEAVY;
  /* if any processor marked a vertex, refresh the interface copies of match[] */
  if (GlobalSESum(ctrl, nchanged) > 0) {
    IFSET(ctrl->dbglvl, DBG_PROGRESS,
    rprintf(ctrl, "We found %d heavy vertices!\n", GlobalSESum(ctrl, nchanged)));
    CommInterfaceData(ctrl, graph, match, smatch, rmatch);

  for (nmatched=pass=0; pass<NMATCH_PASSES; pass++) {
    /* wside alternates with the parity of level+pass and selects which end
     * of a cross-processor edge issues the request in this round */
    wside = (graph->level+pass)%2;
    nchanged = nrequests = 0;
    /* the scan starts at nmatched: vertices matched earlier are swapped to
     * the front of perm at the end of each round */
    for (ii=nmatched; ii<nvtxs; ii++) {
      i = perm[ii];
      if (match[i] == UNMATCHED) {  /* Unmatched */
        maxidx = i;
        maxi = -1;

        /* Find a heavy-edge matching */
        /* a candidate replaces the current best when it is the first one,
         * when its edge is heavier, or -- for two local candidates with
         * equal edge weight -- when BetterVBalance() returns >= 0 */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (match[k] == UNMATCHED &&
               myhome[k] == myhome[i] &&
               (maxi == -1 ||
               adjwgt[maxi] < adjwgt[j] ||
               (maxidx < nvtxs &&
               k < nvtxs &&
               adjwgt[maxi] == adjwgt[j] &&
               BetterVBalance(ncon,nvwgt+i*ncon,nvwgt+maxidx*ncon,nvwgt+k*ncon) >= 0))) {
            maxi = j;
            maxidx = k;

        if (maxi != -1) {
          k = ladjncy[maxi];
          if (k < nvtxs) { /* Take care the local vertices first */
            /* Here we give preference the local matching by granting it right away */
            /* match[] stores the partner's global number; KEEP_BIT is added
             * on the lower-indexed vertex of the pair */
            if (i <= k) {
              match[i] = firstvtx+k + KEEP_BIT;
              match[k] = firstvtx+i;
            else {
              match[i] = firstvtx+k;
              match[k] = firstvtx+i + KEEP_BIT;
            changed[nchanged++] = i;
            changed[nchanged++] = k;
          else { /* Take care any remote boundary vertices */
            match[k] = MAYBE_MATCHED;
            /* Alternate among which vertices will issue the requests */
            /* request pair: key = requested remote vertex (global number),
             * val = requesting local vertex (global number).
             * NOTE(review): the nrequests increment is missing here. */
            if ((wside ==0 && firstvtx+i < graph->imap[k]) || (wside == 1 && firstvtx+i > graph->imap[k])) { 
              match[i] = MAYBE_MATCHED;
              match_requests[nrequests].key = graph->imap[k];
              match_requests[nrequests].val = firstvtx+i;

    /* NOTE(review): `c` and `nlocal` are not declared in this function;
     * these two lines are presumably debugging output whose
     * conditional-compilation guard was lost */
    PrintVector2(ctrl, nvtxs, firstvtx, match, "Match1");
    myprintf(ctrl, "[c: %2d] Nlocal: %d, Nrequests: %d\n", c, nlocal, nrequests);

    /* NOTE(review): the next two lines are the remains of a block comment
     * whose delimiters were lost */
    * Exchange the match_requests, requests for me are stored in
    * match_granted 
    /* Issue the receives first. Note that from each PE can receive a maximum
       of the interface node that it needs to send it in the case of a mat-vec */
    /* counts are doubled because each request is a (key, val) pair */
    for (i=0; i<nnbrs; i++) {
      MPI_Irecv((void *)(match_granted+recvptr[i]), 2*(recvptr[i+1]-recvptr[i]), IDX_DATATYPE,
                peind[i], 1, ctrl->comm, ctrl->rreq+i);

    /* Issue the sends next. This needs some work */
    /* the slicing below relies on the requests being ordered by key, so
     * that each neighbour's requests form one contiguous run ending below
     * vtxdist[peind[i]+1] */
    ikeysort(nrequests, match_requests);
    for (j=i=0; i<nnbrs; i++) {
      otherlastvtx = vtxdist[peind[i]+1];
      for (k=j; k<nrequests && match_requests[k].key < otherlastvtx; k++);
      MPI_Isend((void *)(match_requests+j), 2*(k-j), IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
      j = k;

    /* OK, now get into the loop waiting for the operations to finish */
    MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
    for (i=0; i<nnbrs; i++) {
      MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nreqs_pe+i);
      nreqs_pe[i] = nreqs_pe[i]/2;  /* Adjust for pairs of IDX_DATATYPE */
    MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

    /* NOTE(review): the next two lines are the remains of a block comment
     * whose delimiters were lost.  Neighbours are served in a random order. */
    * Now, go and service the requests that you received in 
    * match_granted 
    RandomPermute(nnbrs, nperm, 0);
    for (ii=0; ii<nnbrs; ii++) {
      i = nperm[ii];
      pe_requests = match_granted+recvptr[i];
      for (j=0; j<nreqs_pe[i]; j++) {
        k = pe_requests[j].key;
        ASSERTP(ctrl, k >= firstvtx && k < lastvtx, (ctrl, "%d %d %d %d %d\n", firstvtx, lastvtx, k, j, peind[i]));
        /* myprintf(ctrl, "Requesting a match %d %d\n", pe_requests[j].key, pe_requests[j].val); */
        if (match[k-firstvtx] == UNMATCHED) { /* Bingo, lets grant this request */
          changed[nchanged++] = k-firstvtx;
          /* NOTE(review): despite the "flip a coin" wording below, the
           * visible test is deterministic: with nkept >= 0 this processor
           * takes KEEP_BIT, otherwise KEEP_BIT is put on the key sent back
           * to the requester.  Any nkept update is not visible here. */
          if (nkept >= 0) { /* Flip a coin for who gets it */
            match[k-firstvtx] = pe_requests[j].val + KEEP_BIT;
          else {
            match[k-firstvtx] = pe_requests[j].val;
            pe_requests[j].key += KEEP_BIT;
          /* myprintf(ctrl, "Request from pe:%d (%d %d) granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ 
        else { /* We are not granting the request */
          /* myprintf(ctrl, "Request from pe:%d (%d %d) not granted!\n", peind[i], pe_requests[j].val, pe_requests[j].key); */ 
          pe_requests[j].key = UNMATCHED;

    /* NOTE(review): the next two lines are the remains of a block comment
     * whose delimiters were lost */
    * Exchange the match_granted information. It is stored in
    * match_requests 
    /* Issue the receives first. Note that from each PE can receive a maximum
       of the interface node that it needs to send during the case of a mat-vec */
    for (i=0; i<nnbrs; i++) {
      MPI_Irecv((void *)(match_requests+sendptr[i]), 2*(sendptr[i+1]-sendptr[i]), IDX_DATATYPE,
                peind[i], 1, ctrl->comm, ctrl->rreq+i);

    /* Issue the sends next. */
    /* every received request is sent back to its origin with the key
     * rewritten to the decision made above */
    for (i=0; i<nnbrs; i++) {
      MPI_Isend((void *)(match_granted+recvptr[i]), 2*nreqs_pe[i], IDX_DATATYPE, 
                peind[i], 1, ctrl->comm, ctrl->sreq+i);

    /* OK, now get into the loop waiting for the operations to finish */
    MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
    for (i=0; i<nnbrs; i++) {
      MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nreqs_pe+i);
      nreqs_pe[i] = nreqs_pe[i]/2;  /* Adjust for pairs of IDX_DATATYPE */
    MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);

    /* NOTE(review): the next two lines are the remains of a block comment
     * whose delimiters were lost.  In each answer, val is the local
     * requester (global number) and key is the value to store in match[]:
     * the granted partner, or UNMATCHED when the request was refused. */
    * Now, go and through the match_requests and update local
    * match information for the matchings that were granted.
    for (i=0; i<nnbrs; i++) {
      pe_requests = match_requests+sendptr[i];
      for (j=0; j<nreqs_pe[i]; j++) {
        match[pe_requests[j].val-firstvtx] = pe_requests[j].key;
        if (pe_requests[j].key != UNMATCHED)
          changed[nchanged++] = pe_requests[j].val-firstvtx;

    /* swap every vertex matched in this round into position nmatched of
     * perm, keeping iperm consistent.
     * NOTE(review): the nmatched increment is missing from this listing. */
    for (i=0; i<nchanged; i++) {
      ii = iperm[changed[i]];
      perm[ii] = perm[nmatched];
      iperm[perm[nmatched]] = ii;

    /* propagate the new match[] values of the changed vertices to the
     * processors that hold copies of them */
    CommChangedInterfaceData(ctrl, graph, nchanged, changed, match, match_requests, match_granted, wspace->pv4);

  /* Traverse the vertices and those that were unmatched, match them with themselves */
  /* NOTE(review): cnvtxs is never incremented in the visible text; the
   * counting statements are missing from this listing */
  cnvtxs = 0;
  for (i=0; i<nvtxs; i++) {
    if (match[i] == UNMATCHED || match[i] == TOO_HEAVY) {
      match[i] = (firstvtx+i) + KEEP_BIT;
    else if (match[i] >= KEEP_BIT) {  /* A matched vertex which I get to keep */

  if (ctrl->dbglvl&DBG_MATCHINFO) {
    PrintVector2(ctrl, nvtxs, firstvtx, match, "Match");
    myprintf(ctrl, "Cnvtxs: %d\n", cnvtxs);
    rprintf(ctrl, "Done with matching...\n");

  /* match itself is not freed: it stays attached to graph->match */
  GKfree((void **)(&myhome), (void **)(&nreqs_pe), LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->MatchTmr));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr));

  /* build the coarse graph from the matching */
  Moc_Global_CreateCoarseGraph(ctrl, graph, wspace, cnvtxs);

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->ContractTmr));
