/* This routine grows a partition from the given
** local seed vertices of a distributed graph, up
** to the given distance, by calling either the
** point-to-point or the collective growing routine
** depending on the communication flag of the graph.
** The part array is passed to the growing routine
** un-based by the base value of the graph.
** It returns:
** - 1  : if the ghost edge array cannot be computed.
** - the return value of the growing routine, else.
*/

int
SCOTCH_dgraphGrow (
SCOTCH_Dgraph * const       orggrafptr,
const SCOTCH_Num            seedlocnbr,
SCOTCH_Num * const          seedloctab,
const SCOTCH_Num            distval,
SCOTCH_Num * const          partgsttab)
{
  Gnum                vertlocdummy;               /* Output slot required by the growing routines; value discarded */
  Gnum                vertlvldummy;               /* Output slot required by the growing routines; value discarded */
  Gnum                edgelocdummy;               /* Output slot required by the growing routines; value discarded */

  Dgraph * restrict const dgrfptr = (Dgraph *) orggrafptr;

  if (dgraphGhst (dgrfptr) != 0) {                /* The ghost edge array must exist before growing */
    errorPrint ("SCOTCH_dgraphGrow: cannot compute ghost edge array");
    return     (1);
  }

  if ((dgrfptr->flagval & DGRAPHCOMMPTOP) != 0)   /* Point-to-point communication requested */
    return (dgraphGrowPtop (dgrfptr, seedlocnbr, seedloctab, distval, partgsttab - dgrfptr->baseval,
                            &vertlvldummy, &vertlocdummy, &edgelocdummy));

  return (dgraphGrowColl (dgrfptr, seedlocnbr, seedloctab, distval, partgsttab - dgrfptr->baseval, /* Else use collective communication */
                          &vertlvldummy, &vertlocdummy, &edgelocdummy));
}
/* This routine refines the bipartition of a
** distributed graph by working on a band graph
** built around the current frontier:
** - if the frontier is globally empty, the
**   original strategy is applied to the full graph;
** - else a band graph of width paraptr->distmax is
**   built by dgraphBand(), completed with external
**   gains if any, and bipartitioned with the band
**   strategy paraptr->stratbnd;
** - if the anchors are unusable, or if they do not
**   all end up consistently in opposite parts, the
**   original strategy is applied to the full graph;
** - else the band partition is projected back to
**   the original graph, and the frontier, loads and
**   communication loads are recomputed.
** All temporary data (band graph and flag array)
** are released on every exit path, including the
** error ones.
** It returns:
** - 0   : if the bipartition could be computed.
** - !0  : on error, or the return value of
**         bdgraphBipartSt() for fall-back cases.
*/

int
bdgraphBipartBd (
Bdgraph * const                     orggrafptr,   /*+ Distributed graph +*/
const BdgraphBipartBdParam * const  paraptr)      /*+ Method parameters +*/
{
  Bdgraph                 bndgrafdat;             /* Bipartitioning band graph structure                          */
  Gnum                    bndvertancnnd;          /* End of local vertex array, without anchors                   */
  Gnum                    bndvertlocnbr1;         /* Number of band graph vertices in part 1 except anchor 1      */
  Gnum                    bndvertlocnum;
  Gnum                    bndvertlvlnum;          /* Based number of first band vertex in last layer              */
  Gnum                    bndvertlocancadj;       /* Flag set when anchor(s) represent unexistent vertices        */
  Gnum                    bndvertglbancadj;       /* Global adjustment of anchor vertices                         */
  Gnum                    bndveexlocsum;          /* Local sum of veexloctax array cells for band graph           */
  Gnum                    bndveexlocsum0;         /* Local sum of veexloctax array cells in part 0 for band graph */
  Gnum                    bndedlolocval;
  Gnum                    bndfronlocnum;
  Gnum                    orgfronlocnum;
  int * restrict          orgflagloctab;
  Gnum                    orgvertlocnum;
  Gnum                    orgedlolocval;
  const int * restrict    orgprocsidtab;
  int                     orgprocsidnbr;
  int                     orgprocsidnum;
  int                     orgprocsidval;
  Gnum                    complocsizeadj0;
  Gnum                    commlocloadintn;
  Gnum                    commlocloadintn2;       /* Twice twice (4 times) the internal communication load of last layer */
  Gnum                    commlocloadextn;
  Gnum                    commlocgainextn;
  Gnum                    reduloctab[7];
  Gnum                    reduglbtab[7];
  DgraphHaloRequest       requdat;

  if (orggrafptr->fronglbnbr == 0)                /* If no separator vertices, apply strategy to full (original) graph */
    return (bdgraphBipartSt (orggrafptr, paraptr->stratorg));

  if (dgraphBand (&orggrafptr->s, orggrafptr->fronlocnbr, orggrafptr->fronloctab, orggrafptr->partgsttax,
                  orggrafptr->complocload0, orggrafptr->s.velolocsum - orggrafptr->complocload0, paraptr->distmax,
                  &bndgrafdat.s, &bndgrafdat.fronloctab, &bndgrafdat.partgsttax,
                  &bndvertlvlnum, &bndvertlocnbr1, &bndvertlocancadj) != 0) {
    errorPrint ("bdgraphBipartBd: cannot create band graph");
    return     (1);
  }
  bndvertancnnd = bndgrafdat.s.vertlocnnd - 2;    /* The two anchor vertices are the last two local vertices */

  reduloctab[0]  = 0;                             /* Assume no memory allocation problem */
  bndveexlocsum  =
  bndveexlocsum0 = 0;
  bndgrafdat.veexloctax = NULL;                   /* Assume no external gains */
  if (orggrafptr->veexloctax != NULL) {
    if ((bndgrafdat.veexloctax = memAlloc (bndgrafdat.s.vertlocnbr * sizeof (Gnum))) == NULL) {
      errorPrint ("bdgraphBipartBd: out of memory (1)");
      reduloctab[0] = 1;                         /* Memory error */
    }
    else {
      Gnum                bndvertlocnum;

      bndgrafdat.veexloctax -= bndgrafdat.s.baseval;

      for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) {
        Gnum                veexval;

        veexval = orggrafptr->veexloctax[bndgrafdat.s.vnumloctax[bndvertlocnum]];
        bndgrafdat.veexloctax[bndvertlocnum] = veexval;
        bndveexlocsum  += veexval;
        bndveexlocsum0 += veexval & (((Gnum) bndgrafdat.partgsttax[bndvertlocnum]) - 1); /* Mask is ~0 for part 0 and 0 for part 1 */
      }
    }
  }
  reduloctab[1] = bndgrafdat.s.vendloctax[bndvertancnnd]     - bndgrafdat.s.vertloctax[bndvertancnnd]     - (orggrafptr->s.procglbnbr - 1); /* Anchor degrees */
  reduloctab[2] = bndgrafdat.s.vendloctax[bndvertancnnd + 1] - bndgrafdat.s.vertloctax[bndvertancnnd + 1] - (orggrafptr->s.procglbnbr - 1);

  bndgrafdat.complocsize0 = bndgrafdat.s.vertlocnbr - (bndvertlocnbr1 + 1); /* Add 1 for anchor vertex 1 */
  complocsizeadj0 = orggrafptr->complocsize0 - bndgrafdat.complocsize0; /* -1 less because of anchor 0   */
  reduloctab[3] = bndgrafdat.complocsize0;
  reduloctab[4] = bndvertlocancadj;               /* Sum increases in size and load */
  reduloctab[5] = bndveexlocsum;
  reduloctab[6] = bndveexlocsum0;
  if (MPI_Allreduce (reduloctab, reduglbtab, 7, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint  ("bdgraphBipartBd: communication error (1)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
  if (reduglbtab[0] != 0) {
    bdgraphExit (&bndgrafdat);
    return      (1);
  }
  if ((reduglbtab[1] == 0) ||                     /* If graph is too small to have any usable anchors */
      (reduglbtab[2] == 0)) {
    bdgraphExit (&bndgrafdat);
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  bndvertglbancadj            = reduglbtab[4];
  bndgrafdat.veexglbsum       = orggrafptr->veexglbsum; /* All external gains preserved                  */
  bndgrafdat.fronlocnbr       = orggrafptr->fronlocnbr; /* All separator vertices are kept in band graph */
  bndgrafdat.fronglbnbr       = orggrafptr->fronglbnbr;
  bndgrafdat.complocload0     = orggrafptr->complocload0    + bndvertlocancadj; /* All loads are kept in band graph */
  bndgrafdat.compglbload0     = orggrafptr->compglbload0    + bndvertglbancadj;
  bndgrafdat.compglbload0min  = orggrafptr->compglbload0min + bndvertglbancadj; /* Tilt extrema loads according to adjustments */
  bndgrafdat.compglbload0max  = orggrafptr->compglbload0max + bndvertglbancadj;
  bndgrafdat.compglbload0avg  = orggrafptr->compglbload0avg + bndvertglbancadj; /* Tilt average load according to adjustments */
  bndgrafdat.compglbload0dlt  = orggrafptr->compglbload0dlt;
  bndgrafdat.compglbsize0     = reduglbtab[3];
  bndgrafdat.commglbload      = orggrafptr->commglbload;
  bndgrafdat.commglbgainextn  = orggrafptr->commglbgainextn;
  bndgrafdat.commglbloadextn0 = orggrafptr->commglbloadextn0;
  bndgrafdat.commglbgainextn0 = orggrafptr->commglbgainextn0;
  bndgrafdat.bbalglbval       = orggrafptr->bbalglbval;
  bndgrafdat.domndist         = orggrafptr->domndist;
  bndgrafdat.domnwght[0]      = orggrafptr->domnwght[0];
  bndgrafdat.domnwght[1]      = orggrafptr->domnwght[1];
  bndgrafdat.levlnum          = orggrafptr->levlnum;

  if (bndgrafdat.veexloctax != NULL) {
    Gnum                bndveexglbanc0;
    Gnum                bndveexglbanc1;

    bndveexglbanc0 = (orggrafptr->veexglbsum + orggrafptr->commglbgainextn) / 2 - reduglbtab[6]; /* Compute global external gains of anchors */
    bndveexglbanc1 = (orggrafptr->veexglbsum - bndveexglbanc0) - reduglbtab[5];

    bndgrafdat.veexloctax[bndvertancnnd]     = DATASIZE (bndveexglbanc0, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum); /* Spread gains across local anchors */
    bndgrafdat.veexloctax[bndvertancnnd + 1] = DATASIZE (bndveexglbanc1, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum);
  }

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (&bndgrafdat) != 0) {
    errorPrint  ("bdgraphBipartBd: internal error (1)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  if (bdgraphBipartSt (&bndgrafdat, paraptr->stratbnd) != 0) { /* Separate distributed band graph */
    errorPrint  ("bdgraphBipartBd: cannot separate band graph");
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  reduloctab[0] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd]; /* Check if anchor vertices remain in their parts */
  reduloctab[1] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd + 1];
  reduloctab[2] = complocsizeadj0;
  reduloctab[3] = 0;                              /* Assume memory allocation is all right */
  if ((orgflagloctab = memAlloc (flagSize (orggrafptr->s.vertlocnnd) * sizeof (int))) == NULL) { /* Eventually keep space for based indices */
    errorPrint ("bdgraphBipartBd: out of memory (2)");
    reduloctab[3] = 1;
  }

  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartBd: communication error (2)");
    if (orgflagloctab != NULL)                    /* Release everything allocated so far */
      memFree (orgflagloctab);
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  if (((reduglbtab[0] + reduglbtab[1]) != orggrafptr->s.procglbnbr)         || /* If not all anchors of initial same parts in same parts */
      ((reduglbtab[0] != 0) && (reduglbtab[0] != orggrafptr->s.procglbnbr)) ||
      (reduglbtab[3] != 0)) {
    if (orgflagloctab != NULL)
      memFree (orgflagloctab);
    bdgraphExit (&bndgrafdat);                    /* Apply original strategy to full graph */
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  if (dgraphGhst (&bndgrafdat.s) != 0) {          /* Compute ghost edge array if not already present */
    errorPrint  ("bdgraphBipartBd: cannot compute ghost edge array");
    memFree     (orgflagloctab);                  /* Flag array is necessarily allocated here, since reduglbtab[3] is zero */
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  if (reduglbtab[0] == orggrafptr->s.procglbnbr) { /* If all anchors swapped parts, swap all parts of original vertices */
    Gnum                orgvertnum;

    orggrafptr->complocsize0 = orggrafptr->s.vertlocnbr - reduloctab[2] - bndgrafdat.s.vertlocnbr + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = orggrafptr->s.vertglbnbr - reduglbtab[2] - bndgrafdat.s.vertglbnbr + bndgrafdat.compglbsize0;

    for (orgvertnum = orggrafptr->s.baseval; orgvertnum < orggrafptr->s.vertlocnnd; orgvertnum ++)
      orggrafptr->partgsttax[orgvertnum] ^= 1;
  }
  else {
    orggrafptr->complocsize0 = reduloctab[2] + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = reduglbtab[2] + bndgrafdat.compglbsize0;
  }

  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) /* Update part array of all vertices except anchors */
    orggrafptr->partgsttax[bndgrafdat.s.vnumloctax[bndvertlocnum]] = bndgrafdat.partgsttax[bndvertlocnum];

  dgraphHaloAsync (&orggrafptr->s, (byte *) (orggrafptr->partgsttax + orggrafptr->s.baseval), GRAPHPART_MPI, &requdat); /* Share part array of full graph */

  commlocloadintn =
  commlocloadextn =
  commlocgainextn = 0;
  bndedlolocval   = 1;                            /* Assume no edge loads */
  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertlvlnum; bndvertlocnum ++) { /* For all vertices of band graph save for last layer */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum];
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn += (bndpartval ^ bndpartend) * bndedlolocval; /* Internal load is accounted for twice */
    }
  }
  for ( ; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer, remove internal loads to band vertices once */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum] - 1; /* "-1" to avoid anchor edges */
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn -= (bndpartval ^ bndpartend) * bndedlolocval; /* Remove internal loads to band graph vertices once because afterwards they will be accounted for twice */
    }
  }

  memSet (orgflagloctab, 0, flagSize (orggrafptr->s.vertlocnnd) * sizeof (int)); /* Set vertices as not already considered */

  for (bndfronlocnum = orgfronlocnum = 0; bndfronlocnum < bndgrafdat.fronlocnbr; bndfronlocnum ++) { /* Project back separator except for last layer */
    Gnum                bndvertlocnum;

    bndvertlocnum = bndgrafdat.fronloctab[bndfronlocnum];
    if (bndvertlocnum < bndvertlvlnum) {          /* If vertex does not belong to last layer */
      Gnum                orgvertlocnum;

      orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
      flagSet (orgflagloctab, orgvertlocnum);     /* Set vertex as processed */
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
    }
  }

  if (dgraphHaloWait (&requdat) != 0) {
    errorPrint  ("bdgraphBipartBd: cannot complete asynchronous halo exchange");
    memFree     (orgflagloctab);                  /* Release temporary data on error */
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  orgedlolocval    = 1;                           /* Assume no edge loads */
  commlocloadintn2 = 0;
  for (bndvertlocnum = bndvertlvlnum; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer */
    Gnum                orgedgelocnum;
    Gnum                orgedgelocnnd;
    Gnum                orgvertlocnum;
    GraphPart           orgpartval;
    Gnum                orgflagval;

    orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
    orgpartval    = bndgrafdat.partgsttax[bndvertlocnum];

    orgflagval = 0;                               /* Assume vertex does not belong to the frontier */
    for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
         orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
      Gnum                orgvertlocend;
      Gnum                orgpartend;
      Gnum                orgflagtmp;

      orgvertlocend = orggrafptr->s.edgegsttax[orgedgelocnum];
      orgpartend    = orggrafptr->partgsttax[orgvertlocend];

      orgflagtmp = orgpartval ^ orgpartend;
      if (orggrafptr->s.edloloctax != NULL)       /* Test the very array that is read, not the one of the band graph */
        orgedlolocval = orggrafptr->s.edloloctax[orgedgelocnum];
      orgflagval       |= orgflagtmp;
      commlocloadintn2 += orgflagtmp * orgedlolocval; /* Internal load to band and original graph vertices are accounted for twice */
      if ((orgflagtmp != 0) && (orgvertlocend < orggrafptr->s.vertlocnnd) && (flagVal (orgflagloctab, orgvertlocend) == 0)) {
        orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocend;
        flagSet (orgflagloctab, orgvertlocend);
      }
    }
    if ((orgflagval != 0) && (flagVal (orgflagloctab, orgvertlocnum) == 0))
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;

    flagSet (orgflagloctab, orgvertlocnum);       /* Set vertex as processed anyway */
  }
  commlocloadintn += 2 * commlocloadintn2;        /* Add twice the internal load of original graph edges and once the one of band edges (one removed before) */

  orggrafptr->complocload0    = bndgrafdat.complocload0 - bndvertlocancadj;
  orggrafptr->compglbload0    = bndgrafdat.compglbload0 - bndvertglbancadj;
  orggrafptr->compglbload0dlt = orggrafptr->compglbload0 - orggrafptr->compglbload0avg;

  orgprocsidnbr = orggrafptr->s.procsidnbr;
  if (orgprocsidnbr == 0)
    goto loop_exit;
  orgvertlocnum = orggrafptr->s.baseval;
  orgprocsidnum = 0;
  orgprocsidtab = orggrafptr->s.procsidtab;
  orgprocsidval = orgprocsidtab[orgprocsidnum ++];
  while (1) {             /* Scan all vertices which have foreign neighbors */
    while (orgprocsidval < 0) {                   /* Negative entries are skipped while moving the current vertex index forward */
      orgvertlocnum -= (Gnum) orgprocsidval;
      orgprocsidval  = orgprocsidtab[orgprocsidnum ++];
    }

    if (flagVal (orgflagloctab, orgvertlocnum) == 0) { /* If vertex not already processed */
      Gnum                orgedgelocnum;
      Gnum                orgedgelocnnd;
      GraphPart           orgpartval;

      orgpartval = orggrafptr->partgsttax[orgvertlocnum];
      for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
           orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
        if (orggrafptr->partgsttax[orggrafptr->s.edgegsttax[orgedgelocnum]] != orgpartval) {
          orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
          break;
        }
      }
    }

    do {                                          /* Skip remaining non-negative entries attached to the current vertex */
      if (orgprocsidnum >= orgprocsidnbr)
        goto loop_exit;
    } while ((orgprocsidval = orgprocsidtab[orgprocsidnum ++]) >= 0);
  }
loop_exit :
  memFree (orgflagloctab);

  reduloctab[0] = commlocloadintn;                /* Twice the internal load; sum globally before dividing by two */
  reduloctab[1] = commlocloadextn;
  reduloctab[2] = commlocgainextn;
  reduloctab[3] = orgfronlocnum;
  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint  ("bdgraphBipartBd: communication error (3)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
  orggrafptr->fronlocnbr      = orgfronlocnum;
  orggrafptr->fronglbnbr      = reduglbtab[3];
  orggrafptr->commglbload     = (reduglbtab[0] / 2) * orggrafptr->domndist + reduglbtab[1];
  orggrafptr->commglbgainextn = reduglbtab[2];
  orggrafptr->bbalglbval      = (double) ((orggrafptr->compglbload0dlt < 0) ? (- orggrafptr->compglbload0dlt) : orggrafptr->compglbload0dlt) / (double) orggrafptr->compglbload0avg;

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (orggrafptr) != 0) {
    errorPrint  ("bdgraphBipartBd: internal error (2)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  bdgraphExit (&bndgrafdat);

  return (0);
}
/* This routine computes a bipartition of the given
** distributed graph by centralizing it on every
** process, applying the sequential strategy
** paraptr->strat on each centralized copy, electing
** the best result by means of a user-defined MPI
** reduction, and scattering the part array of the
** elected process back to the distributed graph.
** The local frontier, part 0 size and part 0 load
** are then recomputed from the distributed part
** array.
** It returns:
** - 0   : if the bipartition could be computed.
** - !0  : on error.
*/

int
bdgraphBipartSq (
Bdgraph * const                     dgrfptr,      /*+ Distributed graph +*/
const BdgraphBipartSqParam * const  paraptr)      /*+ Method parameters +*/
{
  Bgraph            cgrfdat;                      /* Centralized bipartitioned graph structure           */
  Gnum              reduloctab[6];                /* Local array for best bipartition data               */
  Gnum              reduglbtab[6];                /* Global array for best bipartition data              */
  MPI_Datatype      besttypedat;                  /* Data type for finding best bipartition              */
  MPI_Op            bestoperdat;                  /* Handle of MPI operator for finding best bipartition */
  int               bestprocnum;                  /* Rank of process holding best partition              */
  Gnum * restrict   vnumloctax;
  Gnum              vertlocnum;
  Gnum              complocsize1;
  Gnum              complocload1;
  Gnum              fronlocnbr;
  int               o;

  if ((MPI_Type_contiguous (6, GNUM_MPI, &besttypedat)                              != MPI_SUCCESS) ||
      (MPI_Type_commit (&besttypedat)                                               != MPI_SUCCESS) ||
      (MPI_Op_create ((MPI_User_function *) bdgraphBipartSqOpBest, 1, &bestoperdat) != MPI_SUCCESS)) {
    errorPrint ("bdgraphBipartSq: communication error (1)");
    return     (1);
  }

  reduloctab[0] =                                 /* In case of error, maximum communication load */
  reduloctab[1] = GNUMMAX;                        /* And maximum load imbalance                   */
  reduloctab[2] = dgrfptr->s.proclocnum;
  reduloctab[3] =                                 /* Assume sequential bipartioning went fine */
  reduloctab[4] = 0;
  reduloctab[5] = 0;                              /* Assume no errors */

  vnumloctax = dgrfptr->s.vnumloctax;             /* No need for vertex number array when centralizing graph */
  dgrfptr->s.vnumloctax = NULL;
  o = bdgraphGatherAll (dgrfptr, &cgrfdat);
  dgrfptr->s.vnumloctax = vnumloctax;             /* Restore vertex number array */
  if (o != 0) {
    errorPrint ("bdgraphBipartSq: cannot build centralized graph");
    return     (1);
  }

  if (bgraphBipartSt (&cgrfdat, paraptr->strat) != 0) { /* Bipartition centralized graph */
    errorPrint ("bdgraphBipartSq: cannot bipartition centralized graph");
    reduloctab[3] =
    reduloctab[4] = 1;
  }
  else {                                          /* Fill local array with local bipartition data */
    reduloctab[0] = ((cgrfdat.fronnbr != 0) || ((cgrfdat.compsize0 != 0) && ((cgrfdat.s.vertnbr - cgrfdat.compsize0) != 0)))
                    ? cgrfdat.commload
                    : GNUMMAX; /* Partitions with empty bipartitions unwanted if they are completely unbalanced */
    reduloctab[1] = cgrfdat.compload0dlt;
  }

  if (dgrfptr->partgsttax == NULL) {
    if (dgraphGhst (&dgrfptr->s) != 0) {          /* Compute ghost edge array if not already present, before copying graph fields */
      errorPrint ("bdgraphBipartSq: cannot compute ghost edge array");
      reduloctab[5] = 1;
    }
    else {
      if ((dgrfptr->partgsttax = (GraphPart *) memAlloc (dgrfptr->s.vertgstnbr * sizeof (GraphPart))) == NULL) {
        errorPrint ("bdgraphBipartSq: out of memory (1)");
        reduloctab[5] = 1;                        /* Allocated data will be freed along with graph structure */
      }
      else                                        /* Only un-base a valid pointer; keep NULL as is on failure */
        dgrfptr->partgsttax -= dgrfptr->s.baseval;
    }
    if ((dgrfptr->fronloctab = (Gnum *) memAlloc (dgrfptr->s.vertlocnbr * sizeof (Gnum))) == NULL) {
      errorPrint ("bdgraphBipartSq: out of memory (2)");
      reduloctab[5] = 1;
    }
  }

  if (MPI_Allreduce (reduloctab, reduglbtab, 1, besttypedat, bestoperdat, dgrfptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartSq: communication error (2)");
    bgraphExit (&cgrfdat);                        /* Do not leak centralized graph on error */
    return     (1);
  }
  if ((reduglbtab[4] != 0) && (reduglbtab[4] != dgrfptr->s.procglbnbr)) { /* Test reduced value so that all processes take the same decision */
    errorPrint ("bdgraphBipartSq: internal error");
    bgraphExit (&cgrfdat);
    return     (1);
  }

  if ((MPI_Op_free   (&bestoperdat) != MPI_SUCCESS) ||
      (MPI_Type_free (&besttypedat) != MPI_SUCCESS)) {
    errorPrint ("bdgraphBipartSq: communication error (3)");
    bgraphExit (&cgrfdat);
    return     (1);
  }

  if (reduglbtab[3] != 0) {                       /* If none of the sequential methods succeeded */
    bgraphExit (&cgrfdat);
    return     (1);
  }
  if ((reduloctab[5] != 0) ||                     /* If local part or frontier arrays are missing             */
      (reduglbtab[5] != 0)) {                     /* Or if error flagged elsewhere; NOTE(review): assumes the */
    bgraphExit (&cgrfdat);                        /* reduction operator propagates slot 5 - TODO confirm      */
    return     (1);
  }

  bestprocnum = (int) reduglbtab[2];
  if (dgrfptr->s.proclocnum == bestprocnum) {     /* If process holds best partition */
    reduloctab[0] = cgrfdat.compload0;            /* Global values to share          */
    reduloctab[1] = cgrfdat.compsize0;
    reduloctab[2] = cgrfdat.commload;
    reduloctab[3] = cgrfdat.commgainextn;
    reduloctab[4] = cgrfdat.fronnbr;
  }
  if (MPI_Bcast (reduloctab, 5, GNUM_MPI, bestprocnum, dgrfptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartSq: communication error (4)");
    bgraphExit (&cgrfdat);
    return     (1);
  }
  dgrfptr->compglbload0    = reduloctab[0];
  dgrfptr->compglbload0dlt = reduloctab[0] - dgrfptr->compglbload0avg;
  dgrfptr->compglbsize0    = reduloctab[1];
  dgrfptr->commglbload     = reduloctab[2];
  dgrfptr->commglbgainextn = reduloctab[3];
  dgrfptr->fronglbnbr      = reduloctab[4];

  if (commScatterv (cgrfdat.parttax, dgrfptr->s.proccnttab, dgrfptr->s.procdsptab, GRAPHPART_MPI, /* No base for sending as procdsptab holds based values */
                    dgrfptr->partgsttax + dgrfptr->s.baseval, dgrfptr->s.vertlocnbr, GRAPHPART_MPI,
                    bestprocnum, dgrfptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartSq: communication error (5)");
    bgraphExit (&cgrfdat);
    return     (1);
  }

  if (dgraphHaloSync (&dgrfptr->s, (byte *) (dgrfptr->partgsttax + dgrfptr->s.baseval), GRAPHPART_MPI) != 0) {
    errorPrint ("bdgraphBipartSq: cannot perform halo exchange");
    bgraphExit (&cgrfdat);
    return     (1);
  }

  complocsize1 =
  complocload1 = 0;
  for (vertlocnum = dgrfptr->s.baseval, fronlocnbr = 0;
       vertlocnum < dgrfptr->s.vertlocnnd; vertlocnum ++) {
    int               partval;
    Gnum              partval1;
    Gnum              commcut;
    Gnum              edgelocnum;

    partval  = dgrfptr->partgsttax[vertlocnum];
    partval1 = partval & 1;
    complocsize1 += partval1;                     /* Superscalar update */
    if (dgrfptr->s.veloloctax != NULL) {
      Gnum              veloval;

      veloval       = dgrfptr->s.veloloctax[vertlocnum];
      complocload1 += (-partval1) & veloval;      /* Superscalar update: mask is ~0 for part 1 and 0 for part 0 */
    }
    for (edgelocnum = dgrfptr->s.vertloctax[vertlocnum], commcut = 0;
         edgelocnum < dgrfptr->s.vendloctax[vertlocnum]; edgelocnum ++) { /* Build local frontier */
      int                 partend;
      int                 partdlt;

      partend  = dgrfptr->partgsttax[dgrfptr->s.edgegsttax[edgelocnum]];
      partdlt  = partval ^ partend;
      commcut |= partdlt;                         /* Non-zero as soon as one neighbor is in the other part */
    }
    if (commcut != 0)
      dgrfptr->fronloctab[fronlocnbr ++] = vertlocnum;
  }
  dgrfptr->fronlocnbr   = fronlocnbr;
  dgrfptr->complocsize0 = dgrfptr->s.vertlocnbr - complocsize1;
  dgrfptr->complocload0 = (dgrfptr->s.veloloctax != NULL) ? (dgrfptr->s.velolocsum - complocload1) : dgrfptr->complocsize0;

  bgraphExit (&cgrfdat);

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (dgrfptr) != 0) {
    errorPrint ("bdgraphBipartSq: inconsistent graph data");
    return     (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  return (0);
}
int
SCOTCH_dgraphMapView (
SCOTCH_Dgraph * const         libgrafptr,
const SCOTCH_Dmapping * const libmappptr,
FILE * const                  stream)
{
  Dgraph * restrict             grafptr;
  const LibDmapping * restrict  mappptr;
  ArchDom                       domnfrst;         /* Largest domain in architecture          */
  unsigned int * restrict       nmskloctab;       /* Local neighbor bitfield                 */
  unsigned int * restrict       nmskglbtab;       /* Local neighbor bitfield                 */
  int                           nmskidxnbr;       /* Size of bitfield; int since sent by MPI */
  Gnum * restrict               tgloloctab;       /* Local array of terminal domain loads    */
  Gnum * restrict               tgloglbtab;       /* Global array of terminal domain loads   */
  Gnum * restrict               termgsttax;       /* Terminal domain ghost mapping array     */
  Anum                          tgtnbr;           /* Number of processors in target topology */
  Anum                          tgtnum;
  Anum                          mapnbr;           /* Number of processors effectively used   */
  double                        mapavg;           /* Average mapping weight                  */
  Gnum                          mapmin;
  Gnum                          mapmax;
  Gnum                          mapsum;           /* (Partial) sum of vertex loads           */
  double                        mapdlt;
  double                        mapmmy;           /* Maximum / average ratio                 */
  Anum                          ngbsum;
  Anum                          ngbmin;
  Anum                          ngbmax;
  Gnum                          vertlocnum;
  Gnum                          veloval;
  Gnum                          edloval;
  Gnum                          commlocdist[256 + 3]; /* Array of local load distribution    */
  Gnum                          commglbdist[256 + 3];
  Gnum                          commlocload;      /* Total local edge load (edge sum)        */
  Gnum                          commlocdilat;     /* Total edge dilation                     */
  Gnum                          commlocexpan;     /* Total edge expansion                    */
  Anum                          distmax;
  Anum                          distval;
  int                           cheklocval;
  int                           chekglbval;
  DgraphHaloRequest             requdat;

  grafptr = (Dgraph *) libgrafptr;
  mappptr = (LibDmapping *) libmappptr;

  if ((grafptr->vertglbnbr == 0) ||               /* Return if nothing to do */
      (grafptr->edgeglbnbr == 0))
    return (0);

  archDomFrst (&mappptr->m.archdat, &domnfrst);   /* Get architecture domain      */
  tgtnbr = archDomSize (&mappptr->m.archdat, &domnfrst); /* Get architecture size */

  if (archVar (&mappptr->m.archdat)) {
    errorPrint ("SCOTCH_dgraphMapView: not implemented");
    return     (1);
  }

  if (dgraphGhst (grafptr) != 0) {                /* Compute ghost edge array if not already present */
    errorPrint ("SCOTCH_dgraphMapView: cannot compute ghost edge array");
    return     (1);
  }

  nmskidxnbr = (tgtnbr + 1 + ((sizeof (int) << 3) - 1)) / (sizeof (int) << 3); /* Size of neighbor subdomain bitfield; TRICK: "+1" to have a "-1" cell for unmapped vertices */

  cheklocval = 0;
  if (memAllocGroup ((void **) (void *)
                     &nmskloctab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &nmskglbtab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &tgloloctab, (size_t) ((tgtnbr + 1)        * sizeof (Gnum)), /* TRICK: "+1" to have a "-1" cell for unmapped vertices */
                     &tgloglbtab, (size_t) (tgtnbr              * sizeof (Gnum)),
                     &termgsttax, (size_t) (grafptr->vertgstnbr * sizeof (Gnum)), NULL) == NULL) {
    cheklocval = 1;
  }
  if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (1)");
    return     (1);
  }
  if (chekglbval != 0) {
    if (nmskloctab != NULL)
      memFree (nmskloctab);
    errorPrint ("SCOTCH_dgraphMapView: out of memory");
    return     (1);
  }

  if (dmapTerm (&mappptr->m, grafptr, termgsttax) != 0) {
    errorPrint ("SCOTCH_dgraphMapView: cannot build local terminal array");
    memFree    (nmskloctab);
    return     (1);
  }
  dgraphHaloAsync (grafptr, termgsttax, GNUM_MPI, &requdat);
  termgsttax -= grafptr->baseval;

  memSet (tgloloctab, 0, (tgtnbr + 1) * sizeof (Gnum));
  tgloloctab ++;                                  /* TRICK: trim array for "-1" cell */

  veloval = 1;
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) {
#ifdef SCOTCH_DEBUG_DMAP2
    if ((termgsttax[vertlocnum] < -1) || (termgsttax[vertlocnum] >= tgtnbr)) {
      errorPrint ("SCOTCH_dgraphMapView: invalid local terminal array");
      memFree    (nmskloctab);                      /* Free group leader */
      return     (1);
    }
#endif /* SCOTCH_DEBUG_DMAP2 */
    if (grafptr->veloloctax != NULL)
      veloval = grafptr->veloloctax[vertlocnum];
    tgloloctab[termgsttax[vertlocnum]] += veloval; /* One more vertex of given weight assigned to this target */
  }

  if (MPI_Allreduce (tgloloctab, tgloglbtab, tgtnbr, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (2)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  mapmin = GNUMMAX;
  mapmax = 0;
  mapsum = 0;
  mapnbr = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {
    Gnum                tgtsum;

    tgtsum = tgloglbtab[tgtnum];
    if (tgtsum != 0) {
      mapnbr ++;
      mapsum += tgtsum;
      if (tgtsum < mapmin)
        mapmin = tgtsum;
      if (tgtsum > mapmax)
        mapmax = tgtsum;
    }
  }
  mapavg = (mapnbr == 0) ? 0.0L : ((double) mapsum / (double) mapnbr);

  mapdlt = 0.0L;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++)
    mapdlt += fabs ((double) tgloglbtab[tgtnum] - mapavg);
  mapdlt = (mapnbr != 0) ? mapdlt / ((double) mapnbr * mapavg) : 0.0L;
  mapmmy = (mapnbr != 0) ? (double) mapmax / (double) mapavg : 0.0L;

  if (stream != NULL) {
    fprintf (stream, "M\tProcessors " GNUMSTRING "/" GNUMSTRING "(%g)\n",
             (Gnum) mapnbr,
             (Gnum) tgtnbr,
             (double) mapnbr / (double) tgtnbr);
    fprintf (stream, "M\tTarget min=" GNUMSTRING "\tmax=" GNUMSTRING "\tavg=%g\tdlt=%g\tmaxavg=%g\n",
             (Gnum) mapmin,
             (Gnum) mapmax,
             mapavg,
             mapdlt,
             mapmmy);
  }

  if (dgraphHaloWait (&requdat) != 0) {           /* Wait for ghost terminal data to be exchanged */
    errorPrint ("SCOTCH_dgraphMapView: cannot complete asynchronous halo exchange");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  ngbmin = ANUMMAX;
  ngbmax = 0;
  ngbsum = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {  /* For all subdomain indices */
    int                 nmskidxnum;
    Gnum                vertlocnum;
    Anum                ngbnbr;

    if (tgloglbtab[tgtnum] <= 0)                  /* If empty subdomain, skip it */
      continue;

    memSet (nmskloctab, 0, nmskidxnbr * sizeof (int)); /* Reset neighbor bit mask */

    for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
      Gnum                termnum;
      Gnum                edgelocnum;
      Gnum                edgelocnnd;

      termnum = termgsttax[vertlocnum];
      if (termnum != tgtnum)                      /* If vertex does not belong to current part or is not mapped, skip it */
        continue;

      for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
           edgelocnum < edgelocnnd; edgelocnum ++) {
        Gnum                termend;

        termend = termgsttax[grafptr->edgegsttax[edgelocnum]];
        if (termend != tgtnum) {                  /* If edge is not internal             */
          termend ++;                             /* TRICK: turn unmapped to 0 and so on */
          nmskloctab[termend / (sizeof (int) << 3)] |= 1 << (termend & ((sizeof (int) << 3) - 1)); /* Flag neighbor in bit array */
        }
      }
    }
    nmskloctab[0] &= ~1;                          /* Do not account for unmapped vertices (terminal domain 0 because of "+1") */

    if (MPI_Allreduce (nmskloctab, nmskglbtab, nmskidxnbr, MPI_INT, MPI_BOR, grafptr->proccomm) != MPI_SUCCESS) {
      errorPrint ("SCOTCH_dgraphMapView: communication error (3)");
      memFree    (nmskloctab);                    /* Free group leader */
      return     (1);
    }

    for (nmskidxnum = 0, ngbnbr = 0; nmskidxnum < nmskidxnbr; nmskidxnum ++) {
      unsigned int        nmskbitval;

      for (nmskbitval = nmskglbtab[nmskidxnum]; nmskbitval != 0; nmskbitval >>= 1)
        ngbnbr += nmskbitval & 1;
    }

    ngbsum += ngbnbr;
    if (ngbnbr < ngbmin)
      ngbmin = ngbnbr;
    if (ngbnbr > ngbmax)
      ngbmax = ngbnbr;
  }

  if (stream != NULL) {
    fprintf (stream, "M\tNeighbors min=" GNUMSTRING "\tmax=" GNUMSTRING "\tsum=" GNUMSTRING "\n",
             (Gnum) ngbmin,
             (Gnum) ngbmax,
             (Gnum) ngbsum);
  }

  memSet (commlocdist, 0, 256 * sizeof (Gnum));   /* Initialize the data */
  commlocload  =
  commlocdilat =
  commlocexpan = 0;

  edloval = 1;
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
    Gnum                termlocnum;
    ArchDom             termdomdat;
    Gnum                edgelocnum;
    Gnum                edgelocnnd;

    termlocnum = termgsttax[vertlocnum];
    if (termlocnum == ~0)                         /* Skip unmapped vertices */
      continue;

    archDomTerm (&mappptr->m.archdat, &termdomdat, termlocnum);

    for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
         edgelocnum < edgelocnnd; edgelocnum ++) {
      ArchDom             termdomend;
      Gnum                termgstend;
      Anum                distval;

      termgstend = termgsttax[grafptr->edgegsttax[edgelocnum]];
      if (termgstend == ~0)                       /* Skip unmapped end vertices */
        continue;

      distval = 0;
      if (grafptr->edloloctax != NULL)            /* Get edge weight if any */
        edloval = grafptr->edloloctax[edgelocnum];
      if (termgstend != termlocnum) {             /* If not same domain, compute distance */
        archDomTerm (&mappptr->m.archdat, &termdomend, termgstend);
        distval = archDomDist (&mappptr->m.archdat, &termdomdat, &termdomend);
      }
      commlocdist[(distval > 255) ? 255 : distval] += edloval;
      commlocload  += edloval;
      commlocdilat += distval;
      commlocexpan += distval * edloval;
    }
  }
  commlocdist[256]     = commlocload;
  commlocdist[256 + 1] = commlocdilat;
  commlocdist[256 + 2] = commlocexpan;

  if (MPI_Allreduce (commlocdist, commglbdist, 256 + 3, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (4)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  if (stream != NULL) {
    Gnum                commglbload;

    commglbload = commglbdist[256];
    fprintf (stream, "M\tCommDilat=%f\t(" GNUMSTRING ")\n", /* Print expansion parameters */
           (double) commglbdist[256 + 1] / grafptr->edgeglbnbr,
           (Gnum) (commglbdist[256 + 1] / 2));
    fprintf (stream, "M\tCommExpan=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) commglbdist[256 + 2] / (double) commglbload),
             (Gnum) (commglbdist[256 + 2] / 2));
    fprintf (stream, "M\tCommCutSz=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) (commglbload - commglbdist[0]) / (double) commglbload),
             (Gnum) ((commglbload - commglbdist[0]) / 2));
    fprintf (stream, "M\tCommDelta=%f\n",
             (((double) commglbload  * (double) commglbdist[256 + 1]) == 0.0L)
             ? (double) 0.0L
             : ((double) commglbdist[256 + 2] * (double) grafptr->edgeglbnbr) /
               ((double) commglbload * (double) commglbdist[256 + 2]));

    for (distmax = 255; distmax != -1; distmax --)  /* Find longest distance */
      if (commglbdist[distmax] != 0)
        break;
    for (distval = 0; distval <= distmax; distval ++) /* Print distance histogram */
      fprintf (stream, "M\tCommLoad[" ANUMSTRING "]=%f\n",
               (Anum) distval,
               (double) commglbdist[distval] / (double) commglbload);
  }

  memFree (nmskloctab);                           /* Free group leader */

  return (0);
}
/* Public library entry point: views the opaque
** user handle as an internal distributed graph
** and hands it to dgraphGhst(), which other
** callers in this file use to compute the ghost
** edge array if it is not already present.
** It returns whatever dgraphGhst() returns;
** callers in this file treat any non-zero
** value as an error.
*/

int
SCOTCH_dgraphGhst (
SCOTCH_Dgraph * const       grafptr)
{
  Dgraph * const      srcgrafptr = (Dgraph *) grafptr; /* Opaque handle seen as internal structure */
  int                 o;

  o = dgraphGhst (srcgrafptr);                    /* Status is passed through unchanged */

  return (o);
}
/* This routine computes a bipartition of the
** given distributed bipartition graph by means
** of an iterative diffusion process. The last
** two local vertices of every process are
** anchor vertices, one per part: anchor 0 is
** loaded with a negative amount and anchor 1
** with a positive amount at every pass. After
** paraptr->passnbr passes, every non-anchor
** vertex is assigned to part 0 if its diffusion
** value is negative or zero, and to part 1 else.
** The frontier array and the local and global
** load, size and communication fields of the
** graph are then recomputed.
** Parameters read from paraptr are: typeval
** (balanced target or current load clamped to
** the allowed range), passnbr (number of
** passes), cdifval (coefficient applied to what
** comes from neighbors) and cremval (coefficient
** applied to what remains on the vertex).
** All processes of grafptr->s.proccomm must call
** this routine together, since it performs
** collective communications.
** It returns:
** - 0   : if the bipartition could be computed,
**         or if the graph has no usable anchors
**         (partition is then left as is).
** - !0  : on error.
*/

int
bdgraphBipartDf (
Bdgraph * const                     grafptr,      /*+ Distributed graph +*/
const BdgraphBipartDfParam * const  paraptr)      /*+ Method parameters +*/
{
  float * restrict        ielsloctax;             /* Inverse of degree array   */
  float * restrict        veexloctax;             /* Veexval over domdist; allocated but not referenced further in this routine */
  float * restrict        difogsttax;             /* Old diffusion value array */
  float * restrict        difngsttax;             /* New diffusion value array */
  const Gnum * restrict   edgegsttax;
  Gnum                    fronlocnum;
  Gnum                    veexlocnbr;
  float                   vanclocval[2];          /* Amount poured into each anchor vertex at each step           */
  float                   valolocval[2];          /* Fraction of load to remove from anchor vertices at each step */
  Gnum                    vanclocnnd;             /* Based index of first anchor vertex                           */
  Gnum                    vertlocnum;
  const Gnum * restrict   velolocbax;
  Gnum                    velolocmsk;
  const Gnum * restrict   edlolocbax;
  Gnum                    edlolocmsk;
  Gnum                    complocload1;
  Gnum                    complocsize1;
  Gnum                    commlocloadintn;
  Gnum                    commlocloadextn;
  Gnum                    commlocgainextn;
  Gnum                    reduloctab[6];
  Gnum                    reduglbtab[6];
  Gnum                    passnum;
  float                   cdifval;
  float                   cremval;
  int                     ovflval;                /* Overflow flag value                          */
  int                     cheklocval;             /* Set if local work memory allocation failed   */

  if (dgraphGhst (&grafptr->s) != 0) {            /* Compute ghost edge array if not already present */
    errorPrint ("bdgraphBipartDf: cannot compute ghost edge array");
    return     (1);
  }

  reduloctab[0] = grafptr->s.vendloctax[grafptr->s.vertlocnnd - 2] - grafptr->s.vertloctax[grafptr->s.vertlocnnd - 2] - (grafptr->s.procglbnbr - 1); /* Local degree of both anchor vertices, minus edges to other anchors */
  reduloctab[1] = grafptr->s.vendloctax[grafptr->s.vertlocnnd - 1] - grafptr->s.vertloctax[grafptr->s.vertlocnnd - 1] - (grafptr->s.procglbnbr - 1); /* Anchor edges have load 1 even for weighted graphs                  */
  if (grafptr->s.veloloctax == NULL)
    reduloctab[2] =                               /* Weights of anchors */
    reduloctab[3] = 1;
  else {
    reduloctab[2] = grafptr->s.veloloctax[grafptr->s.vertlocnnd - 2];
    reduloctab[3] = grafptr->s.veloloctax[grafptr->s.vertlocnnd - 1];
  }

  cheklocval = 0;
  veexlocnbr = (grafptr->veexloctax != NULL) ? grafptr->s.vertlocnbr : 0;
  if (memAllocGroup ((void **) (void *)
                     &ielsloctax, (size_t) (grafptr->s.vertlocnbr * sizeof (float)),
                     &veexloctax, (size_t) (veexlocnbr            * sizeof (float)),
                     &difogsttax, (size_t) (grafptr->s.vertgstnbr * sizeof (float)),
                     &difngsttax, (size_t) (grafptr->s.vertgstnbr * sizeof (float)), NULL) == NULL) {
    errorPrint ("bdgraphBipartDf: out of memory");
    cheklocval = 1;                               /* Do not return now: collective communication below must still take place */
  }
  else {                                          /* Un-base arrays only when they really exist */
    ielsloctax -= grafptr->s.baseval;
    difogsttax -= grafptr->s.baseval;
    difngsttax -= grafptr->s.baseval;
    veexloctax  = (grafptr->veexloctax != NULL) ? (veexloctax - grafptr->s.baseval) : NULL;
  }
  reduloctab[4] = (Gnum) cheklocval;              /* Dedicated error slot: a summed "-1" in slot 0 could be hidden by positive degrees of other processes */

  if (MPI_Allreduce (reduloctab, reduglbtab, 5, GNUM_MPI, MPI_SUM, grafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartDf: communication error (1)");
    if (cheklocval == 0)
      memFree (ielsloctax + grafptr->s.baseval);  /* Free group leader */
    return     (1);
  }

  if (reduglbtab[4] != 0) {                       /* If at least one process could not allocate its arrays */
    if (cheklocval == 0)                          /* Processes which succeeded release their memory        */
      memFree (ielsloctax + grafptr->s.baseval);  /* Free group leader */
    return (1);
  }
  if ((reduglbtab[0] == 0) ||                     /* If graph is too small to have any usable anchors, leave partition as is */
      (reduglbtab[1] == 0)) {
    memFree (ielsloctax + grafptr->s.baseval);

    if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (grafptr->partgsttax + grafptr->s.baseval), GRAPHPART_MPI) != 0) {
      errorPrint ("bdgraphBipartDf: cannot propagate part data (1)");
      return     (1);
    }

    return  (0);
  }

  vanclocval[0] = (float) ((paraptr->typeval == BDGRAPHBIPARTDFTYPEBAL) /* If balanced parts wanted */
                           ? grafptr->compglbload0avg /* Target is average                          */
                           : ( (grafptr->compglbload0 < grafptr->compglbload0min) ? grafptr->compglbload0min : /* Else keep load if not off balance */
                              ((grafptr->compglbload0 > grafptr->compglbload0max) ? grafptr->compglbload0max : grafptr->compglbload0)));
  vanclocval[1] = (float) grafptr->s.veloglbsum - vanclocval[0];
  vanclocval[0] = - vanclocval[0];                /* Part 0 holds negative values                         */
  valolocval[0] = (float) reduglbtab[2];          /* Compute values to remove from anchor vertices        */
  valolocval[1] = (float) reduglbtab[3] - BDGRAPHBIPARTDFEPSILON; /* Slightly tilt value to add to part 1 */

  vanclocnnd = grafptr->s.vertlocnnd - 2;         /* Do not account for anchor vertices in diffusion computations */
  if (grafptr->s.edloloctax != NULL) {            /* Graph has edge loads: inverse of sum of incident edge loads  */
    for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) {
      Gnum                edgelocnum;
      Gnum                edgelocnnd;
      Gnum                edlolocsum;

#ifdef SCOTCH_DEBUG_BDGRAPH2
      if ((grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]) == 0) {
        errorPrint ("bdgraphBipartDf: internal error (1)");
        memFree    (ielsloctax + grafptr->s.baseval); /* Free group leader */
        return     (1);
      }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */
      difogsttax[vertlocnum] = 0.0F;
      for (edgelocnum = grafptr->s.vertloctax[vertlocnum], edgelocnnd = grafptr->s.vendloctax[vertlocnum], edlolocsum = 0;
           edgelocnum < edgelocnnd; edgelocnum ++)
        edlolocsum += grafptr->s.edloloctax[edgelocnum];

      ielsloctax[vertlocnum] = 1.0F / (float) edlolocsum;
    }
  }
  else {                                          /* Graph has no edge loads */
    for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) {
#ifdef SCOTCH_DEBUG_BDGRAPH2
      if ((grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]) == 0) {
        errorPrint ("bdgraphBipartDf: internal error (2)");
        memFree    (ielsloctax + grafptr->s.baseval); /* Free group leader */
        return     (1);
      }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */
      ielsloctax[vertlocnum] = 1.0F / (float) (grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]);
      difogsttax[vertlocnum] = 0.0F;
    }
  }
  ielsloctax[vanclocnnd]     = 1.0F / (float) reduglbtab[0]; /* Anchors use global anchor degrees, which are non-zero here */
  ielsloctax[vanclocnnd + 1] = 1.0F / (float) reduglbtab[1];
  difogsttax[vanclocnnd]     = vanclocval[0] * ielsloctax[vanclocnnd]; /* Load anchor vertices for first pass */
  difogsttax[vanclocnnd + 1] = vanclocval[1] * ielsloctax[vanclocnnd + 1];
  difngsttax[vanclocnnd]     =                    /* In case of isolated anchors, do not risk overflow because of NaN */
  difngsttax[vanclocnnd + 1] = 0.0F;

  if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (difogsttax + grafptr->s.baseval), MPI_FLOAT) != 0) { /* Perform initial diffusion (and build communication structures) */
    errorPrint ("bdgraphBipartDf: cannot propagate diffusion data (1)");
    memFree    (ielsloctax + grafptr->s.baseval); /* Free group leader */
    return     (1);
  }

  ovflval    = 0;
  cdifval    = paraptr->cdifval;
  cremval    = paraptr->cremval;
  edgegsttax = grafptr->s.edgegsttax;
  for (passnum = 0; ; ) {                         /* For all passes                                                    */
    if (ovflval == 0) {                           /* If no overflow occured; else only take part in the halo exchanges */
      float *             diftgsttax;             /* Temporary swap value                                              */
      float               veloval;

      veloval = 1.0F;                             /* Assume no vertex loads */
      for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) {
        Gnum                edgelocnum;
        Gnum                edgelocnnd;
        float               diffval;

        diffval    = 0.0F;
        edgelocnum = grafptr->s.vertloctax[vertlocnum];
        edgelocnnd = grafptr->s.vendloctax[vertlocnum];
        if (grafptr->s.edloloctax != NULL)        /* Sum contributions of neighbors, weighted by edge loads if any */
          for ( ; edgelocnum < edgelocnnd; edgelocnum ++)
            diffval += difogsttax[edgegsttax[edgelocnum]] * (float) grafptr->s.edloloctax[edgelocnum];
        else
          for ( ; edgelocnum < edgelocnnd; edgelocnum ++)
            diffval += difogsttax[edgegsttax[edgelocnum]];

        diffval *= cdifval;
        diffval += (difogsttax[vertlocnum] * cremval) / ielsloctax[vertlocnum]; /* Add what remains on the vertex itself */

        if (grafptr->s.veloloctax != NULL)
          veloval = (float) grafptr->s.veloloctax[vertlocnum];
        if (diffval >= 0.0F) {                    /* Remove vertex load from amount, without letting its sign change */
          diffval = (diffval - veloval) * ielsloctax[vertlocnum];
          if (diffval <= 0.0F)
            diffval = +BDGRAPHBIPARTDFEPSILON;
        }
        else {
          diffval = (diffval + veloval) * ielsloctax[vertlocnum];
          if (diffval >= 0.0F)
            diffval = -BDGRAPHBIPARTDFEPSILON;
        }
        if (isnan (diffval)) {                    /* If overflow occured                    */
          ovflval = 1;                            /* We are in state of overflow            */
          goto abort;                             /* Exit this loop without swapping arrays */
        }
        difngsttax[vertlocnum] = diffval;
      }
      for ( ; vertlocnum < grafptr->s.vertlocnnd; vertlocnum ++) { /* For the two local anchor vertices */
        Gnum                edgelocnum;
        Gnum                edgelocnnd;
        float               diffval;

        diffval    = 0.0F;
        edgelocnum = grafptr->s.vertloctax[vertlocnum] + grafptr->s.procglbnbr - 1; /* Skip links to other anchors */
        edgelocnnd = grafptr->s.vendloctax[vertlocnum];
        if (edgelocnum == edgelocnnd)             /* If isolated anchor */
          continue;                               /* Barrel is empty    */

        for ( ; edgelocnum < edgelocnnd; edgelocnum ++) /* Anchor edges have load 1 even for weighted graphs */
          diffval += difogsttax[edgegsttax[edgelocnum]];

        diffval *= cdifval;
        diffval += vanclocval[vertlocnum - vanclocnnd] + (difogsttax[vertlocnum] * cremval) / ielsloctax[vertlocnum];
        if (diffval >= 0.0F) {
          diffval = (diffval - valolocval[vertlocnum - vanclocnnd]) * ielsloctax[vertlocnum];
          if (diffval <= 0.0F)
            diffval = +BDGRAPHBIPARTDFEPSILON;
        }
        else {
          diffval = (diffval + valolocval[vertlocnum - vanclocnnd]) * ielsloctax[vertlocnum];
          if (diffval >= 0.0F)
            diffval = -BDGRAPHBIPARTDFEPSILON;
        }
        if (isnan (diffval)) {                    /* If overflow occured                    */
          ovflval = 1;                            /* We are in state of overflow            */
          goto abort;                             /* Exit this loop without swapping arrays */
        }
        difngsttax[vertlocnum] = diffval;
      }

      diftgsttax = (float *) difngsttax;          /* Swap old and new diffusion arrays          */
      difngsttax = (float *) difogsttax;          /* Casts to prevent IBM compiler from yelling */
      difogsttax = (float *) diftgsttax;
    }
abort :                                           /* If overflow occured, resume here    */
    if (++ passnum >= paraptr->passnbr)           /* If maximum number of passes reached */
      break;                                      /* Exit main loop                      */

    if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (difogsttax + grafptr->s.baseval), MPI_FLOAT) != 0) {
      errorPrint ("bdgraphBipartDf: cannot propagate diffusion data (2)");
      memFree    (ielsloctax + grafptr->s.baseval); /* Free group leader */
      return     (1);
    }
  }

  for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) /* Set new part distribution */
    grafptr->partgsttax[vertlocnum] = (difogsttax[vertlocnum] <= 0.0F) ? 0 : 1;
  grafptr->partgsttax[vanclocnnd]     = 0;        /* Set up parts in case anchors are isolated */
  grafptr->partgsttax[vanclocnnd + 1] = 1;

  if (grafptr->s.veloloctax != NULL) {            /* TRICK: a zero mask makes every index point to the single unity load cell */
    velolocbax = grafptr->s.veloloctax;
    velolocmsk = ~((Gnum) 0);
  }
  else {
    velolocbax = &bdgraphbipartdfloadone;
    velolocmsk = 0;
  }
  if (grafptr->s.edloloctax != NULL) {
    edlolocbax = grafptr->s.edloloctax;
    edlolocmsk = ~((Gnum) 0);
  }
  else {
    edlolocbax = &bdgraphbipartdfloadone;
    edlolocmsk = 0;
  }

  memFree (ielsloctax + grafptr->s.baseval);      /* Free group leader */

  if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (grafptr->partgsttax + grafptr->s.baseval), GRAPHPART_MPI) != 0) {
    errorPrint ("bdgraphBipartDf: cannot propagate part data (2)");
    return     (1);
  }

  commlocloadintn =
  commlocloadextn =
  commlocgainextn = 0;
  for (vertlocnum = grafptr->s.baseval, fronlocnum = complocsize1 = complocload1 = 0;
       vertlocnum < grafptr->s.vertlocnnd; vertlocnum ++) {
    Gnum                edgelocnum;
    Gnum                edgelocnnd;
    Gnum                veloval;
    Gnum                partval;
    Gnum                flagval;

#ifdef SCOTCH_DEBUG_BDGRAPH2
    if (grafptr->partgsttax[vertlocnum] > 1) {
      errorPrint ("bdgraphBipartDf: internal error (3)");
      break;                                      /* Do not break upcoming collective communications */
    }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */
    partval = (Gnum) grafptr->partgsttax[vertlocnum];
    veloval = velolocbax[vertlocnum & velolocmsk];
    if (grafptr->veexloctax != NULL) {
      commlocloadextn += grafptr->veexloctax[vertlocnum] * partval;
      commlocgainextn += grafptr->veexloctax[vertlocnum] * (1 - partval * 2);
    }
    complocsize1 += partval;
    complocload1 += partval * veloval;

    flagval = 0;
    for (edgelocnum = grafptr->s.vertloctax[vertlocnum], edgelocnnd = grafptr->s.vendloctax[vertlocnum];
         edgelocnum < edgelocnnd; edgelocnum ++) {
      Gnum                edloval;
      Gnum                partend;

      partend = (Gnum) grafptr->partgsttax[edgegsttax[edgelocnum]];
#ifdef SCOTCH_DEBUG_BDGRAPH2
      if (partend > 1) {
        errorPrint ("bdgraphBipartDf: internal error (4)");
        vertlocnum = grafptr->s.vertlocnnd;
        break;                                    /* Do not break upcoming collective communications */
      }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */
      edloval  = edlolocbax[edgelocnum & edlolocmsk];
      flagval |= partval ^ partend;
      commlocloadintn += (partval ^ partend) * edloval; /* Internal load is accounted for twice */
    }
    if (flagval != 0)                             /* If vertex has neighbors in other part */
      grafptr->fronloctab[fronlocnum ++] = vertlocnum; /* Record it as member of separator */
  }
  grafptr->complocload0 = grafptr->s.velolocsum - complocload1;
  grafptr->complocsize0 = grafptr->s.vertlocnbr - complocsize1;
  grafptr->fronlocnbr   = fronlocnum;

  reduloctab[0] = fronlocnum;
  reduloctab[1] = grafptr->complocload0;
  reduloctab[2] = grafptr->complocsize0;
  reduloctab[3] = commlocloadintn;                /* Twice the internal load; sum globally before dividing by two */
  reduloctab[4] = commlocloadextn;
  reduloctab[5] = commlocgainextn;
  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 6, GNUM_MPI, MPI_SUM, grafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartDf: communication error (2)");
    return     (1);
  }
  grafptr->fronglbnbr      = reduglbtab[0];
  grafptr->compglbload0    = reduglbtab[1];
  grafptr->compglbload0dlt = grafptr->compglbload0 - grafptr->compglbload0avg;
  grafptr->compglbsize0    = reduglbtab[2];
  grafptr->commglbload     = (reduglbtab[3] / 2) * grafptr->domdist + reduglbtab[4];
  grafptr->commglbgainextn = reduglbtab[5];

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (grafptr) != 0) {
    errorPrint ("bdgraphBipartDf: internal error (5)");
    return     (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  return (0);
}