Пример #1
0
int
SCOTCH_dgraphHaloAsync (
SCOTCH_Dgraph * const         grafptr,
void * const                  datatab,
const MPI_Datatype            typeval,
SCOTCH_DgraphHaloReq * const  requptr)
{
  dgraphHaloAsync ((Dgraph *) grafptr, (byte *) datatab, typeval, (DgraphHaloRequest *) requptr);
  return (0);
}
Пример #2
0
int
bdgraphBipartBd (
Bdgraph * const                     orggrafptr,   /*+ Distributed graph +*/
const BdgraphBipartBdParam * const  paraptr)      /*+ Method parameters +*/
{
  Bdgraph                 bndgrafdat;             /* Bipartitioning band graph structure                          */
  Gnum                    bndvertancnnd;          /* End of local vertex array, without anchors                   */
  Gnum                    bndvertlocnbr1;         /* Number of band graph vertices in part 1 except anchor 1      */
  Gnum                    bndvertlocnum;
  Gnum                    bndvertlvlnum;          /* Based number of first band vertex in last layer              */
  Gnum                    bndvertlocancadj;       /* Flag set when anchor(s) represent unexistent vertices        */
  Gnum                    bndvertglbancadj;       /* Global adjustment of anchor vertices                         */
  Gnum                    bndveexlocsum;          /* Local sum of veexloctax array cells for band graph           */
  Gnum                    bndveexlocsum0;         /* Local sum of veexloctax array cells in part 0 for band graph */
  Gnum                    bndedlolocval;
  Gnum                    bndfronlocnum;
  Gnum                    orgfronlocnum;
  int * restrict          orgflagloctab;
  Gnum                    orgvertlocnum;
  Gnum                    orgedlolocval;
  const int * restrict    orgprocsidtab;
  int                     orgprocsidnbr;
  int                     orgprocsidnum;
  int                     orgprocsidval;
  Gnum                    complocsizeadj0;
  Gnum                    commlocloadintn;
  Gnum                    commlocloadintn2;       /* Twice twice (4 times) the internal communication load of last layer */
  Gnum                    commlocloadextn;
  Gnum                    commlocgainextn;
  Gnum                    reduloctab[7];
  Gnum                    reduglbtab[7];
  DgraphHaloRequest       requdat;

  if (orggrafptr->fronglbnbr == 0)                /* If no separator vertices, apply strategy to full (original) graph */
    return (bdgraphBipartSt (orggrafptr, paraptr->stratorg));

  if (dgraphBand (&orggrafptr->s, orggrafptr->fronlocnbr, orggrafptr->fronloctab, orggrafptr->partgsttax,
                  orggrafptr->complocload0, orggrafptr->s.velolocsum - orggrafptr->complocload0, paraptr->distmax,
                  &bndgrafdat.s, &bndgrafdat.fronloctab, &bndgrafdat.partgsttax,
                  &bndvertlvlnum, &bndvertlocnbr1, &bndvertlocancadj) != 0) {
    errorPrint ("bdgraphBipartBd: cannot create band graph");
    return     (1);
  }
  bndvertancnnd = bndgrafdat.s.vertlocnnd - 2;

  reduloctab[0]  = 0;                             /* Assume no memory allocation problem */
  bndveexlocsum  =
  bndveexlocsum0 = 0;
  bndgrafdat.veexloctax = NULL;                   /* Assume no external gains */
  if (orggrafptr->veexloctax != NULL) {
    if ((bndgrafdat.veexloctax = memAlloc (bndgrafdat.s.vertlocnbr * sizeof (Gnum))) == NULL) {
      errorPrint ("bdgraphBipartBd: out of memory (1)");
      reduloctab[0] = 1;                         /* Memory error */
    }
    else {
      Gnum                bndvertlocnum;

      bndgrafdat.veexloctax -= bndgrafdat.s.baseval;

      for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) {
        Gnum                veexval;

        veexval = orggrafptr->veexloctax[bndgrafdat.s.vnumloctax[bndvertlocnum]];
        bndgrafdat.veexloctax[bndvertlocnum] = veexval;
        bndveexlocsum  += veexval;
        bndveexlocsum0 += veexval & (((Gnum) bndgrafdat.partgsttax[bndvertlocnum]) - 1);
      }
    }
  }
  reduloctab[1] = bndgrafdat.s.vendloctax[bndvertancnnd]     - bndgrafdat.s.vertloctax[bndvertancnnd]     - (orggrafptr->s.procglbnbr - 1); /* Anchor degrees */
  reduloctab[2] = bndgrafdat.s.vendloctax[bndvertancnnd + 1] - bndgrafdat.s.vertloctax[bndvertancnnd + 1] - (orggrafptr->s.procglbnbr - 1);

  bndgrafdat.complocsize0 = bndgrafdat.s.vertlocnbr - (bndvertlocnbr1 + 1); /* Add 1 for anchor vertex 1 */
  complocsizeadj0 = orggrafptr->complocsize0 - bndgrafdat.complocsize0; /* -1 less because of anchor 0   */
  reduloctab[3] = bndgrafdat.complocsize0;
  reduloctab[4] = bndvertlocancadj;               /* Sum increases in size and load */
  reduloctab[5] = bndveexlocsum;
  reduloctab[6] = bndveexlocsum0;
  if (MPI_Allreduce (reduloctab, reduglbtab, 7, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartBd: communication error (1)");
    return     (1);
  }
  if (reduglbtab[0] != 0) {
    bdgraphExit (&bndgrafdat);
    return      (1);
  }
  if ((reduglbtab[1] == 0) ||                     /* If graph is too small to have any usable anchors */
      (reduglbtab[2] == 0)) {
    bdgraphExit (&bndgrafdat);
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  bndvertglbancadj            = reduglbtab[4];
  bndgrafdat.veexglbsum       = orggrafptr->veexglbsum; /* All external gains preserved                  */
  bndgrafdat.fronlocnbr       = orggrafptr->fronlocnbr; /* All separator vertices are kept in band graph */
  bndgrafdat.fronglbnbr       = orggrafptr->fronglbnbr;
  bndgrafdat.complocload0     = orggrafptr->complocload0    + bndvertlocancadj; /* All loads are kept in band graph */
  bndgrafdat.compglbload0     = orggrafptr->compglbload0    + bndvertglbancadj;
  bndgrafdat.compglbload0min  = orggrafptr->compglbload0min + bndvertglbancadj; /* Tilt extrema loads according to adjustments */
  bndgrafdat.compglbload0max  = orggrafptr->compglbload0max + bndvertglbancadj;
  bndgrafdat.compglbload0avg  = orggrafptr->compglbload0avg + bndvertglbancadj; /* Tilt average load according to adjustments */
  bndgrafdat.compglbload0dlt  = orggrafptr->compglbload0dlt;
  bndgrafdat.compglbsize0     = reduglbtab[3];
  bndgrafdat.commglbload      = orggrafptr->commglbload;
  bndgrafdat.commglbgainextn  = orggrafptr->commglbgainextn;
  bndgrafdat.commglbloadextn0 = orggrafptr->commglbloadextn0;
  bndgrafdat.commglbgainextn0 = orggrafptr->commglbgainextn0;
  bndgrafdat.bbalglbval       = orggrafptr->bbalglbval;
  bndgrafdat.domndist         = orggrafptr->domndist;
  bndgrafdat.domnwght[0]      = orggrafptr->domnwght[0];
  bndgrafdat.domnwght[1]      = orggrafptr->domnwght[1];
  bndgrafdat.levlnum          = orggrafptr->levlnum;

  if (bndgrafdat.veexloctax != NULL) {
    Gnum                bndveexglbanc0;
    Gnum                bndveexglbanc1;

    bndveexglbanc0 = (orggrafptr->veexglbsum + orggrafptr->commglbgainextn) / 2 - reduglbtab[6]; /* Compute global external gains of anchors */
    bndveexglbanc1 = (orggrafptr->veexglbsum - bndveexglbanc0) - reduglbtab[5];

    bndgrafdat.veexloctax[bndvertancnnd]     = DATASIZE (bndveexglbanc0, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum); /* Spread gains across local anchors */
    bndgrafdat.veexloctax[bndvertancnnd + 1] = DATASIZE (bndveexglbanc1, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum);
  }

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (&bndgrafdat) != 0) {
    errorPrint ("bdgraphBipartBd: internal error (1)");
    return     (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  if (bdgraphBipartSt (&bndgrafdat, paraptr->stratbnd) != 0) { /* Separate distributed band graph */
    errorPrint  ("bdgraphBipartBd: cannot separate band graph");
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  reduloctab[0] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd]; /* Check if anchor vertices remain in their parts */
  reduloctab[1] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd + 1];
  reduloctab[2] = complocsizeadj0;
  reduloctab[3] = 0;                              /* Assume memory allocation is all right */
  if ((orgflagloctab = memAlloc (flagSize (orggrafptr->s.vertlocnnd) * sizeof (int))) == NULL) { /* Eventually keep space for based indices */
    errorPrint ("bdgraphBipartBd: out of memory (2)");
    reduloctab[3] = 1;
  }

  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartBd: communication error (2)");
    return     (1);
  }

  if (((reduglbtab[0] + reduglbtab[1]) != orggrafptr->s.procglbnbr)         || /* If not all anchors of initial same parts in same parts */
      ((reduglbtab[0] != 0) && (reduglbtab[0] != orggrafptr->s.procglbnbr)) ||
      (reduglbtab[3] != 0)) {
    if (orgflagloctab != NULL)
      memFree (orgflagloctab);
    bdgraphExit (&bndgrafdat);                    /* Apply original strategy to full graph */
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  if (dgraphGhst (&bndgrafdat.s) != 0) {          /* Compute ghost edge array if not already present */
    errorPrint ("bdgraphBipartBd: cannot compute ghost edge array");
    return     (1);
  }

  if (reduglbtab[0] == orggrafptr->s.procglbnbr) { /* If all anchors swapped parts, swap all parts of original vertices */
    Gnum                orgvertnum;

    orggrafptr->complocsize0 = orggrafptr->s.vertlocnbr - reduloctab[2] - bndgrafdat.s.vertlocnbr + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = orggrafptr->s.vertglbnbr - reduglbtab[2] - bndgrafdat.s.vertglbnbr + bndgrafdat.compglbsize0;

    for (orgvertnum = orggrafptr->s.baseval; orgvertnum < orggrafptr->s.vertlocnnd; orgvertnum ++)
      orggrafptr->partgsttax[orgvertnum] ^= 1;
  }
  else {
    orggrafptr->complocsize0 = reduloctab[2] + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = reduglbtab[2] + bndgrafdat.compglbsize0;
  }

  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) /* Update part array of all vertices except anchors */
    orggrafptr->partgsttax[bndgrafdat.s.vnumloctax[bndvertlocnum]] = bndgrafdat.partgsttax[bndvertlocnum];

  dgraphHaloAsync (&orggrafptr->s, (byte *) (orggrafptr->partgsttax + orggrafptr->s.baseval), GRAPHPART_MPI, &requdat); /* Share part array of full graph */

  commlocloadintn =
  commlocloadextn =
  commlocgainextn = 0;
  bndedlolocval   = 1;                            /* Assume no edge loads */
  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertlvlnum; bndvertlocnum ++) { /* For all vertices of band graph save for last layer */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum];
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn += (bndpartval ^ bndpartend) * bndedlolocval; /* Internal load is accounted for twice */
    }
  }
  for ( ; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer, remove internal loads to band vertices once */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum] - 1; /* "-1" to avoid anchor edges */
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn -= (bndpartval ^ bndpartend) * bndedlolocval; /* Remove internal loads to band graph vertices once because afterwards they will be accounted for twice */
    }
  }

  memSet (orgflagloctab, 0, flagSize (orggrafptr->s.vertlocnnd) * sizeof (int)); /* Set vertices as not already considered */

  for (bndfronlocnum = orgfronlocnum = 0; bndfronlocnum < bndgrafdat.fronlocnbr; bndfronlocnum ++) { /* Project back separator except for last layer */
    Gnum                bndvertlocnum;

    bndvertlocnum = bndgrafdat.fronloctab[bndfronlocnum];
    if (bndvertlocnum < bndvertlvlnum) {          /* If vertex does not belong to last layer */
      Gnum                orgvertlocnum;

      orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
      flagSet (orgflagloctab, orgvertlocnum);     /* Set vertex as processed */
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
    }
  }

  if (dgraphHaloWait (&requdat) != 0) {
    errorPrint ("bdgraphBipartBd: cannot complete asynchronous halo exchange");
    return     (1);
  }

  orgedlolocval    = 1;                           /* Assume no edge loads */
  commlocloadintn2 = 0;
  for (bndvertlocnum = bndvertlvlnum; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer */
    Gnum                orgedgelocnum;
    Gnum                orgedgelocnnd;
    Gnum                orgvertlocnum;
    GraphPart           orgpartval;
    Gnum                orgflagval;

    orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
    orgpartval    = bndgrafdat.partgsttax[bndvertlocnum];

    orgflagval = 0;                               /* Assume vertex does not belong to the frontier */
    for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
         orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
      Gnum                orgvertlocend;
      Gnum                orgpartend;
      Gnum                orgflagtmp;

      orgvertlocend = orggrafptr->s.edgegsttax[orgedgelocnum];
      orgpartend    = orggrafptr->partgsttax[orgvertlocend];

      orgflagtmp = orgpartval ^ orgpartend;
      if (bndgrafdat.s.edloloctax != NULL)
        orgedlolocval = orggrafptr->s.edloloctax[orgedgelocnum];
      orgflagval       |= orgflagtmp;
      commlocloadintn2 += orgflagtmp * orgedlolocval; /* Internal load to band and original graph vertices are accounted for twice */
      if ((orgflagtmp != 0) && (orgvertlocend < orggrafptr->s.vertlocnnd) && (flagVal (orgflagloctab, orgvertlocend) == 0)) {
        orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocend;
        flagSet (orgflagloctab, orgvertlocend);
      }
    }
    if ((orgflagval != 0) && (flagVal (orgflagloctab, orgvertlocnum) == 0))
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;

    flagSet (orgflagloctab, orgvertlocnum);       /* Set vertex as processed anyway */
  }
  commlocloadintn += 2 * commlocloadintn2;        /* Add twice the internal load of original graph edges and once the one of band edges (one removed before) */

  orggrafptr->complocload0    = bndgrafdat.complocload0 - bndvertlocancadj;
  orggrafptr->compglbload0    = bndgrafdat.compglbload0 - bndvertglbancadj;
  orggrafptr->compglbload0dlt = orggrafptr->compglbload0 - orggrafptr->compglbload0avg;

  orgprocsidnbr = orggrafptr->s.procsidnbr;
  if (orgprocsidnbr == 0)
    goto loop_exit;
  orgvertlocnum = orggrafptr->s.baseval;
  orgprocsidnum = 0;
  orgprocsidtab = orggrafptr->s.procsidtab;
  orgprocsidval = orgprocsidtab[orgprocsidnum ++];  
  while (1) {             /* Scan all vertices which have foreign neighbors */
    while (orgprocsidval < 0) {
      orgvertlocnum -= (Gnum) orgprocsidval;
      orgprocsidval  = orgprocsidtab[orgprocsidnum ++];  
    }

    if (flagVal (orgflagloctab, orgvertlocnum) == 0) { /* If vertex not already processed */
      Gnum                orgedgelocnum;
      Gnum                orgedgelocnnd;
      GraphPart           orgpartval;

      orgpartval = orggrafptr->partgsttax[orgvertlocnum];
      for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
           orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
        if (orggrafptr->partgsttax[orggrafptr->s.edgegsttax[orgedgelocnum]] != orgpartval) {
          orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
          break;
        }
      }
    }

    do {
      if (orgprocsidnum >= orgprocsidnbr)
        goto loop_exit;
    } while ((orgprocsidval = orgprocsidtab[orgprocsidnum ++]) >= 0);
  }
loop_exit :
  memFree (orgflagloctab);

  reduloctab[0] = commlocloadintn;                /* Twice the internal load; sum globally before dividing by two */
  reduloctab[1] = commlocloadextn;
  reduloctab[2] = commlocgainextn;
  reduloctab[3] = orgfronlocnum;
  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartBd: communication error (3)");
    return     (1);
  }
  orggrafptr->fronlocnbr      = orgfronlocnum;
  orggrafptr->fronglbnbr      = reduglbtab[3];
  orggrafptr->commglbload     = (reduglbtab[0] / 2) * orggrafptr->domndist + reduglbtab[1];
  orggrafptr->commglbgainextn = reduglbtab[2];
  orggrafptr->bbalglbval      = (double) ((orggrafptr->compglbload0dlt < 0) ? (- orggrafptr->compglbload0dlt) : orggrafptr->compglbload0dlt) / (double) orggrafptr->compglbload0avg;

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (orggrafptr) != 0) {
    errorPrint ("bdgraphBipartBd: internal error (2)");
    return     (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  bdgraphExit (&bndgrafdat);

  return (0);
}
int
SCOTCH_dgraphMapView (
SCOTCH_Dgraph * const         libgrafptr,
const SCOTCH_Dmapping * const libmappptr,
FILE * const                  stream)
{
  Dgraph * restrict             grafptr;
  const LibDmapping * restrict  mappptr;
  ArchDom                       domnfrst;         /* Largest domain in architecture          */
  unsigned int * restrict       nmskloctab;       /* Local neighbor bitfield                 */
  unsigned int * restrict       nmskglbtab;       /* Local neighbor bitfield                 */
  int                           nmskidxnbr;       /* Size of bitfield; int since sent by MPI */
  Gnum * restrict               tgloloctab;       /* Local array of terminal domain loads    */
  Gnum * restrict               tgloglbtab;       /* Global array of terminal domain loads   */
  Gnum * restrict               termgsttax;       /* Terminal domain ghost mapping array     */
  Anum                          tgtnbr;           /* Number of processors in target topology */
  Anum                          tgtnum;
  Anum                          mapnbr;           /* Number of processors effectively used   */
  double                        mapavg;           /* Average mapping weight                  */
  Gnum                          mapmin;
  Gnum                          mapmax;
  Gnum                          mapsum;           /* (Partial) sum of vertex loads           */
  double                        mapdlt;
  double                        mapmmy;           /* Maximum / average ratio                 */
  Anum                          ngbsum;
  Anum                          ngbmin;
  Anum                          ngbmax;
  Gnum                          vertlocnum;
  Gnum                          veloval;
  Gnum                          edloval;
  Gnum                          commlocdist[256 + 3]; /* Array of local load distribution    */
  Gnum                          commglbdist[256 + 3];
  Gnum                          commlocload;      /* Total local edge load (edge sum)        */
  Gnum                          commlocdilat;     /* Total edge dilation                     */
  Gnum                          commlocexpan;     /* Total edge expansion                    */
  Anum                          distmax;
  Anum                          distval;
  int                           cheklocval;
  int                           chekglbval;
  DgraphHaloRequest             requdat;

  grafptr = (Dgraph *) libgrafptr;
  mappptr = (LibDmapping *) libmappptr;

  if ((grafptr->vertglbnbr == 0) ||               /* Return if nothing to do */
      (grafptr->edgeglbnbr == 0))
    return (0);

  archDomFrst (&mappptr->m.archdat, &domnfrst);   /* Get architecture domain      */
  tgtnbr = archDomSize (&mappptr->m.archdat, &domnfrst); /* Get architecture size */

  if (archVar (&mappptr->m.archdat)) {
    errorPrint ("SCOTCH_dgraphMapView: not implemented");
    return     (1);
  }

  if (dgraphGhst (grafptr) != 0) {                /* Compute ghost edge array if not already present */
    errorPrint ("SCOTCH_dgraphMapView: cannot compute ghost edge array");
    return     (1);
  }

  nmskidxnbr = (tgtnbr + 1 + ((sizeof (int) << 3) - 1)) / (sizeof (int) << 3); /* Size of neighbor subdomain bitfield; TRICK: "+1" to have a "-1" cell for unmapped vertices */

  cheklocval = 0;
  if (memAllocGroup ((void **) (void *)
                     &nmskloctab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &nmskglbtab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &tgloloctab, (size_t) ((tgtnbr + 1)        * sizeof (Gnum)), /* TRICK: "+1" to have a "-1" cell for unmapped vertices */
                     &tgloglbtab, (size_t) (tgtnbr              * sizeof (Gnum)),
                     &termgsttax, (size_t) (grafptr->vertgstnbr * sizeof (Gnum)), NULL) == NULL) {
    cheklocval = 1;
  }
  if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (1)");
    return     (1);
  }
  if (chekglbval != 0) {
    if (nmskloctab != NULL)
      memFree (nmskloctab);
    errorPrint ("SCOTCH_dgraphMapView: out of memory");
    return     (1);
  }

  if (dmapTerm (&mappptr->m, grafptr, termgsttax) != 0) {
    errorPrint ("SCOTCH_dgraphMapView: cannot build local terminal array");
    memFree    (nmskloctab);
    return     (1);
  }
  dgraphHaloAsync (grafptr, termgsttax, GNUM_MPI, &requdat);
  termgsttax -= grafptr->baseval;

  memSet (tgloloctab, 0, (tgtnbr + 1) * sizeof (Gnum));
  tgloloctab ++;                                  /* TRICK: trim array for "-1" cell */

  veloval = 1;
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) {
#ifdef SCOTCH_DEBUG_DMAP2
    if ((termgsttax[vertlocnum] < -1) || (termgsttax[vertlocnum] >= tgtnbr)) {
      errorPrint ("SCOTCH_dgraphMapView: invalid local terminal array");
      memFree    (nmskloctab);                      /* Free group leader */
      return     (1);
    }
#endif /* SCOTCH_DEBUG_DMAP2 */
    if (grafptr->veloloctax != NULL)
      veloval = grafptr->veloloctax[vertlocnum];
    tgloloctab[termgsttax[vertlocnum]] += veloval; /* One more vertex of given weight assigned to this target */
  }

  if (MPI_Allreduce (tgloloctab, tgloglbtab, tgtnbr, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (2)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  mapmin = GNUMMAX;
  mapmax = 0;
  mapsum = 0;
  mapnbr = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {
    Gnum                tgtsum;

    tgtsum = tgloglbtab[tgtnum];
    if (tgtsum != 0) {
      mapnbr ++;
      mapsum += tgtsum;
      if (tgtsum < mapmin)
        mapmin = tgtsum;
      if (tgtsum > mapmax)
        mapmax = tgtsum;
    }
  }
  mapavg = (mapnbr == 0) ? 0.0L : ((double) mapsum / (double) mapnbr);

  mapdlt = 0.0L;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++)
    mapdlt += fabs ((double) tgloglbtab[tgtnum] - mapavg);
  mapdlt = (mapnbr != 0) ? mapdlt / ((double) mapnbr * mapavg) : 0.0L;
  mapmmy = (mapnbr != 0) ? (double) mapmax / (double) mapavg : 0.0L;

  if (stream != NULL) {
    fprintf (stream, "M\tProcessors " GNUMSTRING "/" GNUMSTRING "(%g)\n",
             (Gnum) mapnbr,
             (Gnum) tgtnbr,
             (double) mapnbr / (double) tgtnbr);
    fprintf (stream, "M\tTarget min=" GNUMSTRING "\tmax=" GNUMSTRING "\tavg=%g\tdlt=%g\tmaxavg=%g\n",
             (Gnum) mapmin,
             (Gnum) mapmax,
             mapavg,
             mapdlt,
             mapmmy);
  }

  if (dgraphHaloWait (&requdat) != 0) {           /* Wait for ghost terminal data to be exchanged */
    errorPrint ("SCOTCH_dgraphMapView: cannot complete asynchronous halo exchange");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  ngbmin = ANUMMAX;
  ngbmax = 0;
  ngbsum = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {  /* For all subdomain indices */
    int                 nmskidxnum;
    Gnum                vertlocnum;
    Anum                ngbnbr;

    if (tgloglbtab[tgtnum] <= 0)                  /* If empty subdomain, skip it */
      continue;

    memSet (nmskloctab, 0, nmskidxnbr * sizeof (int)); /* Reset neighbor bit mask */

    for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
      Gnum                termnum;
      Gnum                edgelocnum;
      Gnum                edgelocnnd;

      termnum = termgsttax[vertlocnum];
      if (termnum != tgtnum)                      /* If vertex does not belong to current part or is not mapped, skip it */
        continue;

      for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
           edgelocnum < edgelocnnd; edgelocnum ++) {
        Gnum                termend;

        termend = termgsttax[grafptr->edgegsttax[edgelocnum]];
        if (termend != tgtnum) {                  /* If edge is not internal             */
          termend ++;                             /* TRICK: turn unmapped to 0 and so on */
          nmskloctab[termend / (sizeof (int) << 3)] |= 1 << (termend & ((sizeof (int) << 3) - 1)); /* Flag neighbor in bit array */
        }
      }
    }
    nmskloctab[0] &= ~1;                          /* Do not account for unmapped vertices (terminal domain 0 because of "+1") */

    if (MPI_Allreduce (nmskloctab, nmskglbtab, nmskidxnbr, MPI_INT, MPI_BOR, grafptr->proccomm) != MPI_SUCCESS) {
      errorPrint ("SCOTCH_dgraphMapView: communication error (3)");
      memFree    (nmskloctab);                    /* Free group leader */
      return     (1);
    }

    for (nmskidxnum = 0, ngbnbr = 0; nmskidxnum < nmskidxnbr; nmskidxnum ++) {
      unsigned int        nmskbitval;

      for (nmskbitval = nmskglbtab[nmskidxnum]; nmskbitval != 0; nmskbitval >>= 1)
        ngbnbr += nmskbitval & 1;
    }

    ngbsum += ngbnbr;
    if (ngbnbr < ngbmin)
      ngbmin = ngbnbr;
    if (ngbnbr > ngbmax)
      ngbmax = ngbnbr;
  }

  if (stream != NULL) {
    fprintf (stream, "M\tNeighbors min=" GNUMSTRING "\tmax=" GNUMSTRING "\tsum=" GNUMSTRING "\n",
             (Gnum) ngbmin,
             (Gnum) ngbmax,
             (Gnum) ngbsum);
  }

  memSet (commlocdist, 0, 256 * sizeof (Gnum));   /* Initialize the data */
  commlocload  =
  commlocdilat =
  commlocexpan = 0;

  edloval = 1;
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
    Gnum                termlocnum;
    ArchDom             termdomdat;
    Gnum                edgelocnum;
    Gnum                edgelocnnd;

    termlocnum = termgsttax[vertlocnum];
    if (termlocnum == ~0)                         /* Skip unmapped vertices */
      continue;

    archDomTerm (&mappptr->m.archdat, &termdomdat, termlocnum);

    for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
         edgelocnum < edgelocnnd; edgelocnum ++) {
      ArchDom             termdomend;
      Gnum                termgstend;
      Anum                distval;

      termgstend = termgsttax[grafptr->edgegsttax[edgelocnum]];
      if (termgstend == ~0)                       /* Skip unmapped end vertices */
        continue;

      distval = 0;
      if (grafptr->edloloctax != NULL)            /* Get edge weight if any */
        edloval = grafptr->edloloctax[edgelocnum];
      if (termgstend != termlocnum) {             /* If not same domain, compute distance */
        archDomTerm (&mappptr->m.archdat, &termdomend, termgstend);
        distval = archDomDist (&mappptr->m.archdat, &termdomdat, &termdomend);
      }
      commlocdist[(distval > 255) ? 255 : distval] += edloval;
      commlocload  += edloval;
      commlocdilat += distval;
      commlocexpan += distval * edloval;
    }
  }
  commlocdist[256]     = commlocload;
  commlocdist[256 + 1] = commlocdilat;
  commlocdist[256 + 2] = commlocexpan;

  if (MPI_Allreduce (commlocdist, commglbdist, 256 + 3, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (4)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  if (stream != NULL) {
    Gnum                commglbload;

    commglbload = commglbdist[256];
    fprintf (stream, "M\tCommDilat=%f\t(" GNUMSTRING ")\n", /* Print expansion parameters */
           (double) commglbdist[256 + 1] / grafptr->edgeglbnbr,
           (Gnum) (commglbdist[256 + 1] / 2));
    fprintf (stream, "M\tCommExpan=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) commglbdist[256 + 2] / (double) commglbload),
             (Gnum) (commglbdist[256 + 2] / 2));
    fprintf (stream, "M\tCommCutSz=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) (commglbload - commglbdist[0]) / (double) commglbload),
             (Gnum) ((commglbload - commglbdist[0]) / 2));
    fprintf (stream, "M\tCommDelta=%f\n",
             (((double) commglbload  * (double) commglbdist[256 + 1]) == 0.0L)
             ? (double) 0.0L
             : ((double) commglbdist[256 + 2] * (double) grafptr->edgeglbnbr) /
               ((double) commglbload * (double) commglbdist[256 + 2]));

    for (distmax = 255; distmax != -1; distmax --)  /* Find longest distance */
      if (commglbdist[distmax] != 0)
        break;
    for (distval = 0; distval <= distmax; distval ++) /* Print distance histogram */
      fprintf (stream, "M\tCommLoad[" ANUMSTRING "]=%f\n",
               (Anum) distval,
               (double) commglbdist[distval] / (double) commglbload);
  }

  memFree (nmskloctab);                           /* Free group leader */

  return (0);
}