/* Library entry point for starting a halo exchange.
** The opaque library handles are cast to their internal
** counterparts and handed over, together with the data
** array and its MPI datatype, to dgraphHaloAsync ().
** It returns:
** - 0  : in all cases; no status from the internal
**        routine is examined here.
*/

int
SCOTCH_dgraphHaloAsync (
SCOTCH_Dgraph * const         grafptr,
void * const                  datatab,
const MPI_Datatype            typeval,
SCOTCH_DgraphHaloReq * const  requptr)
{
  Dgraph * const            intgrafptr = (Dgraph *) grafptr; /* Internal view of graph handle   */
  byte * const              intdatatab = (byte *) datatab;   /* User data seen as raw bytes     */
  DgraphHaloRequest * const intrequptr = (DgraphHaloRequest *) requptr; /* Internal request view */

  dgraphHaloAsync (intgrafptr, intdatatab, typeval, intrequptr);

  return (0);
}
/* This routine builds a band graph around the current
** frontier of the given distributed bipartition graph
** by means of dgraphBand (), applies the band strategy
** (paraptr->stratbnd) to it, and projects the resulting
** parts, frontier and communication loads back onto the
** original graph.
** The original strategy (paraptr->stratorg) is applied
** to the full graph instead when:
** - the graph has no frontier vertices;
** - the anchor degree test fails (see below);
** - the anchors did not all stay in, or all leave, their
**   initial parts after band bipartitioning;
** - the flag array could not be allocated on some process.
** All resources acquired by this routine (band graph and
** flag array) are released on every return path,
** including the error paths.
** It returns:
** - 0   : on success.
** - 1   : on error.
** - the return value of bdgraphBipartSt () whenever the
**   original strategy is applied to the full graph.
*/

int
bdgraphBipartBd (
Bdgraph * const                     orggrafptr,   /*+ Distributed graph +*/
const BdgraphBipartBdParam * const  paraptr)      /*+ Method parameters +*/
{
  Bdgraph                 bndgrafdat;             /* Bipartitioning band graph structure                                 */
  Gnum                    bndvertancnnd;          /* End of local vertex array, without anchors                          */
  Gnum                    bndvertlocnbr1;         /* Number of band graph vertices in part 1 except anchor 1             */
  Gnum                    bndvertlocnum;
  Gnum                    bndvertlvlnum;          /* Based number of first band vertex in last layer                     */
  Gnum                    bndvertlocancadj;       /* Flag set when anchor(s) represent unexistent vertices               */
  Gnum                    bndvertglbancadj;       /* Global adjustment of anchor vertices                                */
  Gnum                    bndveexlocsum;          /* Local sum of veexloctax array cells for band graph                  */
  Gnum                    bndveexlocsum0;         /* Local sum of veexloctax array cells in part 0 for band graph        */
  Gnum                    bndedlolocval;
  Gnum                    bndfronlocnum;
  Gnum                    orgfronlocnum;
  int * restrict          orgflagloctab;
  Gnum                    orgvertlocnum;
  Gnum                    orgedlolocval;
  const int * restrict    orgprocsidtab;
  int                     orgprocsidnbr;
  int                     orgprocsidnum;
  int                     orgprocsidval;
  Gnum                    complocsizeadj0;
  Gnum                    commlocloadintn;
  Gnum                    commlocloadintn2;       /* Twice twice (4 times) the internal communication load of last layer */
  Gnum                    commlocloadextn;
  Gnum                    commlocgainextn;
  Gnum                    reduloctab[7];
  Gnum                    reduglbtab[7];
  DgraphHaloRequest       requdat;

  if (orggrafptr->fronglbnbr == 0)                /* If no separator vertices, apply strategy to full (original) graph */
    return (bdgraphBipartSt (orggrafptr, paraptr->stratorg));

  if (dgraphBand (&orggrafptr->s, orggrafptr->fronlocnbr, orggrafptr->fronloctab, orggrafptr->partgsttax,
                  orggrafptr->complocload0, orggrafptr->s.velolocsum - orggrafptr->complocload0, paraptr->distmax,
                  &bndgrafdat.s, &bndgrafdat.fronloctab, &bndgrafdat.partgsttax,
                  &bndvertlvlnum, &bndvertlocnbr1, &bndvertlocancadj) != 0) {
    errorPrint ("bdgraphBipartBd: cannot create band graph");
    return     (1);
  }
  bndvertancnnd = bndgrafdat.s.vertlocnnd - 2;    /* The two last local vertices are the anchors */

  reduloctab[0]  = 0;                             /* Assume no memory allocation problem */
  bndveexlocsum  =
  bndveexlocsum0 = 0;
  bndgrafdat.veexloctax = NULL;                   /* Assume no external gains */
  if (orggrafptr->veexloctax != NULL) {
    if ((bndgrafdat.veexloctax = memAlloc (bndgrafdat.s.vertlocnbr * sizeof (Gnum))) == NULL) {
      errorPrint ("bdgraphBipartBd: out of memory (1)");
      reduloctab[0] = 1;                          /* Memory error; reported to all processes by the reduction below */
    }
    else {
      Gnum                bndvertlocnum;

      bndgrafdat.veexloctax -= bndgrafdat.s.baseval;

      for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) {
        Gnum                veexval;

        veexval = orggrafptr->veexloctax[bndgrafdat.s.vnumloctax[bndvertlocnum]];
        bndgrafdat.veexloctax[bndvertlocnum] = veexval;
        bndveexlocsum  += veexval;
        bndveexlocsum0 += veexval & (((Gnum) bndgrafdat.partgsttax[bndvertlocnum]) - 1); /* Mask is all ones for part 0 and zero for part 1 */
      }
    }
  }
  reduloctab[1] = bndgrafdat.s.vendloctax[bndvertancnnd]     - bndgrafdat.s.vertloctax[bndvertancnnd]     - (orggrafptr->s.procglbnbr - 1); /* Anchor degrees */
  reduloctab[2] = bndgrafdat.s.vendloctax[bndvertancnnd + 1] - bndgrafdat.s.vertloctax[bndvertancnnd + 1] - (orggrafptr->s.procglbnbr - 1);

  bndgrafdat.complocsize0 = bndgrafdat.s.vertlocnbr - (bndvertlocnbr1 + 1); /* Add 1 for anchor vertex 1 */
  complocsizeadj0 = orggrafptr->complocsize0 - bndgrafdat.complocsize0; /* -1 less because of anchor 0 */
  reduloctab[3] = bndgrafdat.complocsize0;
  reduloctab[4] = bndvertlocancadj;               /* Sum increases in size and load */
  reduloctab[5] = bndveexlocsum;
  reduloctab[6] = bndveexlocsum0;
  if (MPI_Allreduce (reduloctab, reduglbtab, 7, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint  ("bdgraphBipartBd: communication error (1)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
  if (reduglbtab[0] != 0) {                       /* If memory error on some process */
    bdgraphExit (&bndgrafdat);
    return      (1);
  }
  if ((reduglbtab[1] == 0) ||                     /* If graph is too small to have any usable anchors */
      (reduglbtab[2] == 0)) {
    bdgraphExit (&bndgrafdat);
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  bndvertglbancadj            = reduglbtab[4];
  bndgrafdat.veexglbsum       = orggrafptr->veexglbsum; /* All external gains preserved                  */
  bndgrafdat.fronlocnbr       = orggrafptr->fronlocnbr; /* All separator vertices are kept in band graph */
  bndgrafdat.fronglbnbr       = orggrafptr->fronglbnbr;
  bndgrafdat.complocload0     = orggrafptr->complocload0    + bndvertlocancadj; /* All loads are kept in band graph */
  bndgrafdat.compglbload0     = orggrafptr->compglbload0    + bndvertglbancadj;
  bndgrafdat.compglbload0min  = orggrafptr->compglbload0min + bndvertglbancadj; /* Tilt extrema loads according to adjustments */
  bndgrafdat.compglbload0max  = orggrafptr->compglbload0max + bndvertglbancadj;
  bndgrafdat.compglbload0avg  = orggrafptr->compglbload0avg + bndvertglbancadj; /* Tilt average load according to adjustments */
  bndgrafdat.compglbload0dlt  = orggrafptr->compglbload0dlt;
  bndgrafdat.compglbsize0     = reduglbtab[3];
  bndgrafdat.commglbload      = orggrafptr->commglbload;
  bndgrafdat.commglbgainextn  = orggrafptr->commglbgainextn;
  bndgrafdat.commglbloadextn0 = orggrafptr->commglbloadextn0;
  bndgrafdat.commglbgainextn0 = orggrafptr->commglbgainextn0;
  bndgrafdat.bbalglbval       = orggrafptr->bbalglbval;
  bndgrafdat.domndist         = orggrafptr->domndist;
  bndgrafdat.domnwght[0]      = orggrafptr->domnwght[0];
  bndgrafdat.domnwght[1]      = orggrafptr->domnwght[1];
  bndgrafdat.levlnum          = orggrafptr->levlnum;

  if (bndgrafdat.veexloctax != NULL) {
    Gnum                bndveexglbanc0;
    Gnum                bndveexglbanc1;

    bndveexglbanc0 = (orggrafptr->veexglbsum + orggrafptr->commglbgainextn) / 2 - reduglbtab[6]; /* Compute global external gains of anchors */
    bndveexglbanc1 = (orggrafptr->veexglbsum - bndveexglbanc0) - reduglbtab[5];

    bndgrafdat.veexloctax[bndvertancnnd]     = DATASIZE (bndveexglbanc0, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum); /* Spread gains across local anchors */
    bndgrafdat.veexloctax[bndvertancnnd + 1] = DATASIZE (bndveexglbanc1, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum);
  }

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (&bndgrafdat) != 0) {
    errorPrint  ("bdgraphBipartBd: internal error (1)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  if (bdgraphBipartSt (&bndgrafdat, paraptr->stratbnd) != 0) { /* Separate distributed band graph */
    errorPrint  ("bdgraphBipartBd: cannot separate band graph");
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  reduloctab[0] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd]; /* Check if anchor vertices remain in their parts */
  reduloctab[1] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd + 1];
  reduloctab[2] = complocsizeadj0;
  reduloctab[3] = 0;                              /* Assume memory allocation is all right */
  if ((orgflagloctab = memAlloc (flagSize (orggrafptr->s.vertlocnnd) * sizeof (int))) == NULL) { /* Eventually keep space for based indices */
    errorPrint ("bdgraphBipartBd: out of memory (2)");
    reduloctab[3] = 1;
  }

  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint ("bdgraphBipartBd: communication error (2)");
    if (orgflagloctab != NULL)                    /* Do not leak flag array nor band graph on error */
      memFree (orgflagloctab);
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  if (((reduglbtab[0] + reduglbtab[1]) != orggrafptr->s.procglbnbr) || /* If not all anchors of initial same parts in same parts */
      ((reduglbtab[0] != 0) && (reduglbtab[0] != orggrafptr->s.procglbnbr)) ||
      (reduglbtab[3] != 0)) {
    if (orgflagloctab != NULL)
      memFree (orgflagloctab);
    bdgraphExit (&bndgrafdat);                    /* Apply original strategy to full graph */
    return      (bdgraphBipartSt (orggrafptr, paraptr->stratorg));
  }

  if (dgraphGhst (&bndgrafdat.s) != 0) {          /* Compute ghost edge array if not already present */
    errorPrint  ("bdgraphBipartBd: cannot compute ghost edge array");
    memFree     (orgflagloctab);                  /* Flag array is known to be allocated at this stage */
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  if (reduglbtab[0] == orggrafptr->s.procglbnbr) { /* If all anchors swapped parts, swap all parts of original vertices */
    Gnum                orgvertnum;

    orggrafptr->complocsize0 = orggrafptr->s.vertlocnbr - reduloctab[2] - bndgrafdat.s.vertlocnbr + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = orggrafptr->s.vertglbnbr - reduglbtab[2] - bndgrafdat.s.vertglbnbr + bndgrafdat.compglbsize0;

    for (orgvertnum = orggrafptr->s.baseval; orgvertnum < orggrafptr->s.vertlocnnd; orgvertnum ++)
      orggrafptr->partgsttax[orgvertnum] ^= 1;
  }
  else {
    orggrafptr->complocsize0 = reduloctab[2] + bndgrafdat.complocsize0;
    orggrafptr->compglbsize0 = reduglbtab[2] + bndgrafdat.compglbsize0;
  }

  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) /* Update part array of all vertices except anchors */
    orggrafptr->partgsttax[bndgrafdat.s.vnumloctax[bndvertlocnum]] = bndgrafdat.partgsttax[bndvertlocnum];

  dgraphHaloAsync (&orggrafptr->s, (byte *) (orggrafptr->partgsttax + orggrafptr->s.baseval), GRAPHPART_MPI, &requdat); /* Share part array of full graph */

  commlocloadintn =
  commlocloadextn =
  commlocgainextn = 0;
  bndedlolocval   = 1;                            /* Assume no edge loads */
  for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertlvlnum; bndvertlocnum ++) { /* For all vertices of band graph save for last layer */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum];
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn += (bndpartval ^ bndpartend) * bndedlolocval; /* Internal load is accounted for twice */
    }
  }
  for ( ; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer, remove internal loads to band vertices once */
    Gnum                bndedgelocnum;
    Gnum                bndedgelocnnd;
    Gnum                bndpartval;

    bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum];
    if (bndgrafdat.veexloctax != NULL) {
      commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval;
      commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2);
    }
    for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum] - 1; /* "-1" to avoid anchor edges */
         bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) {
      Gnum                bndvertlocend;
      Gnum                bndpartend;

      bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum];
      bndpartend    = bndgrafdat.partgsttax[bndvertlocend];

      if (bndgrafdat.s.edloloctax != NULL)
        bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum];
      commlocloadintn -= (bndpartval ^ bndpartend) * bndedlolocval; /* Remove internal loads to band graph vertices once because afterwards they will be accounted for twice */
    }
  }

  memSet (orgflagloctab, 0, flagSize (orggrafptr->s.vertlocnnd) * sizeof (int)); /* Set vertices as not already considered */

  for (bndfronlocnum = orgfronlocnum = 0; bndfronlocnum < bndgrafdat.fronlocnbr; bndfronlocnum ++) { /* Project back separator except for last layer */
    Gnum                bndvertlocnum;

    bndvertlocnum = bndgrafdat.fronloctab[bndfronlocnum];
    if (bndvertlocnum < bndvertlvlnum) {          /* If vertex does not belong to last layer */
      Gnum                orgvertlocnum;

      orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
      flagSet (orgflagloctab, orgvertlocnum);     /* Set vertex as processed */
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
    }
  }

  if (dgraphHaloWait (&requdat) != 0) {           /* Ghost parts of full graph are needed from now on */
    errorPrint  ("bdgraphBipartBd: cannot complete asynchronous halo exchange");
    memFree     (orgflagloctab);                  /* Do not leak flag array nor band graph on error */
    bdgraphExit (&bndgrafdat);
    return      (1);
  }

  orgedlolocval    = 1;                           /* Assume no edge loads */
  commlocloadintn2 = 0;
  for (bndvertlocnum = bndvertlvlnum; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer */
    Gnum                orgedgelocnum;
    Gnum                orgedgelocnnd;
    Gnum                orgvertlocnum;
    GraphPart           orgpartval;
    Gnum                orgflagval;

    orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum];
    orgpartval    = bndgrafdat.partgsttax[bndvertlocnum];

    orgflagval = 0;                               /* Assume vertex does not belong to the frontier */
    for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
         orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
      Gnum                orgvertlocend;
      Gnum                orgpartend;
      Gnum                orgflagtmp;

      orgvertlocend = orggrafptr->s.edgegsttax[orgedgelocnum];
      orgpartend    = orggrafptr->partgsttax[orgvertlocend];

      orgflagtmp = orgpartval ^ orgpartend;
      if (bndgrafdat.s.edloloctax != NULL)        /* NOTE(review): guard tests the band graph array while the original graph array is read; presumably both exist together - confirm against dgraphBand () */
        orgedlolocval = orggrafptr->s.edloloctax[orgedgelocnum];
      orgflagval       |= orgflagtmp;
      commlocloadintn2 += orgflagtmp * orgedlolocval; /* Internal load to band and original graph vertices are accounted for twice */
      if ((orgflagtmp != 0) && (orgvertlocend < orggrafptr->s.vertlocnnd) && (flagVal (orgflagloctab, orgvertlocend) == 0)) {
        orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocend;
        flagSet (orgflagloctab, orgvertlocend);
      }
    }
    if ((orgflagval != 0) && (flagVal (orgflagloctab, orgvertlocnum) == 0))
      orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;

    flagSet (orgflagloctab, orgvertlocnum);       /* Set vertex as processed anyway */
  }
  commlocloadintn += 2 * commlocloadintn2;        /* Add twice the internal load of original graph edges and once the one of band edges (one removed before) */

  orggrafptr->complocload0    = bndgrafdat.complocload0 - bndvertlocancadj;
  orggrafptr->compglbload0    = bndgrafdat.compglbload0 - bndvertglbancadj;
  orggrafptr->compglbload0dlt = orggrafptr->compglbload0 - orggrafptr->compglbload0avg;

  orgprocsidnbr = orggrafptr->s.procsidnbr;
  if (orgprocsidnbr == 0)
    goto loop_exit;
  orgvertlocnum = orggrafptr->s.baseval;
  orgprocsidnum = 0;
  orgprocsidtab = orggrafptr->s.procsidtab;
  orgprocsidval = orgprocsidtab[orgprocsidnum ++];
  while (1) {                                     /* Scan all vertices which have foreign neighbors */
    while (orgprocsidval < 0) {                   /* Negative cells are subtracted from the current vertex index, moving it forward */
      orgvertlocnum -= (Gnum) orgprocsidval;
      orgprocsidval  = orgprocsidtab[orgprocsidnum ++];
    }

    if (flagVal (orgflagloctab, orgvertlocnum) == 0) { /* If vertex not already processed */
      Gnum                orgedgelocnum;
      Gnum                orgedgelocnnd;
      GraphPart           orgpartval;

      orgpartval = orggrafptr->partgsttax[orgvertlocnum];
      for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum];
           orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) {
        if (orggrafptr->partgsttax[orggrafptr->s.edgegsttax[orgedgelocnum]] != orgpartval) {
          orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum;
          break;
        }
      }
    }

    do {                                          /* Skip remaining non-negative cells; stop when array is exhausted */
      if (orgprocsidnum >= orgprocsidnbr)
        goto loop_exit;
    } while ((orgprocsidval = orgprocsidtab[orgprocsidnum ++]) >= 0);
  }
loop_exit :
  memFree (orgflagloctab);

  reduloctab[0] = commlocloadintn;                /* Twice the internal load; sum globally before dividing by two */
  reduloctab[1] = commlocloadextn;
  reduloctab[2] = commlocgainextn;
  reduloctab[3] = orgfronlocnum;
  if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) {
    errorPrint  ("bdgraphBipartBd: communication error (3)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
  orggrafptr->fronlocnbr      = orgfronlocnum;
  orggrafptr->fronglbnbr      = reduglbtab[3];
  orggrafptr->commglbload     = (reduglbtab[0] / 2) * orggrafptr->domndist + reduglbtab[1];
  orggrafptr->commglbgainextn = reduglbtab[2];
  orggrafptr->bbalglbval      = (double) ((orggrafptr->compglbload0dlt < 0) ? (- orggrafptr->compglbload0dlt) : orggrafptr->compglbload0dlt) / (double) orggrafptr->compglbload0avg;

#ifdef SCOTCH_DEBUG_BDGRAPH2
  if (bdgraphCheck (orggrafptr) != 0) {
    errorPrint  ("bdgraphBipartBd: internal error (2)");
    bdgraphExit (&bndgrafdat);                    /* Do not leak band graph on error */
    return      (1);
  }
#endif /* SCOTCH_DEBUG_BDGRAPH2 */

  bdgraphExit (&bndgrafdat);

  return (0);
}
/* This routine computes statistics on the mapping of a
** distributed graph and, when stream is not NULL, writes
** them to it as lines prefixed with "M\t":
** - "Processors" : number of used versus available
**                  target domains;
** - "Target"     : min, max, average, deviation and
**                  max/average ratio of target loads;
** - "Neighbors"  : min, max and sum of the numbers of
**                  neighbor domains of non-empty domains;
** - "CommDilat", "CommExpan", "CommCutSz", "CommDelta" :
**                  communication metrics;
** - "CommLoad[d]": fraction of edge load at distance d,
**                  distances above 255 being accounted
**                  for in cell 255.
** The reductions are performed whether stream is NULL
** or not, since they do not depend on it.
** It returns:
** - 0   : on success, or if the graph has no vertices
**         or no edges.
** - 1   : on error (variable-sized architecture, memory
**         or communication error).
*/

int
SCOTCH_dgraphMapView (
SCOTCH_Dgraph * const         libgrafptr,
const SCOTCH_Dmapping * const libmappptr,
FILE * const                  stream)
{
  Dgraph * restrict             grafptr;
  const LibDmapping * restrict  mappptr;
  ArchDom                       domnfrst;         /* Largest domain in architecture             */
  unsigned int * restrict       nmskloctab;       /* Local neighbor bitfield                    */
  unsigned int * restrict       nmskglbtab;       /* Global neighbor bitfield                   */
  int                           nmskidxnbr;       /* Size of bitfield; int since sent by MPI    */
  Gnum * restrict               tgloloctab;       /* Local array of terminal domain loads       */
  Gnum * restrict               tgloglbtab;       /* Global array of terminal domain loads      */
  Gnum * restrict               termgsttax;       /* Terminal domain ghost mapping array        */
  Anum                          tgtnbr;           /* Number of processors in target topology    */
  Anum                          tgtnum;
  Anum                          mapnbr;           /* Number of processors effectively used      */
  double                        mapavg;           /* Average mapping weight                     */
  Gnum                          mapmin;
  Gnum                          mapmax;
  Gnum                          mapsum;           /* (Partial) sum of vertex loads              */
  double                        mapdlt;
  double                        mapmmy;           /* Maximum / average ratio                    */
  Anum                          ngbsum;
  Anum                          ngbmin;
  Anum                          ngbmax;
  Gnum                          vertlocnum;
  Gnum                          veloval;
  Gnum                          edloval;
  Gnum                          commlocdist[256 + 3]; /* Array of local load distribution       */
  Gnum                          commglbdist[256 + 3];
  Gnum                          commlocload;      /* Total local edge load (edge sum)           */
  Gnum                          commlocdilat;     /* Total edge dilation                        */
  Gnum                          commlocexpan;     /* Total edge expansion                       */
  Anum                          distmax;
  Anum                          distval;
  int                           cheklocval;
  int                           chekglbval;
  DgraphHaloRequest             requdat;

  grafptr = (Dgraph *) libgrafptr;
  mappptr = (LibDmapping *) libmappptr;

  if ((grafptr->vertglbnbr == 0) ||               /* Return if nothing to do */
      (grafptr->edgeglbnbr == 0))
    return (0);

  archDomFrst (&mappptr->m.archdat, &domnfrst);   /* Get architecture domain */
  tgtnbr = archDomSize (&mappptr->m.archdat, &domnfrst); /* Get architecture size */

  if (archVar (&mappptr->m.archdat)) {
    errorPrint ("SCOTCH_dgraphMapView: not implemented");
    return     (1);
  }

  if (dgraphGhst (grafptr) != 0) {                /* Compute ghost edge array if not already present */
    errorPrint ("SCOTCH_dgraphMapView: cannot compute ghost edge array");
    return     (1);
  }

  nmskidxnbr = (tgtnbr + 1 + ((sizeof (int) << 3) - 1)) / (sizeof (int) << 3); /* Size of neighbor subdomain bitfield; TRICK: "+1" to have a "-1" cell for unmapped vertices */

  cheklocval = 0;
  if (memAllocGroup ((void **) (void *)
                     &nmskloctab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &nmskglbtab, (size_t) (nmskidxnbr          * sizeof (unsigned int)),
                     &tgloloctab, (size_t) ((tgtnbr + 1)        * sizeof (Gnum)), /* TRICK: "+1" to have a "-1" cell for unmapped vertices */
                     &tgloglbtab, (size_t) (tgtnbr              * sizeof (Gnum)),
                     &termgsttax, (size_t) (grafptr->vertgstnbr * sizeof (Gnum)), NULL) == NULL) {
    cheklocval = 1;
  }
  if (MPI_Allreduce (&cheklocval, &chekglbval, 1, MPI_INT, MPI_MAX, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (1)");
    if (nmskloctab != NULL)                       /* Do not leak group when local allocation succeeded */
      memFree (nmskloctab);
    return     (1);
  }
  if (chekglbval != 0) {                          /* If allocation failed on some process */
    if (nmskloctab != NULL)
      memFree (nmskloctab);
    errorPrint ("SCOTCH_dgraphMapView: out of memory");
    return     (1);
  }

  if (dmapTerm (&mappptr->m, grafptr, termgsttax) != 0) {
    errorPrint ("SCOTCH_dgraphMapView: cannot build local terminal array");
    memFree    (nmskloctab);
    return     (1);
  }
  dgraphHaloAsync (grafptr, termgsttax, GNUM_MPI, &requdat);
  termgsttax -= grafptr->baseval;

  memSet (tgloloctab, 0, (tgtnbr + 1) * sizeof (Gnum));
  tgloloctab ++;                                  /* TRICK: trim array for "-1" cell */

  veloval = 1;                                    /* Assume no vertex loads */
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) {
#ifdef SCOTCH_DEBUG_DMAP2
    if ((termgsttax[vertlocnum] < -1) || (termgsttax[vertlocnum] >= tgtnbr)) {
      errorPrint ("SCOTCH_dgraphMapView: invalid local terminal array");
      memFree    (nmskloctab);                    /* Free group leader */
      return     (1);
    }
#endif /* SCOTCH_DEBUG_DMAP2 */
    if (grafptr->veloloctax != NULL)
      veloval = grafptr->veloloctax[vertlocnum];
    tgloloctab[termgsttax[vertlocnum]] += veloval; /* One more vertex of given weight assigned to this target */
  }

  if (MPI_Allreduce (tgloloctab, tgloglbtab, tgtnbr, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (2)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  mapmin = GNUMMAX;
  mapmax = 0;
  mapsum = 0;
  mapnbr = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {  /* Only non-empty targets are accounted for */
    Gnum                tgtsum;

    tgtsum = tgloglbtab[tgtnum];
    if (tgtsum != 0) {
      mapnbr ++;
      mapsum += tgtsum;
      if (tgtsum < mapmin)
        mapmin = tgtsum;
      if (tgtsum > mapmax)
        mapmax = tgtsum;
    }
  }
  mapavg = (mapnbr == 0) ? 0.0L : ((double) mapsum / (double) mapnbr);

  mapdlt = 0.0L;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++)
    mapdlt += fabs ((double) tgloglbtab[tgtnum] - mapavg);
  mapdlt = (mapnbr != 0) ? mapdlt / ((double) mapnbr * mapavg) : 0.0L;
  mapmmy = (mapnbr != 0) ? (double) mapmax / (double) mapavg : 0.0L;

  if (stream != NULL) {
    fprintf (stream, "M\tProcessors " GNUMSTRING "/" GNUMSTRING "(%g)\n",
             (Gnum) mapnbr,
             (Gnum) tgtnbr,
             (double) mapnbr / (double) tgtnbr);
    fprintf (stream, "M\tTarget min=" GNUMSTRING "\tmax=" GNUMSTRING "\tavg=%g\tdlt=%g\tmaxavg=%g\n",
             (Gnum) mapmin,
             (Gnum) mapmax,
             mapavg,
             mapdlt,
             mapmmy);
  }

  if (dgraphHaloWait (&requdat) != 0) {           /* Wait for ghost terminal data to be exchanged */
    errorPrint ("SCOTCH_dgraphMapView: cannot complete asynchronous halo exchange");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  ngbmin = ANUMMAX;
  ngbmax = 0;
  ngbsum = 0;
  for (tgtnum = 0; tgtnum < tgtnbr; tgtnum ++) {  /* For all subdomain indices */
    int                 nmskidxnum;
    Gnum                vertlocnum;
    Anum                ngbnbr;

    if (tgloglbtab[tgtnum] <= 0)                  /* If empty subdomain, skip it */
      continue;

    memSet (nmskloctab, 0, nmskidxnbr * sizeof (int)); /* Reset neighbor bit mask */

    for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
      Gnum                termnum;
      Gnum                edgelocnum;
      Gnum                edgelocnnd;

      termnum = termgsttax[vertlocnum];
      if (termnum != tgtnum)                      /* If vertex does not belong to current part or is not mapped, skip it */
        continue;

      for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
           edgelocnum < edgelocnnd; edgelocnum ++) {
        Gnum                termend;

        termend = termgsttax[grafptr->edgegsttax[edgelocnum]];
        if (termend != tgtnum) {                  /* If edge is not internal */
          termend ++;                             /* TRICK: turn unmapped to 0 and so on */
          nmskloctab[termend / (sizeof (int) << 3)] |= 1U << (termend & ((sizeof (int) << 3) - 1)); /* Flag neighbor in bit array; unsigned constant so that setting the highest bit is well defined */
        }
      }
    }
    nmskloctab[0] &= ~1U;                         /* Do not account for unmapped vertices (terminal domain 0 because of "+1") */

    if (MPI_Allreduce (nmskloctab, nmskglbtab, nmskidxnbr, MPI_INT, MPI_BOR, grafptr->proccomm) != MPI_SUCCESS) {
      errorPrint ("SCOTCH_dgraphMapView: communication error (3)");
      memFree    (nmskloctab);                    /* Free group leader */
      return     (1);
    }

    for (nmskidxnum = 0, ngbnbr = 0; nmskidxnum < nmskidxnbr; nmskidxnum ++) { /* Count bits set in global mask */
      unsigned int        nmskbitval;

      for (nmskbitval = nmskglbtab[nmskidxnum]; nmskbitval != 0; nmskbitval >>= 1)
        ngbnbr += nmskbitval & 1;
    }

    ngbsum += ngbnbr;
    if (ngbnbr < ngbmin)
      ngbmin = ngbnbr;
    if (ngbnbr > ngbmax)
      ngbmax = ngbnbr;
  }

  if (stream != NULL) {
    fprintf (stream, "M\tNeighbors min=" GNUMSTRING "\tmax=" GNUMSTRING "\tsum=" GNUMSTRING "\n",
             (Gnum) ngbmin,
             (Gnum) ngbmax,
             (Gnum) ngbsum);
  }

  memSet (commlocdist, 0, 256 * sizeof (Gnum));   /* Initialize the data; the 3 trailing cells are set below */
  commlocload  =
  commlocdilat =
  commlocexpan = 0;

  edloval = 1;                                    /* Assume no edge loads */
  for (vertlocnum = grafptr->baseval; vertlocnum < grafptr->vertlocnnd; vertlocnum ++) { /* For all local vertices */
    Gnum                termlocnum;
    ArchDom             termdomdat;
    Gnum                edgelocnum;
    Gnum                edgelocnnd;

    termlocnum = termgsttax[vertlocnum];
    if (termlocnum == ~0)                         /* Skip unmapped vertices */
      continue;

    archDomTerm (&mappptr->m.archdat, &termdomdat, termlocnum);

    for (edgelocnum = grafptr->vertloctax[vertlocnum], edgelocnnd = grafptr->vendloctax[vertlocnum];
         edgelocnum < edgelocnnd; edgelocnum ++) {
      ArchDom             termdomend;
      Gnum                termgstend;
      Anum                distval;

      termgstend = termgsttax[grafptr->edgegsttax[edgelocnum]];
      if (termgstend == ~0)                       /* Skip unmapped end vertices */
        continue;

      distval = 0;
      if (grafptr->edloloctax != NULL)            /* Get edge weight if any */
        edloval = grafptr->edloloctax[edgelocnum];
      if (termgstend != termlocnum) {             /* If not same domain, compute distance */
        archDomTerm (&mappptr->m.archdat, &termdomend, termgstend);
        distval = archDomDist (&mappptr->m.archdat, &termdomdat, &termdomend);
      }

      commlocdist[(distval > 255) ? 255 : distval] += edloval; /* Distances above 255 share the last histogram cell */
      commlocload  += edloval;
      commlocdilat += distval;
      commlocexpan += distval * edloval;
    }
  }

  commlocdist[256]     = commlocload;             /* Piggy-back the three sums onto the histogram reduction */
  commlocdist[256 + 1] = commlocdilat;
  commlocdist[256 + 2] = commlocexpan;

  if (MPI_Allreduce (commlocdist, commglbdist, 256 + 3, GNUM_MPI, MPI_SUM, grafptr->proccomm) != MPI_SUCCESS) {
    errorPrint ("SCOTCH_dgraphMapView: communication error (4)");
    memFree    (nmskloctab);                      /* Free group leader */
    return     (1);
  }

  if (stream != NULL) {
    Gnum                commglbload;

    commglbload = commglbdist[256];
    fprintf (stream, "M\tCommDilat=%f\t(" GNUMSTRING ")\n", /* Print expansion parameters */
             (double) commglbdist[256 + 1] / grafptr->edgeglbnbr,
             (Gnum) (commglbdist[256 + 1] / 2));
    fprintf (stream, "M\tCommExpan=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) commglbdist[256 + 2] / (double) commglbload),
             (Gnum) (commglbdist[256 + 2] / 2));
    fprintf (stream, "M\tCommCutSz=%f\t(" GNUMSTRING ")\n",
             ((commglbload == 0) ? (double) 0.0L
                                 : (double) (commglbload - commglbdist[0]) / (double) commglbload),
             (Gnum) ((commglbload - commglbdist[0]) / 2));
    fprintf (stream, "M\tCommDelta=%f\n",         /* (expansion * edges) / (load * dilation); divisor is the very product tested against zero */
             (((double) commglbload * (double) commglbdist[256 + 1]) == 0.0L)
             ? (double) 0.0L
             : ((double) commglbdist[256 + 2] * (double) grafptr->edgeglbnbr) /
               ((double) commglbload * (double) commglbdist[256 + 1]));

    for (distmax = 255; distmax != -1; distmax --) /* Find longest distance */
      if (commglbdist[distmax] != 0)
        break;
    for (distval = 0; distval <= distmax; distval ++) /* Print distance histogram */
      fprintf (stream, "M\tCommLoad[" ANUMSTRING "]=%f\n",
               (Anum) distval,
               (double) commglbdist[distval] / (double) commglbload);
  }

  memFree (nmskloctab);                           /* Free group leader */

  return (0);
}