int bdgraphBipartSq ( Bdgraph * const dgrfptr, /*+ Distributed graph +*/ const BdgraphBipartSqParam * const paraptr) /*+ Method parameters +*/ { Bgraph cgrfdat; /* Centralized bipartitioned graph structure */ Gnum reduloctab[6]; /* Local array for best bipartition data (7 for Bcast) */ Gnum reduglbtab[6]; /* Global array for best bipartition data */ MPI_Datatype besttypedat; /* Data type for finding best bipartition */ MPI_Op bestoperdat; /* Handle of MPI operator for finding best bipartition */ int bestprocnum; /* Rank of process holding best partition */ Gnum * restrict vnumloctax; Gnum vertlocnum; Gnum complocsize1; Gnum complocload1; Gnum fronlocnbr; int o; if ((MPI_Type_contiguous (6, GNUM_MPI, &besttypedat) != MPI_SUCCESS) || (MPI_Type_commit (&besttypedat) != MPI_SUCCESS) || (MPI_Op_create ((MPI_User_function *) bdgraphBipartSqOpBest, 1, &bestoperdat) != MPI_SUCCESS)) { errorPrint ("bdgraphBipartSq: communication error (1)"); return (1); } reduloctab[0] = /* In case of error, maximum communication load */ reduloctab[1] = GNUMMAX; /* And maximum load imbalance */ reduloctab[2] = dgrfptr->s.proclocnum; reduloctab[3] = /* Assume sequential bipartioning went fine */ reduloctab[4] = 0; reduloctab[5] = 0; /* Assume no errors */ vnumloctax = dgrfptr->s.vnumloctax; /* No need for vertex number array when centralizing graph */ dgrfptr->s.vnumloctax = NULL; o = bdgraphGatherAll (dgrfptr, &cgrfdat); dgrfptr->s.vnumloctax = vnumloctax; /* Restore vertex number array */ if (o != 0) { errorPrint ("bdgraphBipartSq: cannot build centralized graph"); return (1); } if (bgraphBipartSt (&cgrfdat, paraptr->strat) != 0) { /* Bipartition centralized graph */ errorPrint ("bdgraphBipartSq: cannot bipartition centralized graph"); reduloctab[3] = reduloctab[4] = 1; } else { /* Fill local array with local bipartition data */ reduloctab[0] = ((cgrfdat.fronnbr != 0) || ((cgrfdat.compsize0 != 0) && ((cgrfdat.s.vertnbr - cgrfdat.compsize0) != 0))) ? cgrfdat.commload : GNUMMAX; /* Partitions with empty bipartitions unwanted if they are completely unbalanced */ reduloctab[1] = cgrfdat.compload0dlt; } if (dgrfptr->partgsttax == NULL) { if (dgraphGhst (&dgrfptr->s) != 0) { /* Compute ghost edge array if not already present, before copying graph fields */ errorPrint ("bdgraphBipartSq: cannot compute ghost edge array"); reduloctab[5] = 1; } else { if ((dgrfptr->partgsttax = (GraphPart *) memAlloc (dgrfptr->s.vertgstnbr * sizeof (GraphPart))) == NULL) { errorPrint ("bdgraphBipartSq: out of memory (1)"); reduloctab[5] = 1; /* Allocated data will be freed along with graph structure */ } dgrfptr->partgsttax -= dgrfptr->s.baseval; } if ((dgrfptr->fronloctab = (Gnum *) memAlloc (dgrfptr->s.vertlocnbr * sizeof (Gnum))) == NULL) { errorPrint ("bdgraphBipartSq: out of memory (2)"); reduloctab[5] = 1; } } if (MPI_Allreduce (reduloctab, reduglbtab, 1, besttypedat, bestoperdat, dgrfptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartSq: communication error (2)"); return (1); } if ((reduloctab[4] != 0) && (reduloctab[4] != dgrfptr->s.procglbnbr)) { errorPrint ("bdgraphBipartSq: internal error"); return (1); } if ((MPI_Op_free (&bestoperdat) != MPI_SUCCESS) || (MPI_Type_free (&besttypedat) != MPI_SUCCESS)) { errorPrint ("bdgraphBipartSq: communication error (3)"); return (1); } if (reduglbtab[3] != 0) { /* If none of the sequential methods succeeded */ bgraphExit (&cgrfdat); return (1); } bestprocnum = (int) reduglbtab[2]; if (dgrfptr->s.proclocnum == bestprocnum) { /* If process holds best partition */ reduloctab[0] = cgrfdat.compload0; /* Global values to share */ reduloctab[1] = cgrfdat.compsize0; reduloctab[2] = cgrfdat.commload; reduloctab[3] = cgrfdat.commgainextn; reduloctab[4] = cgrfdat.fronnbr; } if (MPI_Bcast (reduloctab, 5, GNUM_MPI, bestprocnum, dgrfptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartSq: communication error (4)"); return (1); } dgrfptr->compglbload0 = reduloctab[0]; dgrfptr->compglbload0dlt = reduloctab[0] - dgrfptr->compglbload0avg; dgrfptr->compglbsize0 = reduloctab[1]; dgrfptr->commglbload = reduloctab[2]; dgrfptr->commglbgainextn = reduloctab[3]; dgrfptr->fronglbnbr = reduloctab[4]; if (commScatterv (cgrfdat.parttax, dgrfptr->s.proccnttab, dgrfptr->s.procdsptab, GRAPHPART_MPI, /* No base for sending as procdsptab holds based values */ dgrfptr->partgsttax + dgrfptr->s.baseval, dgrfptr->s.vertlocnbr, GRAPHPART_MPI, bestprocnum, dgrfptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartSq: communication error (5)"); return (1); } if (dgraphHaloSync (&dgrfptr->s, (byte *) (dgrfptr->partgsttax + dgrfptr->s.baseval), GRAPHPART_MPI) != 0) { errorPrint ("bdgraphBipartSq: cannot perform halo exchange"); return (1); } complocsize1 = complocload1 = 0; for (vertlocnum = dgrfptr->s.baseval, fronlocnbr = 0; vertlocnum < dgrfptr->s.vertlocnnd; vertlocnum ++) { int partval; Gnum partval1; Gnum commcut; Gnum edgelocnum; partval = dgrfptr->partgsttax[vertlocnum]; partval1 = partval & 1; complocsize1 += partval1; /* Superscalar update */ if (dgrfptr->s.veloloctax != NULL) { Gnum veloval; veloval = dgrfptr->s.veloloctax[vertlocnum]; complocload1 += (-partval1) & veloval; /* Superscalar update */ } for (edgelocnum = dgrfptr->s.vertloctax[vertlocnum], commcut = 0; edgelocnum < dgrfptr->s.vendloctax[vertlocnum]; edgelocnum ++) { /* Build local frontier */ int partend; int partdlt; partend = dgrfptr->partgsttax[dgrfptr->s.edgegsttax[edgelocnum]]; partdlt = partval ^ partend; commcut |= partdlt; } if (commcut != 0) dgrfptr->fronloctab[fronlocnbr ++] = vertlocnum; } dgrfptr->fronlocnbr = fronlocnbr; dgrfptr->complocsize0 = dgrfptr->s.vertlocnbr - complocsize1; dgrfptr->complocload0 = (dgrfptr->s.veloloctax != NULL) ? (dgrfptr->s.velolocsum - complocload1) : dgrfptr->complocsize0; bgraphExit (&cgrfdat); #ifdef SCOTCH_DEBUG_BDGRAPH2 if (bdgraphCheck (dgrfptr) != 0) { errorPrint ("bdgraphBipartSq: inconsistent graph data"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ return (0); }
int bdgraphBipartBd ( Bdgraph * const orggrafptr, /*+ Distributed graph +*/ const BdgraphBipartBdParam * const paraptr) /*+ Method parameters +*/ { Bdgraph bndgrafdat; /* Bipartitioning band graph structure */ Gnum bndvertancnnd; /* End of local vertex array, without anchors */ Gnum bndvertlocnbr1; /* Number of band graph vertices in part 1 except anchor 1 */ Gnum bndvertlocnum; Gnum bndvertlvlnum; /* Based number of first band vertex in last layer */ Gnum bndvertlocancadj; /* Flag set when anchor(s) represent unexistent vertices */ Gnum bndvertglbancadj; /* Global adjustment of anchor vertices */ Gnum bndveexlocsum; /* Local sum of veexloctax array cells for band graph */ Gnum bndveexlocsum0; /* Local sum of veexloctax array cells in part 0 for band graph */ Gnum bndedlolocval; Gnum bndfronlocnum; Gnum orgfronlocnum; int * restrict orgflagloctab; Gnum orgvertlocnum; Gnum orgedlolocval; const int * restrict orgprocsidtab; int orgprocsidnbr; int orgprocsidnum; int orgprocsidval; Gnum complocsizeadj0; Gnum commlocloadintn; Gnum commlocloadintn2; /* Twice twice (4 times) the internal communication load of last layer */ Gnum commlocloadextn; Gnum commlocgainextn; Gnum reduloctab[7]; Gnum reduglbtab[7]; DgraphHaloRequest requdat; if (orggrafptr->fronglbnbr == 0) /* If no separator vertices, apply strategy to full (original) graph */ return (bdgraphBipartSt (orggrafptr, paraptr->stratorg)); if (dgraphBand (&orggrafptr->s, orggrafptr->fronlocnbr, orggrafptr->fronloctab, orggrafptr->partgsttax, orggrafptr->complocload0, orggrafptr->s.velolocsum - orggrafptr->complocload0, paraptr->distmax, &bndgrafdat.s, &bndgrafdat.fronloctab, &bndgrafdat.partgsttax, &bndvertlvlnum, &bndvertlocnbr1, &bndvertlocancadj) != 0) { errorPrint ("bdgraphBipartBd: cannot create band graph"); return (1); } bndvertancnnd = bndgrafdat.s.vertlocnnd - 2; reduloctab[0] = 0; /* Assume no memory allocation problem */ bndveexlocsum = bndveexlocsum0 = 0; bndgrafdat.veexloctax = NULL; /* Assume no external gains */ if (orggrafptr->veexloctax != NULL) { if ((bndgrafdat.veexloctax = memAlloc (bndgrafdat.s.vertlocnbr * sizeof (Gnum))) == NULL) { errorPrint ("bdgraphBipartBd: out of memory (1)"); reduloctab[0] = 1; /* Memory error */ } else { Gnum bndvertlocnum; bndgrafdat.veexloctax -= bndgrafdat.s.baseval; for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { Gnum veexval; veexval = orggrafptr->veexloctax[bndgrafdat.s.vnumloctax[bndvertlocnum]]; bndgrafdat.veexloctax[bndvertlocnum] = veexval; bndveexlocsum += veexval; bndveexlocsum0 += veexval & (((Gnum) bndgrafdat.partgsttax[bndvertlocnum]) - 1); } } } reduloctab[1] = bndgrafdat.s.vendloctax[bndvertancnnd] - bndgrafdat.s.vertloctax[bndvertancnnd] - (orggrafptr->s.procglbnbr - 1); /* Anchor degrees */ reduloctab[2] = bndgrafdat.s.vendloctax[bndvertancnnd + 1] - bndgrafdat.s.vertloctax[bndvertancnnd + 1] - (orggrafptr->s.procglbnbr - 1); bndgrafdat.complocsize0 = bndgrafdat.s.vertlocnbr - (bndvertlocnbr1 + 1); /* Add 1 for anchor vertex 1 */ complocsizeadj0 = orggrafptr->complocsize0 - bndgrafdat.complocsize0; /* -1 less because of anchor 0 */ reduloctab[3] = bndgrafdat.complocsize0; reduloctab[4] = bndvertlocancadj; /* Sum increases in size and load */ reduloctab[5] = bndveexlocsum; reduloctab[6] = bndveexlocsum0; if (MPI_Allreduce (reduloctab, reduglbtab, 7, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartBd: communication error (1)"); return (1); } if (reduglbtab[0] != 0) { bdgraphExit (&bndgrafdat); return (1); } if ((reduglbtab[1] == 0) || /* If graph is too small to have any usable anchors */ (reduglbtab[2] == 0)) { bdgraphExit (&bndgrafdat); return (bdgraphBipartSt (orggrafptr, paraptr->stratorg)); } bndvertglbancadj = reduglbtab[4]; bndgrafdat.veexglbsum = orggrafptr->veexglbsum; /* All external gains preserved */ bndgrafdat.fronlocnbr = orggrafptr->fronlocnbr; /* All separator vertices are kept in band graph */ bndgrafdat.fronglbnbr = orggrafptr->fronglbnbr; bndgrafdat.complocload0 = orggrafptr->complocload0 + bndvertlocancadj; /* All loads are kept in band graph */ bndgrafdat.compglbload0 = orggrafptr->compglbload0 + bndvertglbancadj; bndgrafdat.compglbload0min = orggrafptr->compglbload0min + bndvertglbancadj; /* Tilt extrema loads according to adjustments */ bndgrafdat.compglbload0max = orggrafptr->compglbload0max + bndvertglbancadj; bndgrafdat.compglbload0avg = orggrafptr->compglbload0avg + bndvertglbancadj; /* Tilt average load according to adjustments */ bndgrafdat.compglbload0dlt = orggrafptr->compglbload0dlt; bndgrafdat.compglbsize0 = reduglbtab[3]; bndgrafdat.commglbload = orggrafptr->commglbload; bndgrafdat.commglbgainextn = orggrafptr->commglbgainextn; bndgrafdat.commglbloadextn0 = orggrafptr->commglbloadextn0; bndgrafdat.commglbgainextn0 = orggrafptr->commglbgainextn0; bndgrafdat.bbalglbval = orggrafptr->bbalglbval; bndgrafdat.domndist = orggrafptr->domndist; bndgrafdat.domnwght[0] = orggrafptr->domnwght[0]; bndgrafdat.domnwght[1] = orggrafptr->domnwght[1]; bndgrafdat.levlnum = orggrafptr->levlnum; if (bndgrafdat.veexloctax != NULL) { Gnum bndveexglbanc0; Gnum bndveexglbanc1; bndveexglbanc0 = (orggrafptr->veexglbsum + orggrafptr->commglbgainextn) / 2 - reduglbtab[6]; /* Compute global external gains of anchors */ bndveexglbanc1 = (orggrafptr->veexglbsum - bndveexglbanc0) - reduglbtab[5]; bndgrafdat.veexloctax[bndvertancnnd] = DATASIZE (bndveexglbanc0, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum); /* Spread gains across local anchors */ bndgrafdat.veexloctax[bndvertancnnd + 1] = DATASIZE (bndveexglbanc1, bndgrafdat.s.procglbnbr, bndgrafdat.s.proclocnum); } #ifdef SCOTCH_DEBUG_BDGRAPH2 if (bdgraphCheck (&bndgrafdat) != 0) { errorPrint ("bdgraphBipartBd: internal error (1)"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ if (bdgraphBipartSt (&bndgrafdat, paraptr->stratbnd) != 0) { /* Separate distributed band graph */ errorPrint ("bdgraphBipartBd: cannot separate band graph"); bdgraphExit (&bndgrafdat); return (1); } reduloctab[0] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd]; /* Check if anchor vertices remain in their parts */ reduloctab[1] = (Gnum) bndgrafdat.partgsttax[bndvertancnnd + 1]; reduloctab[2] = complocsizeadj0; reduloctab[3] = 0; /* Assume memory allocation is all right */ if ((orgflagloctab = memAlloc (flagSize (orggrafptr->s.vertlocnnd) * sizeof (int))) == NULL) { /* Eventually keep space for based indices */ errorPrint ("bdgraphBipartBd: out of memory (2)"); reduloctab[3] = 1; } if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartBd: communication error (2)"); return (1); } if (((reduglbtab[0] + reduglbtab[1]) != orggrafptr->s.procglbnbr) || /* If not all anchors of initial same parts in same parts */ ((reduglbtab[0] != 0) && (reduglbtab[0] != orggrafptr->s.procglbnbr)) || (reduglbtab[3] != 0)) { if (orgflagloctab != NULL) memFree (orgflagloctab); bdgraphExit (&bndgrafdat); /* Apply original strategy to full graph */ return (bdgraphBipartSt (orggrafptr, paraptr->stratorg)); } if (dgraphGhst (&bndgrafdat.s) != 0) { /* Compute ghost edge array if not already present */ errorPrint ("bdgraphBipartBd: cannot compute ghost edge array"); return (1); } if (reduglbtab[0] == orggrafptr->s.procglbnbr) { /* If all anchors swapped parts, swap all parts of original vertices */ Gnum orgvertnum; orggrafptr->complocsize0 = orggrafptr->s.vertlocnbr - reduloctab[2] - bndgrafdat.s.vertlocnbr + bndgrafdat.complocsize0; orggrafptr->compglbsize0 = orggrafptr->s.vertglbnbr - reduglbtab[2] - bndgrafdat.s.vertglbnbr + bndgrafdat.compglbsize0; for (orgvertnum = orggrafptr->s.baseval; orgvertnum < orggrafptr->s.vertlocnnd; orgvertnum ++) orggrafptr->partgsttax[orgvertnum] ^= 1; } else { orggrafptr->complocsize0 = reduloctab[2] + bndgrafdat.complocsize0; orggrafptr->compglbsize0 = reduglbtab[2] + bndgrafdat.compglbsize0; } for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) /* Update part array of all vertices except anchors */ orggrafptr->partgsttax[bndgrafdat.s.vnumloctax[bndvertlocnum]] = bndgrafdat.partgsttax[bndvertlocnum]; dgraphHaloAsync (&orggrafptr->s, (byte *) (orggrafptr->partgsttax + orggrafptr->s.baseval), GRAPHPART_MPI, &requdat); /* Share part array of full graph */ commlocloadintn = commlocloadextn = commlocgainextn = 0; bndedlolocval = 1; /* Assume no edge loads */ for (bndvertlocnum = bndgrafdat.s.baseval; bndvertlocnum < bndvertlvlnum; bndvertlocnum ++) { /* For all vertices of band graph save for last layer */ Gnum bndedgelocnum; Gnum bndedgelocnnd; Gnum bndpartval; bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum]; if (bndgrafdat.veexloctax != NULL) { commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval; commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2); } for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum]; bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) { Gnum bndvertlocend; Gnum bndpartend; bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum]; bndpartend = bndgrafdat.partgsttax[bndvertlocend]; if (bndgrafdat.s.edloloctax != NULL) bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum]; commlocloadintn += (bndpartval ^ bndpartend) * bndedlolocval; /* Internal load is accounted for twice */ } } for ( ; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer, remove internal loads to band vertices once */ Gnum bndedgelocnum; Gnum bndedgelocnnd; Gnum bndpartval; bndpartval = (Gnum) bndgrafdat.partgsttax[bndvertlocnum]; if (bndgrafdat.veexloctax != NULL) { commlocloadextn += bndgrafdat.veexloctax[bndvertlocnum] * bndpartval; commlocgainextn += bndgrafdat.veexloctax[bndvertlocnum] * (1 - bndpartval * 2); } for (bndedgelocnum = bndgrafdat.s.vertloctax[bndvertlocnum], bndedgelocnnd = bndgrafdat.s.vendloctax[bndvertlocnum] - 1; /* "-1" to avoid anchor edges */ bndedgelocnum < bndedgelocnnd; bndedgelocnum ++) { Gnum bndvertlocend; Gnum bndpartend; bndvertlocend = bndgrafdat.s.edgegsttax[bndedgelocnum]; bndpartend = bndgrafdat.partgsttax[bndvertlocend]; if (bndgrafdat.s.edloloctax != NULL) bndedlolocval = bndgrafdat.s.edloloctax[bndedgelocnum]; commlocloadintn -= (bndpartval ^ bndpartend) * bndedlolocval; /* Remove internal loads to band graph vertices once because afterwards they will be accounted for twice */ } } memSet (orgflagloctab, 0, flagSize (orggrafptr->s.vertlocnnd) * sizeof (int)); /* Set vertices as not already considered */ for (bndfronlocnum = orgfronlocnum = 0; bndfronlocnum < bndgrafdat.fronlocnbr; bndfronlocnum ++) { /* Project back separator except for last layer */ Gnum bndvertlocnum; bndvertlocnum = bndgrafdat.fronloctab[bndfronlocnum]; if (bndvertlocnum < bndvertlvlnum) { /* If vertex does not belong to last layer */ Gnum orgvertlocnum; orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum]; flagSet (orgflagloctab, orgvertlocnum); /* Set vertex as processed */ orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum; } } if (dgraphHaloWait (&requdat) != 0) { errorPrint ("bdgraphBipartBd: cannot complete asynchronous halo exchange"); return (1); } orgedlolocval = 1; /* Assume no edge loads */ commlocloadintn2 = 0; for (bndvertlocnum = bndvertlvlnum; bndvertlocnum < bndvertancnnd; bndvertlocnum ++) { /* For all vertices of last layer */ Gnum orgedgelocnum; Gnum orgedgelocnnd; Gnum orgvertlocnum; GraphPart orgpartval; Gnum orgflagval; orgvertlocnum = bndgrafdat.s.vnumloctax[bndvertlocnum]; orgpartval = bndgrafdat.partgsttax[bndvertlocnum]; orgflagval = 0; /* Assume vertex does not belong to the frontier */ for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum]; orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) { Gnum orgvertlocend; Gnum orgpartend; Gnum orgflagtmp; orgvertlocend = orggrafptr->s.edgegsttax[orgedgelocnum]; orgpartend = orggrafptr->partgsttax[orgvertlocend]; orgflagtmp = orgpartval ^ orgpartend; if (bndgrafdat.s.edloloctax != NULL) orgedlolocval = orggrafptr->s.edloloctax[orgedgelocnum]; orgflagval |= orgflagtmp; commlocloadintn2 += orgflagtmp * orgedlolocval; /* Internal load to band and original graph vertices are accounted for twice */ if ((orgflagtmp != 0) && (orgvertlocend < orggrafptr->s.vertlocnnd) && (flagVal (orgflagloctab, orgvertlocend) == 0)) { orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocend; flagSet (orgflagloctab, orgvertlocend); } } if ((orgflagval != 0) && (flagVal (orgflagloctab, orgvertlocnum) == 0)) orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum; flagSet (orgflagloctab, orgvertlocnum); /* Set vertex as processed anyway */ } commlocloadintn += 2 * commlocloadintn2; /* Add twice the internal load of original graph edges and once the one of band edges (one removed before) */ orggrafptr->complocload0 = bndgrafdat.complocload0 - bndvertlocancadj; orggrafptr->compglbload0 = bndgrafdat.compglbload0 - bndvertglbancadj; orggrafptr->compglbload0dlt = orggrafptr->compglbload0 - orggrafptr->compglbload0avg; orgprocsidnbr = orggrafptr->s.procsidnbr; if (orgprocsidnbr == 0) goto loop_exit; orgvertlocnum = orggrafptr->s.baseval; orgprocsidnum = 0; orgprocsidtab = orggrafptr->s.procsidtab; orgprocsidval = orgprocsidtab[orgprocsidnum ++]; while (1) { /* Scan all vertices which have foreign neighbors */ while (orgprocsidval < 0) { orgvertlocnum -= (Gnum) orgprocsidval; orgprocsidval = orgprocsidtab[orgprocsidnum ++]; } if (flagVal (orgflagloctab, orgvertlocnum) == 0) { /* If vertex not already processed */ Gnum orgedgelocnum; Gnum orgedgelocnnd; GraphPart orgpartval; orgpartval = orggrafptr->partgsttax[orgvertlocnum]; for (orgedgelocnum = orggrafptr->s.vertloctax[orgvertlocnum], orgedgelocnnd = orggrafptr->s.vendloctax[orgvertlocnum]; orgedgelocnum < orgedgelocnnd; orgedgelocnum ++) { if (orggrafptr->partgsttax[orggrafptr->s.edgegsttax[orgedgelocnum]] != orgpartval) { orggrafptr->fronloctab[orgfronlocnum ++] = orgvertlocnum; break; } } } do { if (orgprocsidnum >= orgprocsidnbr) goto loop_exit; } while ((orgprocsidval = orgprocsidtab[orgprocsidnum ++]) >= 0); } loop_exit : memFree (orgflagloctab); reduloctab[0] = commlocloadintn; /* Twice the internal load; sum globally before dividing by two */ reduloctab[1] = commlocloadextn; reduloctab[2] = commlocgainextn; reduloctab[3] = orgfronlocnum; if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 4, GNUM_MPI, MPI_SUM, orggrafptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartBd: communication error (3)"); return (1); } orggrafptr->fronlocnbr = orgfronlocnum; orggrafptr->fronglbnbr = reduglbtab[3]; orggrafptr->commglbload = (reduglbtab[0] / 2) * orggrafptr->domndist + reduglbtab[1]; orggrafptr->commglbgainextn = reduglbtab[2]; orggrafptr->bbalglbval = (double) ((orggrafptr->compglbload0dlt < 0) ? (- orggrafptr->compglbload0dlt) : orggrafptr->compglbload0dlt) / (double) orggrafptr->compglbload0avg; #ifdef SCOTCH_DEBUG_BDGRAPH2 if (bdgraphCheck (orggrafptr) != 0) { errorPrint ("bdgraphBipartBd: internal error (2)"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ bdgraphExit (&bndgrafdat); return (0); }
int bdgraphBipartDf ( Bdgraph * const grafptr, /*+ Distributed graph +*/ const BdgraphBipartDfParam * const paraptr) /*+ Method parameters +*/ { float * restrict ielsloctax; /* Inverse of degree array */ float * restrict veexloctax; /* Veexval over domdist */ float * restrict difogsttax; /* Old diffusion value array */ float * restrict difngsttax; /* New diffusion value array */ const Gnum * restrict edgegsttax; Gnum fronlocnum; Gnum veexlocnbr; float vanclocval[2]; float valolocval[2]; /* Fraction of load to remove from anchor vertices at each step */ Gnum vanclocnnd; Gnum vertlocnum; const Gnum * restrict velolocbax; Gnum velolocmsk; const Gnum * restrict edlolocbax; Gnum edlolocmsk; Gnum complocload1; Gnum complocsize1; Gnum commlocloadintn; Gnum commlocloadextn; Gnum commlocgainextn; Gnum reduloctab[6]; Gnum reduglbtab[6]; Gnum passnum; float cdifval; float cremval; int ovflval; /* Overflow flag value */ if (dgraphGhst (&grafptr->s) != 0) { /* Compute ghost edge array if not already present */ errorPrint ("bdgraphBipartDf: cannot compute ghost edge array"); return (1); } reduloctab[0] = grafptr->s.vendloctax[grafptr->s.vertlocnnd - 2] - grafptr->s.vertloctax[grafptr->s.vertlocnnd - 2] - (grafptr->s.procglbnbr - 1); /* Local degree of both anchor vertices, minus edges to other anchors */ reduloctab[1] = grafptr->s.vendloctax[grafptr->s.vertlocnnd - 1] - grafptr->s.vertloctax[grafptr->s.vertlocnnd - 1] - (grafptr->s.procglbnbr - 1); /* Anchor edges have load 1 even for weighted graphs */ if (grafptr->s.veloloctax == NULL) reduloctab[2] = /* Weights of anchors */ reduloctab[3] = 1; else { reduloctab[2] = grafptr->s.veloloctax[grafptr->s.vertlocnnd - 2]; reduloctab[3] = grafptr->s.veloloctax[grafptr->s.vertlocnnd - 1]; } veexlocnbr = (grafptr->veexloctax != NULL) ? grafptr->s.vertlocnbr : 0; if (memAllocGroup ((void **) (void *) &ielsloctax, (size_t) (grafptr->s.vertlocnbr * sizeof (float)), &veexloctax, (size_t) (veexlocnbr * sizeof (float)), &difogsttax, (size_t) (grafptr->s.vertgstnbr * sizeof (float)), &difngsttax, (size_t) (grafptr->s.vertgstnbr * sizeof (float)), NULL) == NULL) { errorPrint ("bdgraphBipartDf: out of memory"); reduloctab[0] = -1; } ielsloctax -= grafptr->s.baseval; difogsttax -= grafptr->s.baseval; difngsttax -= grafptr->s.baseval; veexloctax = (grafptr->veexloctax != NULL) ? (veexloctax - grafptr->s.baseval) : NULL; if (MPI_Allreduce (reduloctab, reduglbtab, 4, GNUM_MPI, MPI_SUM, grafptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartDf: communication error (1)"); return (1); } if (reduglbtab[0] < 0) { if (ielsloctax != NULL) memFree (ielsloctax + grafptr->s.baseval); /* Free group leader */ return (1); } if ((reduglbtab[0] == 0) || /* If graph is too small to have any usable anchors, leave partition as is */ (reduglbtab[1] == 0)) { memFree (ielsloctax + grafptr->s.baseval); if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (grafptr->partgsttax + grafptr->s.baseval), GRAPHPART_MPI) != 0) { errorPrint ("bdgraphBipartDf: cannot propagate part data (1)"); return (1); } return (0); } vanclocval[0] = (float) ((paraptr->typeval == BDGRAPHBIPARTDFTYPEBAL) /* If balanced parts wanted */ ? grafptr->compglbload0avg /* Target is average */ : ( (grafptr->compglbload0 < grafptr->compglbload0min) ? grafptr->compglbload0min : /* Else keep load if not off balance */ ((grafptr->compglbload0 > grafptr->compglbload0max) ? grafptr->compglbload0max : grafptr->compglbload0))); vanclocval[1] = (float) grafptr->s.veloglbsum - vanclocval[0]; vanclocval[0] = - vanclocval[0]; /* Part 0 holds negative values */ valolocval[0] = (float) reduglbtab[2]; /* Compute values to remove from anchor vertices */ valolocval[1] = (float) reduglbtab[3] - BDGRAPHBIPARTDFEPSILON; /* Slightly tilt value to add to part 1 */ vanclocnnd = grafptr->s.vertlocnnd - 2; /* Do not account for anchor vertices in diffusion computations */ if (grafptr->s.edloloctax != NULL) { for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) { Gnum edgelocnum; Gnum edgelocnnd; Gnum edlolocsum; #ifdef SCOTCH_DEBUG_BDGRAPH2 if ((grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]) == 0) { errorPrint ("bdgraphBipartDf: internal error (1)"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ difogsttax[vertlocnum] = 0.0F; for (edgelocnum = grafptr->s.vertloctax[vertlocnum], edgelocnnd = grafptr->s.vendloctax[vertlocnum], edlolocsum = 0; edgelocnum < edgelocnnd; edgelocnum ++) edlolocsum += grafptr->s.edloloctax[edgelocnum]; ielsloctax[vertlocnum] = 1.0F / (float) edlolocsum; } } else { /* Graph has no edge loads */ for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) { #ifdef SCOTCH_DEBUG_BDGRAPH2 if ((grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]) == 0) { errorPrint ("bdgraphBipartDf: internal error (2)"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ ielsloctax[vertlocnum] = 1.0F / (float) (grafptr->s.vendloctax[vertlocnum] - grafptr->s.vertloctax[vertlocnum]); difogsttax[vertlocnum] = 0.0F; } } ielsloctax[vanclocnnd] = 1.0F / (float) reduglbtab[0]; ielsloctax[vanclocnnd + 1] = 1.0F / (float) reduglbtab[1]; difogsttax[vanclocnnd] = vanclocval[0] * ielsloctax[vanclocnnd]; /* Load anchor vertices for first pass */ difogsttax[vanclocnnd + 1] = vanclocval[1] * ielsloctax[vanclocnnd + 1]; difngsttax[vanclocnnd] = /* In case of isolated anchors, do not risk overflow because of NaN */ difngsttax[vanclocnnd + 1] = 0.0F; if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (difogsttax + grafptr->s.baseval), MPI_FLOAT) != 0) { /* Perform initial diffusion (and build communication structures) */ errorPrint ("bdgraphBipartDf: cannot propagate diffusion data (1)"); memFree (ielsloctax + grafptr->s.baseval); /* Free group leader */ return (1); } ovflval = 0; cdifval = paraptr->cdifval; cremval = paraptr->cremval; edgegsttax = grafptr->s.edgegsttax; for (passnum = 0; ; ) { /* For all passes */ if (ovflval == 0) { /* If no overflow occured */ float * diftgsttax; /* Temporary swap value */ Gnum vertlocnum; float veloval; veloval = 1.0F; /* Assume no vertex loads */ for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) { Gnum edgelocnum; Gnum edgelocnnd; float diffval; diffval = 0.0F; edgelocnum = grafptr->s.vertloctax[vertlocnum]; edgelocnnd = grafptr->s.vendloctax[vertlocnum]; if (grafptr->s.edloloctax != NULL) for ( ; edgelocnum < edgelocnnd; edgelocnum ++) diffval += difogsttax[edgegsttax[edgelocnum]] * (float) grafptr->s.edloloctax[edgelocnum]; else for ( ; edgelocnum < edgelocnnd; edgelocnum ++) diffval += difogsttax[edgegsttax[edgelocnum]]; diffval *= cdifval; diffval += (difogsttax[vertlocnum] * cremval) / ielsloctax[vertlocnum]; if (grafptr->s.veloloctax != NULL) veloval = (float) grafptr->s.veloloctax[vertlocnum]; if (diffval >= 0.0F) { diffval = (diffval - veloval) * ielsloctax[vertlocnum]; if (diffval <= 0.0F) diffval = +BDGRAPHBIPARTDFEPSILON; } else { diffval = (diffval + veloval) * ielsloctax[vertlocnum]; if (diffval >= 0.0F) diffval = -BDGRAPHBIPARTDFEPSILON; } if (isnan (diffval)) { /* If overflow occured */ ovflval = 1; /* We are in state of overflow */ goto abort; /* Exit this loop without swapping arrays */ } difngsttax[vertlocnum] = diffval; } for ( ; vertlocnum < grafptr->s.vertlocnnd; vertlocnum ++) { /* For the two local anchor vertices */ Gnum edgelocnum; Gnum edgelocnnd; float diffval; diffval = 0.0F; edgelocnum = grafptr->s.vertloctax[vertlocnum] + grafptr->s.procglbnbr - 1; /* Skip links to other anchors */ edgelocnnd = grafptr->s.vendloctax[vertlocnum]; if (edgelocnum == edgelocnnd) /* If isolated anchor */ continue; /* Barrel is empty */ for ( ; edgelocnum < edgelocnnd; edgelocnum ++) /* Anchor edges have load 1 even for weighted graphs */ diffval += difogsttax[edgegsttax[edgelocnum]]; diffval *= cdifval; diffval += vanclocval[vertlocnum - vanclocnnd] + (difogsttax[vertlocnum] * cremval) / ielsloctax[vertlocnum]; if (diffval >= 0.0F) { diffval = (diffval - valolocval[vertlocnum - vanclocnnd]) * ielsloctax[vertlocnum]; if (diffval <= 0.0F) diffval = +BDGRAPHBIPARTDFEPSILON; } else { diffval = (diffval + valolocval[vertlocnum - vanclocnnd]) * ielsloctax[vertlocnum]; if (diffval >= 0.0F) diffval = -BDGRAPHBIPARTDFEPSILON; } if (isnan (diffval)) { /* If overflow occured */ ovflval = 1; /* We are in state of overflow */ goto abort; /* Exit this loop without swapping arrays */ } difngsttax[vertlocnum] = diffval; } diftgsttax = (float *) difngsttax; /* Swap old and new diffusion arrays */ difngsttax = (float *) difogsttax; /* Casts to prevent IBM compiler from yelling */ difogsttax = (float *) diftgsttax; } abort : /* If overflow occured, resume here */ if (++ passnum >= paraptr->passnbr) /* If maximum number of passes reached */ break; /* Exit main loop */ if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (difogsttax + grafptr->s.baseval), MPI_FLOAT) != 0) { errorPrint ("bdgraphBipartDf: cannot propagate diffusion data (2)"); memFree (ielsloctax + grafptr->s.baseval); /* Free group leader */ return (1); } } for (vertlocnum = grafptr->s.baseval; vertlocnum < vanclocnnd; vertlocnum ++) /* Set new part distribution */ grafptr->partgsttax[vertlocnum] = (difogsttax[vertlocnum] <= 0.0F) ? 0 : 1; grafptr->partgsttax[vanclocnnd] = 0; /* Set up parts in case anchors are isolated */ grafptr->partgsttax[vanclocnnd + 1] = 1; if (grafptr->s.veloloctax != NULL) { velolocbax = grafptr->s.veloloctax; velolocmsk = ~((Gnum) 0); } else { velolocbax = &bdgraphbipartdfloadone; velolocmsk = 0; } if (grafptr->s.edloloctax != NULL) { edlolocbax = grafptr->s.edloloctax; edlolocmsk = ~((Gnum) 0); } else { edlolocbax = &bdgraphbipartdfloadone; edlolocmsk = 0; } memFree (ielsloctax + grafptr->s.baseval); /* Free group leader */ if (dgraphHaloSync (&grafptr->s, (byte *) (void *) (grafptr->partgsttax + grafptr->s.baseval), GRAPHPART_MPI) != 0) { errorPrint ("bdgraphBipartDf: cannot propagate part data (2)"); return (1); } commlocloadintn = commlocloadextn = commlocgainextn = 0; for (vertlocnum = grafptr->s.baseval, fronlocnum = complocsize1 = complocload1 = 0; vertlocnum < grafptr->s.vertlocnnd; vertlocnum ++) { Gnum edgelocnum; Gnum edgelocnnd; Gnum veloval; Gnum partval; Gnum flagval; #ifdef SCOTCH_DEBUG_BDGRAPH2 if (grafptr->partgsttax[vertlocnum] > 1) { errorPrint ("bdgraphBipartDf: internal error (3)"); break; /* Do not break upcoming collective communications */ } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ partval = (Gnum) grafptr->partgsttax[vertlocnum]; veloval = velolocbax[vertlocnum & velolocmsk]; if (grafptr->veexloctax != NULL) { commlocloadextn += grafptr->veexloctax[vertlocnum] * partval; commlocgainextn += grafptr->veexloctax[vertlocnum] * (1 - partval * 2); } complocsize1 += partval; complocload1 += partval * veloval; flagval = 0; for (edgelocnum = grafptr->s.vertloctax[vertlocnum], edgelocnnd = grafptr->s.vendloctax[vertlocnum]; edgelocnum < edgelocnnd; edgelocnum ++) { Gnum edloval; Gnum partend; partend = (Gnum) grafptr->partgsttax[edgegsttax[edgelocnum]]; #ifdef SCOTCH_DEBUG_BDGRAPH2 if (partend > 1) { errorPrint ("bdgraphBipartDf: internal error (4)"); vertlocnum = grafptr->s.vertlocnnd; break; /* Do not break upcoming collective communications */ } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ edloval = edlolocbax[edgelocnum & edlolocmsk]; flagval |= partval ^ partend; commlocloadintn += (partval ^ partend) * edloval; /* Internal load is accounted for twice */ } if (flagval != 0) /* If vertex has neighbors in other part */ grafptr->fronloctab[fronlocnum ++] = vertlocnum; /* Record it as member of separator */ } grafptr->complocload0 = grafptr->s.velolocsum - complocload1; grafptr->complocsize0 = grafptr->s.vertlocnbr - complocsize1; grafptr->fronlocnbr = fronlocnum; reduloctab[0] = fronlocnum; reduloctab[1] = grafptr->complocload0; reduloctab[2] = grafptr->complocsize0; reduloctab[3] = commlocloadintn; /* Twice the internal load; sum globally before dividing by two */ reduloctab[4] = commlocloadextn; reduloctab[5] = commlocgainextn; if (MPI_Allreduce (&reduloctab[0], &reduglbtab[0], 6, GNUM_MPI, MPI_SUM, grafptr->s.proccomm) != MPI_SUCCESS) { errorPrint ("bdgraphBipartDf: communication error (2)"); return (1); } grafptr->fronglbnbr = reduglbtab[0]; grafptr->compglbload0 = reduglbtab[1]; grafptr->compglbload0dlt = grafptr->compglbload0 - grafptr->compglbload0avg; grafptr->compglbsize0 = reduglbtab[2]; grafptr->commglbload = (reduglbtab[3] / 2) * grafptr->domdist + reduglbtab[4]; grafptr->commglbgainextn = reduglbtab[5]; #ifdef SCOTCH_DEBUG_BDGRAPH2 if (bdgraphCheck (grafptr) != 0) { errorPrint ("bdgraphBipartDf: internal error (5)"); return (1); } #endif /* SCOTCH_DEBUG_BDGRAPH2 */ return (0); }