/*
 * Reset every field of a ZHG structure to its empty state: counters and
 * dimensions become 0, pointers become NULL, and the embedded HGraph is
 * reset through Zoltan_HG_HGraph_Init().
 *
 * No memory is freed here; any arrays the structure already points to are
 * simply forgotten.
 */
void Zoltan_Input_HG_Init(ZHG *zhg)
{
  /* Scalar counters, dimensions and flags. */
  zhg->nObj          = 0;
  zhg->globalObj     = 0;
  zhg->objWeightDim  = 0;
  zhg->nHedges       = 0;
  zhg->globalHedges  = 0;
  zhg->edgeWeightDim = 0;
  zhg->nPins         = 0;
  zhg->globalPins    = 0;
  zhg->nRecv_GNOs    = 0;
  zhg->GnRepartVtx   = 0;
  zhg->GnRepartEdge  = 0;
  zhg->showMoveVol   = 0;

  /* Arrays associated with the objects. */
  zhg->objWeight    = NULL;
  zhg->objGNO       = NULL;
  zhg->objGID       = NULL;
  zhg->objLID       = NULL;
  zhg->numHEdges    = NULL;
  zhg->coor         = NULL;
  zhg->fixed        = NULL;
  zhg->Input_Parts  = NULL;
  zhg->Output_Parts = NULL;
  zhg->AppObjSizes  = NULL;

  /* Arrays associated with the hyperedges and their pins. */
  zhg->edgeGNO   = NULL;
  zhg->Esize     = NULL;
  zhg->Ewgt      = NULL;
  zhg->pinGNO    = NULL;
  zhg->Pin_Procs = NULL;

  /* Receive list and communication plan. */
  zhg->Recv_GNOs = NULL;
  zhg->VtxPlan   = NULL;

  /* Finally reset the nested hypergraph. */
  Zoltan_HG_HGraph_Init(&zhg->HG);
}
/*
 * Redistribute the hypergraph ohg (distributed over ohg->comm) onto the
 * processor grid described by ncomm, producing nhg.
 *
 * v2Col[v] gives the destination processor column of local vertex v and
 * n2Row[n] the destination processor row of local net n; the destination
 * rank of a pin is firstproc + n2Row[net] * ncomm->nProc_x + v2Col[vtx].
 *
 * Pins are sent from every processor.  Vertex-related data (vertex map,
 * vertex weights, fixed and preferred parts) are sent only by the first
 * row of the old grid to the first row of the new grid and then broadcast
 * down the new columns; edge weights are sent only by the first column of
 * the old grid to the first column of the new grid and then broadcast
 * along the new rows.
 *
 * On return *vmap and *vdest are allocated (first row of ncomm only, and
 * only when vertices were received) with one entry per received vertex;
 * otherwise they are NULL.
 *
 * Every processor of ohg->comm must call this routine: it contains
 * collective MPI operations and Zoltan_Comm exchanges.  An allocation
 * failure leaves through the MEMORY_ERROR macro (defined elsewhere;
 * it jumps to End), which releases the temporaries and any live
 * communication plan.
 *
 * NOTE(review): dist_x/dist_y are handled as int arrays here (MPI_INT);
 * confirm this matches the declared type of HGraph.dist_x/dist_y.
 */
static int Zoltan_PHG_Redistribute_Hypergraph(
    ZZ *zz,
    PHGPartParams *hgp,     /* Input: parameters; only UseFixedVtx and
                               UsePrefPart are read */
    HGraph  *ohg,           /* Input: Local part of distributed hypergraph */
    int     firstproc,      /* Input: rank (in ocomm) of the first proc of
                               the ncomm */
    int     *v2Col,         /* Input: Vertex to processor Column Mapping */
    int     *n2Row,         /* Input: Net to processor Row Mapping */
    PHGComm *ncomm,         /* Input: communicators of new distribution */
    HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
    int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                               vertex map from nhg to ohg's local vertex
                               number */
    int     **vdest         /* Output: allocated with the size nhg->nVtx and
                               stores dest proc in ocomm */
    )
{
    char * yo = "Zoltan_PHG_Redistribute_Hypergraph";
    PHGComm *ocomm = ohg->comm;
    int ierr=ZOLTAN_OK;
    int i, v, n, nPins, nsend, elemsz, nVtx, nEdge;
    int msg_tag = 9999;
    int *proclist=NULL, *sendbuf=NULL;
    int *vno=NULL, *nno=NULL, *dist_x=NULL, *dist_y=NULL,
        *vsn=NULL, *nsn=NULL, *pins=NULL, *cnt=NULL;
    ZOLTAN_COMM_OBJ *plan = NULL;   /* NULL whenever no plan is alive, so
                                       End can release a pending one */

    Zoltan_HG_HGraph_Init (nhg);
    nhg->comm = ncomm;

    nhg->dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nhg->dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    dist_x = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    dist_y = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(int));
    nsn = (int *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(int));
    vno = (int *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(int));
    nno = (int *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(int));

    if (!nhg->dist_x || !nhg->dist_y || !dist_x || !dist_y ||
        !vsn || !nsn || (ohg->nVtx && !vno) || (ohg->nEdge && !nno) ) {
        uprintf(ocomm, " new comm nProcx=%d nProcy=%d nvtx=%d nedge=%d", ncomm->nProc_x, ncomm->nProc_y, ohg->nVtx, ohg->nEdge);
        MEMORY_ERROR;
    }

    /* Count how many local vertices/nets go to each new column/row. */
    for (v = 0; v < ohg->nVtx; ++v)
        ++dist_x[v2Col[v]];
    for (n = 0; n < ohg->nEdge; ++n)
        ++dist_y[n2Row[n]];

    /* UVCUVC: CHECK ASSUMPTION
       This code assumes that the objects in the receive buffer of
       Zoltan_Comm_Do function are
         1- in the increasing processor order,
         2- order of the items sent by a processor is preserved.
     */

    /* compute prefix sum to find new vertex start numbers; for each
       processor */
    MPI_Scan(dist_x, vsn, ncomm->nProc_x, MPI_INT, MPI_SUM, ocomm->row_comm);
    /* All reduce to compute how many each processor will have */
    MPI_Allreduce(dist_x, &(nhg->dist_x[1]), ncomm->nProc_x, MPI_INT, MPI_SUM,
                  ocomm->row_comm);
    nhg->dist_x[0] = 0;
    for (i=1; i<=ncomm->nProc_x; ++i)
        nhg->dist_x[i] += nhg->dist_x[i-1];

    MPI_Scan(dist_y, nsn, ncomm->nProc_y, MPI_INT, MPI_SUM, ocomm->col_comm);
    MPI_Allreduce(dist_y, &(nhg->dist_y[1]), ncomm->nProc_y, MPI_INT, MPI_SUM,
                  ocomm->col_comm);
    nhg->dist_y[0] = 0;
    for (i=1; i<=ncomm->nProc_y; ++i)
        nhg->dist_y[i] += nhg->dist_y[i-1];

#ifdef _DEBUG1
    PrintArr(ocomm, "vsn", vsn, ncomm->nProc_x);
    PrintArr(ocomm, "nsn", nsn, ncomm->nProc_y);
#endif

    /* find mapping of current LOCAL vertex no (in my node)
       to "new" vertex no LOCAL to dest node.  Walking backwards while
       pre-decrementing the inclusive scan result numbers the items sent to
       one destination in increasing local order. */
    for (v = ohg->nVtx-1; v>=0; --v)
        vno[v] = --vsn[v2Col[v]];
    for (n = ohg->nEdge-1; n>=0; --n)
        nno[n] = --nsn[n2Row[n]];

    /* One send buffer is reused for pins, vertex data and edge data, so it
       is sized for the largest of them. */
    nsend = MAX(MAX(ohg->nPins, ohg->nVtx), ohg->nEdge);
    elemsz = MAX(MAX(2, ohg->VtxWeightDim), ohg->EdgeWeightDim);
    elemsz = (sizeof(float)>sizeof(int)) ? sizeof(float)*elemsz : sizeof(int)*elemsz;

    proclist = (int *) ZOLTAN_MALLOC(nsend * sizeof(int));
    sendbuf = (int *) ZOLTAN_MALLOC(nsend * elemsz);
    /* Both buffers are written below; do not continue without them. */
    if (nsend && (!proclist || !sendbuf))
        MEMORY_ERROR;

    /* first communicate pins */
    nPins = 0;
    for (v = 0; v < ohg->nVtx; ++v) {
        for (i = ohg->vindex[v]; i < ohg->vindex[v+1]; ++i) {
#ifdef _DEBUG1
            if ((n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])<0 ||
                (n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])>=ocomm->nProc)
                errexit("vertex %d vedge[%d]=%d n2Row=%d #Proc_x=%d v2Col=%d", v, i, ohg->vedge[i], n2Row[ohg->vedge[i]], ncomm->nProc_x , v2Col[v]);
#endif
            proclist[nPins] = firstproc + n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v];
            sendbuf[2*nPins] = vno[v];
            sendbuf[2*nPins+1]= nno[ohg->vedge[i]];
            ++nPins;
        }
    }
#ifdef _DEBUG1
    if (nPins!=ohg->nPins) {
        uprintf(ocomm, "sanity check failed nPins(%d)!=hg->nPins(%d)\n", nPins, ohg->nPins);
        errexit("terminating");
    }
#endif

    --msg_tag;
    ierr |= Zoltan_Comm_Create(&plan, ohg->nPins, proclist, ocomm->Communicator,
                               msg_tag, &nPins);

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nPins>1) { /* this processor is not in new comm but receiving data?*/
        uprintf(ocomm, "Something wrong; why I'm receiving data nPins=%d\n", nPins);
        errexit("terminating");
    }
#endif

    if (nPins && (pins = (int *) ZOLTAN_MALLOC(nPins * 2 * sizeof(int)))==NULL)
        MEMORY_ERROR;

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, 2*sizeof(int),
                   (char *) pins);
    Zoltan_Comm_Destroy(&plan);
    plan = NULL;

    /* now communicate vertex map */
    nsend = 0;
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) {
            proclist[nsend] = firstproc+v2Col[v];
            sendbuf[nsend++] = ohg->vmap[v];
        }
    }

    --msg_tag;
    ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                               msg_tag, &nVtx);

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nVtx>1) { /* this processor is not in new comm but receiving data?*/
        uprintf(ocomm, "Something wrong; why I'm receiving data nVtx=%d\n", nVtx);
        errexit("terminating");
    }
#endif

    /* those are only needed in the first row of ncomm */
    *vmap = *vdest = NULL;
    if (!ncomm->myProc_y && nVtx &&
        (!(*vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int))) ||
         !(*vdest = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))))
        MEMORY_ERROR;

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vmap);

    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v)
            sendbuf[v] = ocomm->myProc;
    }
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int),
                   (char *) *vdest);

    if (ncomm->myProc!=-1) { /* I'm in the new comm */
        /* ncomm's first row now bcast to other rows */
        MPI_Bcast(&nVtx, 1, MPI_INT, 0, ncomm->col_comm);
#ifdef _DEBUG1
        if (nVtx!=(nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]))
            errexit("nVtx(%d)!= nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x](%d)", nVtx, nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]);
#endif
        if (nVtx && (nhg->vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))==NULL)
            MEMORY_ERROR;
        /* The new hypergraph starts with an identity vertex map. */
        for (i=0; i<nVtx; ++i)
            nhg->vmap[i] = i;
    }

    /* now communicate vertex weights */
    if (ohg->VtxWeightDim) {
        if (nVtx &&
            (nhg->vwgt = (float*) ZOLTAN_MALLOC(nVtx*ohg->VtxWeightDim*sizeof(float)))==NULL)
            MEMORY_ERROR;

        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->vwgt,
                       ohg->VtxWeightDim*sizeof(float), (char *) nhg->vwgt);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->vwgt, nVtx*ohg->VtxWeightDim, MPI_FLOAT, 0, ncomm->col_comm);
    }

    /* communicate fixed vertices, if any */
    if (hgp->UseFixedVtx) {
        if (nVtx &&
            (nhg->fixed_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int)))==NULL)
            MEMORY_ERROR;
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->fixed_part,
                       sizeof(int), (char *) nhg->fixed_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->fixed_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }

    /* communicate pref parts, if any */
    if (hgp->UsePrefPart) {
        if (nVtx &&
            (nhg->pref_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int)))==NULL)
            MEMORY_ERROR;
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->pref_part,
                       sizeof(int), (char *) nhg->pref_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->pref_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }

    /* this comm plan is no longer needed. */
    Zoltan_Comm_Destroy(&plan);
    plan = NULL;

    if (ohg->EdgeWeightDim) { /* now communicate edge weights */
        nsend = 0;
        if (!ocomm->myProc_x)  /* only first column sends to first column of ncomm */
            for (n = 0; n < ohg->nEdge; ++n)
                proclist[nsend++] = firstproc + n2Row[n]*ncomm->nProc_x;

        --msg_tag;
        ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                                   msg_tag, &nEdge);

#ifdef _DEBUG1
        if (ncomm->myProc==-1 && nEdge>1) { /* this processor is not in new comm but receiving data?*/
            uprintf(ocomm, "Something wrong; why I'm receiving data nEdge=%d\n", nEdge);
            errexit("terminating");
        }
#endif
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            MPI_Bcast(&nEdge, 1, MPI_INT, 0, ncomm->row_comm);
#ifdef _DEBUG1
            if (nEdge != (nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]))
                errexit("nEdge(%d)!=nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y](%d)", nEdge, nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
#endif
        }

        if (nEdge &&
            (nhg->ewgt = (float*) ZOLTAN_MALLOC(nEdge*ohg->EdgeWeightDim*sizeof(float)))==NULL)
            MEMORY_ERROR;

        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->ewgt,
                       ohg->EdgeWeightDim*sizeof(float), (char *) nhg->ewgt);
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            if (nEdge)
                MPI_Bcast(nhg->ewgt, nEdge*ohg->EdgeWeightDim, MPI_FLOAT, 0,
                          ncomm->row_comm);
        }

        Zoltan_Comm_Destroy(&plan);
        plan = NULL;
    } else {
        nEdge = (ncomm->myProc==-1)
                ? 0
                : nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
    }

    if (ncomm->myProc==-1) {
#ifdef _DEBUG1
        if (nPins || nVtx || nEdge)
            errexit("I should not have any data: hey nPins=%d nVtx=%d nEdge=%d\n", nPins, nVtx, nEdge);
#endif
        nhg->nEdge = nhg->nVtx = nhg->nPins = 0;
    } else {
        nhg->nEdge = nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y];
        nhg->nVtx = nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x];
        nhg->nPins = nPins;

        /* Unpack the pins received. */
        cnt = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vindex = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vedge = (int *) ZOLTAN_MALLOC(nhg->nPins * sizeof(int));

        if (!cnt || !nhg->vindex || (nPins && !nhg->vedge))
            MEMORY_ERROR;

        /* Count the number of pins per vertex */
        for (i = 0; i < nPins; ++i)
            ++cnt[pins[2*i]];

        /* Compute prefix sum to represent vindex correctly; cnt[] then
           becomes the running insertion position of each vertex. */
        for (i = 0; i < nhg->nVtx; ++i) {
            nhg->vindex[i+1] = nhg->vindex[i] + cnt[i];
            cnt[i] = nhg->vindex[i];
        }

        for (i = 0; i < nPins; ++i)
            nhg->vedge[cnt[pins[2*i]]++] = pins[2*i+1];

        nhg->info = ohg->info;
        nhg->VtxWeightDim = ohg->VtxWeightDim;
        nhg->EdgeWeightDim = ohg->EdgeWeightDim;

        ierr = Zoltan_HG_Create_Mirror(zz, nhg);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
            MEMORY_ERROR;
    }

 End:
    /* Release a plan left alive by an error exit. */
    if (plan)
        Zoltan_Comm_Destroy(&plan);
    Zoltan_Multifree(__FILE__, __LINE__, 10,
                     &proclist, &sendbuf, &pins, &cnt,
                     &vno, &nno, &dist_x, &dist_y, &vsn, &nsn
        );

    return ierr;
}
/*
 * Gather the per-processor hyperedge descriptions (HEinfo) onto every
 * processor, build a hypergraph from them, run a matching on it and, when
 * the matching is non-trivial, build the remap vector zz->LB.Remap.
 *
 * Each hyperedge occupies HEINFO_ENTRIES consecutive ints in HEinfo; this
 * routine reads entry 0 and entry 1 as the two pins and entry 2 as the
 * edge weight.  In the hypergraph, pin-0 values occupy vertices
 * [0, max0) and pin-1 values are shifted by max0.
 *
 * Three candidates are compared with measure_stays(): no remap, the
 * previous remap (zz->LB.OldRemap) and the newly built one.  The best is
 * kept in zz->LB.Remap (or none), zz->LB.OldRemap is always released or
 * handed over, and *new_map is set to 1 when a remap vector is in effect.
 *
 * Returns ZOLTAN_OK, ZOLTAN_MEMERR on allocation failure, or the negative
 * code returned by Zoltan_HG_Create_Mirror().  All processors of
 * zz->Communicator must call this routine (it uses MPI_Allgather[v]).
 */
static int gather_and_build_remap(
  ZZ *zz,
  int *new_map,               /* Upon return, flag indicating whether parts
                                 assignments were changed due to remap. */
  int HEcnt,                  /* # of HEs allocated. */
  int *HEinfo                 /* Array of HE info; for each HE, two pins and
                                 one edge weight. Stored as a single vector
                                 to minimize communication calls.  */
)
{
char *yo = "gather_and_remap";
int ierr = ZOLTAN_OK;
int i, uidx, tmp;
int *each_size = NULL;        /* sizes (# HEs * HEINFO_ENTRIES) for each proc */
int *recvbuf = NULL;          /* Receive buffer for gatherv */
int *displs = NULL;           /* Displacement buffer for gatherv */
int send_size;                /* Local # HEs * HEINFO_ENTRIES */
int total_size;               /* Total # ints in gatherv */
int total_HEcnt;              /* Total (across all procs) number of HEs. */
int max0, max1;               /* Max values of pin 0 and pin 1 for each HE. */
int *match = NULL;            /* Vector describing the matching.
                                 match[i] = j ==> match[j] = i ==>
                                 vertices i and j are matched. */
int *used = NULL;             /* Vector indicating which partitions are used
                                 in the matching. */
int limit;                    /* Maximum number of matches that are allowed */
HGraph hg;                    /* Hypergraph for matching */
float before_remap = 0,       /* Amount of data that overlaps between old and */
      after_remap = 0;        /* new decomposition before and after
                                 remapping, respectively. */
float with_oldremap = 0;      /* Amount of data that overlaps between old and
                                 new decomposition using the OldRemap vector
                                 (remapping from the previous decomposition). */

  /* Initialize hg before the first possible "goto End": the cleanup code
     frees hg unconditionally and must never see an uninitialized struct. */
  Zoltan_HG_HGraph_Init(&hg);

  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  /* One allocation holds the gathered data followed by the displacements. */
  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  displs = recvbuf + total_size;

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT,
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;

  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty partitions, allowing
   * max1 to be less than LB.Num_Global_Parts.
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  if (max1 > zz->LB.Num_Global_Parts)
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG (hg itself was initialized at the top). */

  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i;
      hg.hvertex[i+i] = recvbuf[tmp];
      hg.hvertex[i+i+1] = recvbuf[tmp+1]+max0;   /* shift pin 1 past pin-0 range */
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  /* match and used share one zero-filled allocation: hg.nVtx entries for
     match followed by LB.Num_Global_Parts entries for used. */
  match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts,
                                sizeof(int));
  /* match is indexed up to Num_Global_Parts below even when hg.nVtx is 0,
     so the check must cover the whole requested size. */
  if ((hg.nVtx + zz->LB.Num_Global_Parts) && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  used = match ? match + hg.nVtx : NULL;

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts);
  do_match(zz, &hg, match, limit);

  /* Build remapping vector, if non-trivial matching was returned. */

  *new_map = 0;
  for (i = 0; i < zz->LB.Num_Global_Parts; i++)
    if (match[i+max0] != i+max0) {
      *new_map = 1;
      break;
    }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    /* First, process all parts that were matched.  Mark matched
       parts as used. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1;
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1)
        continue;  /* Already processed part i */

      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }

    /* Third, process remaining unmatched parts; assign them to
       unused partitions. */

    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1)
        continue;  /* Already processed part i */

      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused partition */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map)
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) &&
      (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap)
    for (i = 0; i < zz->LB.Num_Global_Parts; i++)
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  /* used points into match and displs into recvbuf; neither is freed
     separately. */
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
/*
 * Function to gather distributed hypergraph onto each processor for
 * coarsest partitioning.
 * First hypergraph arrays for the hypergraph on a column of processors
 * are built using MPI_Allgathers down the processor columns.
 * These hypergraph arrays contain complete info about a subset of vertices.
 * Second the column hypergraphs are gathered along processor rows.
 * Each processor then has a complete description of the hypergraph.
 *
 * On success *gathered_hg points to a newly allocated HGraph that owns all
 * of its arrays.  On an error that leaves ierr negative, the partially
 * built hypergraph is released and *gathered_hg is reset by ZOLTAN_FREE.
 *
 * Returns ZOLTAN_FATAL when called with a single processor.  Allocation
 * failures leave through the MEMORY_ERROR macro (defined elsewhere; it
 * jumps to End).  Two internal sanity checks terminate the program with
 * exit(-1) when they fail.
 *
 * Every processor of phg->comm must call this routine; it performs
 * collective operations on the row and column communicators.
 */
int Zoltan_PHG_Gather_To_All_Procs(
  ZZ *zz,
  HGraph *phg,           /* Input:   Local part of distributed hypergraph */
  PHGPartParams *hgp,    /* Input:   Hypergraph parameters */
  PHGComm *scomm,        /* Input:   Serial PHGComm for use by shg. */
  HGraph **gathered_hg   /* Output:  complete hypergraph gathered onto
                                     this proc */
)
{
char *yo = "Zoltan_PHG_Gather_To_All_Procs";
int ierr = ZOLTAN_OK;
int i, tmp, sum;
int *each = NULL,
    *disp = NULL;       /* Size and displacement arrays for MPI_Allgatherv */
int *send_buf = NULL;   /* Buffer of values to be sent */
int send_size;          /* Size of buffer send_buf */
int *col_vedge = NULL;  /* vedge array for the proc-column hypergraph */
int *col_vindex = NULL; /* vindex array for the proc-column hypergraph */
int *col_hvertex = NULL;/* hvertex array for the proc-column hypergraph */
int *col_hindex = NULL; /* hindex array for the proc-column hypergraph */
int col_nVtx;           /* Number of vertices in processor column */
int col_nEdge;          /* Number of edges in processor column */
int col_nPin;           /* Number of pins in processor column */
int *recv_size = NULL;  /* nPins for each proc in col or row */

HGraph *shg;            /* Pointer to the serial hypergraph to be
                           returned by this function. */

int myProc_x = phg->comm->myProc_x;
int nProc_x = phg->comm->nProc_x;
int nProc_y = phg->comm->nProc_y;
int max_nProc_xy = MAX(nProc_x, nProc_y);

  if (phg->comm->nProc == 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Do not call this routine on one proc.");
    return ZOLTAN_FATAL;
  }

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, phg, NULL, stdout, "GatherBefore");/* NULL parts for now;
                                                            add non-NULL later */
#endif

  /******************************************************************
   *  0. Allocate the hypergraph to be returned.
   *  Set values that we already know.
   ******************************************************************/

  shg = *gathered_hg = (HGraph *) ZOLTAN_MALLOC(sizeof(HGraph));
  if (!shg) MEMORY_ERROR;

  Zoltan_HG_HGraph_Init(shg);
  shg->nVtx = phg->dist_x[nProc_x];   /* TODO64 - can this exceed 2B? */
  shg->nEdge = phg->dist_y[nProc_y];

  shg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  shg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  if (!shg->dist_x || !shg->dist_y) MEMORY_ERROR;

  shg->dist_x[0] = shg->dist_y[0] = 0;
  shg->dist_x[1] = shg->nVtx;
  shg->dist_y[1] = shg->nEdge;

  shg->comm = scomm;

  shg->EdgeWeightDim = phg->EdgeWeightDim;
  shg->VtxWeightDim = phg->VtxWeightDim;
  if (shg->VtxWeightDim && shg->nVtx)
    shg->vwgt = (float *) ZOLTAN_MALLOC(shg->nVtx * shg->VtxWeightDim
                                                  * sizeof(float));
  if (shg->EdgeWeightDim && shg->nEdge)
    shg->ewgt = (float *) ZOLTAN_MALLOC(shg->nEdge * shg->EdgeWeightDim
                                                   * sizeof(float));

  /* Fixed vertices */
  shg->bisec_split = phg->bisec_split;
  if (hgp->UseFixedVtx)
    shg->fixed_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  if (hgp->UsePrefPart)
    shg->pref_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));

  /* Allocate arrays for use in gather operations: one block holds
     recv_size, each and disp, max_nProc_xy ints apiece. */
  recv_size = (int *) ZOLTAN_MALLOC(3 * max_nProc_xy * sizeof(int));

  /* TODO64 - phg->dist_y[nProc_y] could exceed 2 Billion, NO? */

  send_size = MAX(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x],
                  phg->dist_y[nProc_y]);
  send_buf = (int *) ZOLTAN_MALLOC(send_size * sizeof(int));

  if ((shg->VtxWeightDim && shg->nVtx && !shg->vwgt) ||
      (shg->EdgeWeightDim && shg->nEdge && !shg->ewgt) ||
      (hgp->UseFixedVtx && shg->nVtx && !shg->fixed_part) ||
      (hgp->UsePrefPart && shg->nVtx && !shg->pref_part) ||
      !recv_size || (send_size && !send_buf))
    MEMORY_ERROR;

  /* Only derive the sub-arrays once recv_size is known to be valid. */
  each = recv_size + max_nProc_xy;
  disp = each + max_nProc_xy;

  /*************************************************************
   *  1. Gather all non-zeros for vertices in processor column *
   *************************************************************/

  if (nProc_y == 1) {
    /*
     * Don't need a gather; just set pointers appropriately for row-gather
     * in Step 2 below.  The col_* pointers alias phg's arrays here and
     * must not be freed by this routine.
     */
    col_nVtx = phg->nVtx;
    col_nEdge = phg->nEdge;
    col_nPin = phg->nPins;
    col_vindex = phg->vindex;
    col_vedge = phg->vedge;
    col_hindex = phg->hindex;
    col_hvertex = phg->hvertex;
    for (i = 0; i < shg->EdgeWeightDim * shg->nEdge; i++)
      shg->ewgt[i] = phg->ewgt[i];
  }
  else {

    /* Gather local size info for each proc in column */

    MPI_Allgather(&(phg->nPins), 1, MPI_INT, recv_size, 1, MPI_INT,
                  phg->comm->col_comm);

    /* Compute number of vtx, edge, and nnz in column */
    col_nVtx = (int)(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x]);
    col_nEdge = phg->dist_y[nProc_y];  /* SCHEMEA */
    col_nPin = 0;
    for (i = 0; i < nProc_y; i++) {
      col_nPin += recv_size[i];
    }

    /* Allocate arrays for column hypergraph */
    col_hindex = (int *) ZOLTAN_CALLOC((col_nEdge+1), sizeof(int));
    col_hvertex = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));

    col_vindex = (int *) ZOLTAN_CALLOC((col_nVtx+1), sizeof(int));
    col_vedge = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));

    if (!col_vindex || !col_hindex ||
        (col_nPin && (!col_vedge || !col_hvertex)))
      MEMORY_ERROR;

    /* Gather hvertex data for all procs in column */
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex LNO to GNO here. */

    disp[0] = 0;
    for (i = 1; i < nProc_y; i++)
      disp[i] = disp[i-1] + recv_size[i-1];

    MPI_Allgatherv(phg->hvertex, phg->nPins, MPI_INT,
                   col_hvertex, recv_size, disp, MPI_INT, phg->comm->col_comm);

    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex GNO to LNO here */

    /* Gather hindex data for all procs in column: send edge sizes, then
       rebuild the index array with a prefix sum. */

    for (i = 0; i < phg->nEdge; i++)
      send_buf[i] = phg->hindex[i+1] - phg->hindex[i];

    /* SCHEMEA can assume a recv for each edge;
     * SCHEMEB needs to gather the number of edges recv'd from each proc. */

    for (i = 0; i < nProc_y; i++)
      each[i] = phg->dist_y[i+1] - phg->dist_y[i];

    disp[0] = 0;  /* Can't use dist_y because it may not be sizeof(int) */
    for (i = 1; i < nProc_y; i++) {
      disp[i] = disp[i-1] + each[i-1];
    }

    /* SCHEMEA can use phg->dist_y for displacement array.
     * SCHEMEB requires separate displacement array. */
    MPI_Allgatherv(send_buf, phg->nEdge, MPI_INT,
                   col_hindex, each, disp, MPI_INT, phg->comm->col_comm);

    /* Perform prefix sum on col_hindex */
    sum = 0;
    for (i = 0; i < col_nEdge; i++) {
      tmp = col_hindex[i];
      col_hindex[i] = sum;
      sum += tmp;
    }
    col_hindex[col_nEdge] = sum;

    /* Sanity check */
    if (col_hindex[col_nEdge] != col_nPin) {
      printf("%d Sanity check failed:  "
             "col_hindex[col_nEdge] %d != col_nPin %d\n",
              zz->Proc, col_hindex[col_nEdge], col_nPin);
      exit(-1);
    }

    /* Gather edge weights, if any. */
    if (shg->EdgeWeightDim) {

      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */

      disp[0] = 0;
      each[0] *= phg->EdgeWeightDim;
      for (i = 1; i < nProc_y; i++) {
        each[i] *= phg->EdgeWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }

      MPI_Allgatherv(phg->ewgt, phg->nEdge*phg->EdgeWeightDim, MPI_FLOAT,
                     shg->ewgt, each, disp, MPI_FLOAT, phg->comm->col_comm);
    }

    Zoltan_HG_Mirror(col_nEdge, col_hindex, col_hvertex,
                     col_nVtx, col_vindex, col_vedge);

  }  /* End column-gather */

  /*************************************************************
   *  2. Gather all non-zeros for edges in processor rows      *
   *  All processors in a processor column now have the same   *
   *  hypergraph; we now gather it across rows.                *
   *************************************************************/

  if (nProc_x == 1) {
    /*
     * Don't need a gather across the row; just set pointers appropriately
     * in shg.  nProc_x == 1 implies nProc_y > 1 here (the one-processor
     * case returned above), so the col_* arrays were allocated by this
     * routine and ownership moves to shg.
     */
    shg->nPins = col_nPin;
    shg->vindex = col_vindex;
    shg->vedge = col_vedge;
    shg->hindex = col_hindex;
    shg->hvertex = col_hvertex;
    /* shg owns them now; clear the locals so End does not free them. */
    col_vindex = col_vedge = col_hindex = col_hvertex = NULL;

    /* Copy vwgt and fixed arrays so shg owns this memory */
    for (i = 0; i < shg->VtxWeightDim*shg->nVtx; i++)
      shg->vwgt[i] = phg->vwgt[i];
    if (hgp->UseFixedVtx)
      for (i = 0; i < shg->nVtx; i++)
        shg->fixed_part[i] = phg->fixed_part[i];
    if (hgp->UsePrefPart)
      for (i = 0; i < shg->nVtx; i++)
        shg->pref_part[i] = phg->pref_part[i];
  }
  else {

    /* Gather info about size within the row */

    MPI_Allgather(&col_nPin, 1, MPI_INT, recv_size, 1, MPI_INT,
                  phg->comm->row_comm);

    tmp = 0;
    for (i = 0; i < nProc_x; i++)
      tmp += recv_size[i];
    shg->nPins = tmp;

    shg->vindex = (int *) ZOLTAN_CALLOC((shg->nVtx+1), sizeof(int));
    shg->vedge = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
    shg->hindex = (int *) ZOLTAN_CALLOC((shg->nEdge+1), sizeof(int));
    shg->hvertex = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));

    if (!shg->vindex || !shg->hindex ||
        (shg->nPins && (!shg->vedge || !shg->hvertex)))
      MEMORY_ERROR;

    /* Gather vedge data for all procs in row */
    /* SCHEMEA can send local edge numbers; SCHEMEB requires edge LNO to GNO
       conversion. */

    disp[0] = 0;
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + recv_size[i-1];

    MPI_Allgatherv(col_vedge, col_nPin, MPI_INT,
                   shg->vedge, recv_size, disp, MPI_INT, phg->comm->row_comm);

    /* Gather vindex data for all procs in row: send vertex degrees, then
       rebuild the index array with a prefix sum. */

    for (i = 0; i < col_nVtx; i++)
      send_buf[i] = col_vindex[i+1] - col_vindex[i];

    /* SCHEMEA can assume a recv for each vertex;
     * SCHEMEB would need to gather the number of vtxs recv'd from each proc.
     */

    for (i = 0; i < nProc_x; i++)
      each[i] = (int)(phg->dist_x[i+1] - phg->dist_x[i]);

    disp[0] = 0;  /* Can't use dist_x, may not be sizeof(int) */
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + each[i-1];

    /* SCHEMEA can use phg->dist_x as displacement array;
     * SCHEMEB requires separate displacement array. */
    MPI_Allgatherv(send_buf, col_nVtx, MPI_INT,
                   shg->vindex, each, disp, MPI_INT, phg->comm->row_comm);

    /* Perform prefix sum on shg->vindex */
    sum = 0;
    for (i = 0; i < shg->nVtx; i++) {
      tmp = shg->vindex[i];
      shg->vindex[i] = sum;
      sum += tmp;
    }
    shg->vindex[shg->nVtx] = sum;

    /* Sanity check */
    if (shg->vindex[shg->nVtx] != shg->nPins) {
      printf("%d Sanity check failed:  "
             "shg->vindex %d != nPins %d\n",
              zz->Proc, shg->vindex[shg->nVtx], shg->nPins);
      exit(-1);
    }

    /* Gather fixed array, if any */
    if (hgp->UseFixedVtx){
#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather before gather. phg->fixed =");
      for (i=0; i<phg->nVtx; i++){
        printf(" %d ", phg->fixed_part[i]);
      }
      printf("\n");
#endif

      /* Can use the same each array. */
      /* Need to compute new disp array. */

      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }

      /* fixed_part holds ints, so it is gathered as MPI_INT. */
      MPI_Allgatherv(phg->fixed_part, phg->nVtx, MPI_INT,
                     shg->fixed_part, each, disp, MPI_INT, phg->comm->row_comm);

#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather after gather. shg->fixed =");
      for (i=0; i<shg->nVtx; i++){
        printf(" %d ", shg->fixed_part[i]);
      }
      printf("\n");
#endif
    }

    /* Gather pref part array, if any */
    if (hgp->UsePrefPart){
      /* Can use the same each array. */
      /* Need to compute new disp array. */
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }

      /* pref_part holds ints, so it is gathered as MPI_INT. */
      MPI_Allgatherv(phg->pref_part, phg->nVtx, MPI_INT,
                     shg->pref_part, each, disp, MPI_INT, phg->comm->row_comm);
    }

    /* Gather vertex weights, if any. */
    if (shg->VtxWeightDim) {

      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */

      disp[0] = 0;
      each[0] *= phg->VtxWeightDim;
      for (i = 1; i < nProc_x; i++) {
        each[i] *= phg->VtxWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }

      MPI_Allgatherv(phg->vwgt, phg->nVtx*phg->VtxWeightDim, MPI_FLOAT,
                     shg->vwgt, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }

    Zoltan_HG_Mirror(shg->nVtx, shg->vindex, shg->vedge,
                     shg->nEdge, shg->hindex, shg->hvertex);

  }  /* End row gather */

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, shg, NULL, stdout, "GatherAfter");/* NULL parts for now;
                                                           add non-NULL later */
  Zoltan_PHG_Plot_2D_Distrib(zz, phg);
  Zoltan_PHG_Plot_2D_Distrib(zz, shg);
#endif

End:

  /* On failure release the partially built result; *gathered_hg is NULL
     when the HGraph allocation itself failed. */
  if (ierr < 0 && *gathered_hg) {
    Zoltan_HG_HGraph_Free(*gathered_hg);
    ZOLTAN_FREE(gathered_hg);
  }

  /* each and disp point into recv_size and are not freed separately. */
  Zoltan_Multifree(__FILE__, __LINE__, 2, &send_buf, &recv_size);

  /* The col_* arrays were allocated here only when nProc_y > 1.  They are
     NULL if ownership moved to shg (nProc_x == 1) or if they were never
     allocated, so this also releases them after a failed column gather. */
  if (nProc_y > 1)
    Zoltan_Multifree(__FILE__, __LINE__, 4, &col_vedge,
                                            &col_vindex,
                                            &col_hvertex,
                                            &col_hindex);

  return ierr;
}