コード例 #1
0
ファイル: phg_refinement.c プロジェクト: agrippa/Trilinos
static int refine_fm2 (ZZ *zz,
                       HGraph *hg,
                       int p,
                       float *part_sizes,
                       Partition part,
                       PHGPartParams *hgp,
                       float bal_tol
    )
{
    int    i, j, ierr=ZOLTAN_OK, *pins[2]={NULL,NULL}, *lpins[2]={NULL,NULL};
    int    *moves=NULL, *mark=NULL, *adj=NULL, passcnt=0;
    float  *gain=NULL, *lgain=NULL;
    int    best_cutsizeat, cont, successivefails=0;
    double total_weight, weights[2], total_lweight, lweights[2], lwadjust[2],
        max_weight[2], lmax_weight[2], avail[2], gavail[2];
    int availcnt[2], gavailcnt[2];
    double targetw0, ltargetw0, minvw=DBL_MAX;
    double cutsize, best_cutsize, 
        best_limbal, imbal, limbal;
    HEAP   heap[2];
    char   *yo="refine_fm2";
    int    part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
#ifdef HANDLE_ISOLATED_VERTICES    
    int    isocnt=hg->nVtx; /* only root uses isocnt, isolated vertices
                               are kept at the end of moves array */
    int    *deg=NULL, *ldeg=NULL;
#if 0
    double best_imbal;
#endif
#endif
    PHGComm *hgc=hg->comm;
    int rootRank;
    
    struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
    int do_timing = (hgp->use_timers > 2);
    int detail_timing = (hgp->use_timers > 3);

    ZOLTAN_TRACE_ENTER(zz, yo);

    if (p != 2) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "p!=2 not allowed for refine_fm2.");
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_FATAL;
    }

    /* return only if globally there is no edge or vertex */
    if (!hg->dist_y[hgc->nProc_y] || hg->dist_x[hgc->nProc_x] == 0) {
        ZOLTAN_TRACE_EXIT(zz, yo);
        return ZOLTAN_OK;
    }


#ifdef USE_SERIAL_REFINEMENT_ON_ONE_PROC
    if (hgc->nProc==1){ /* only one proc? use serial code */
        ZOLTAN_TRACE_EXIT(zz, yo);
        return serial_fm2 (zz, hg, p, part_sizes, part, hgp, bal_tol);
    }
#endif

    if (do_timing) { 
        if (timer->rfrefine < 0) 
            timer->rfrefine = Zoltan_Timer_Init(zz->ZTime, 1, "Ref_P_Total");
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfrefine, hgc->Communicator);
    }
    if (detail_timing) {
        if (timer->rfpins < 0) 
            timer->rfpins = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Pins");
        if (timer->rfiso < 0) 
            timer->rfiso = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_IsolatedVert");
        if (timer->rfgain < 0) 
            timer->rfgain = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Gain");
        if (timer->rfheap < 0) 
            timer->rfheap = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Heap");
        if (timer->rfpass < 0) 
            timer->rfpass = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Pass");
        if (timer->rfroll < 0) 
            timer->rfroll = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_Roll");
        if (timer->rfnonroot < 0) 
            timer->rfnonroot = Zoltan_Timer_Init(zz->ZTime, 0, "Ref_P_NonRoot");
    }
    
    
    /* find the index of the proc in column group with 
       the most #nonzeros; it will be our root
       proc for computing moves since it has better 
       knowedge about global hypergraph.
       We ignore returned #pins (i) in root */
    Zoltan_PHG_Find_Root(hg->nPins, hgc->myProc_y, hgc->col_comm, 
                         &i, &rootRank);
    
    /* Calculate the weights in each partition and total, then maxima */
    weights[0] = weights[1] = 0.0;
    lweights[0] = lweights[1] = 0.0;
    if (hg->vwgt) 
        for (i = 0; i < hg->nVtx; i++) {
            lweights[part[i]] += hg->vwgt[i*hg->VtxWeightDim];
            minvw = (minvw > hg->vwgt[i*hg->VtxWeightDim]) 
                  ? hg->vwgt[i*hg->VtxWeightDim] 
                  : minvw;
        }
    else {
        minvw = 1.0;
        for (i = 0; i < hg->nVtx; i++)
            lweights[part[i]] += 1.0;
    }

    MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
    total_weight = weights[0] + weights[1];
    targetw0 = total_weight * part_sizes[0]; /* global target weight for part 0 */

    max_weight[0] = total_weight * bal_tol * part_sizes[0];
    max_weight[1] = total_weight * bal_tol * part_sizes[part_dim]; /* should be (1 - part_sizes[0]) */


    if (weights[0]==0.0) {
        ltargetw0 = targetw0 / hgc->nProc_x;
        lmax_weight[0] = max_weight[0] / hgc->nProc_x;
    } else {
        lmax_weight[0] = (weights[0]==0.0) ? 0.0 : lweights[0] +
            (max_weight[0] - weights[0]) * ( lweights[0] / weights[0] );
        ltargetw0 = targetw0 * ( lweights[0] / weights[0] ); /* local target weight */
    }
    if (weights[1]==0.0)
        lmax_weight[1] = max_weight[1] / hgc->nProc_x;
    else
        lmax_weight[1] = (weights[1]==0.0) ? 0.0 : lweights[1] +
            (max_weight[1] - weights[1]) * ( lweights[1] / weights[1] );

    total_lweight = lweights[0]+lweights[1];
    
    avail[0] = MAX(0.0, lmax_weight[0]-total_lweight);
    avail[1] = MAX(0.0, lmax_weight[1]-total_lweight);
    availcnt[0] = (avail[0] == 0) ? 1 : 0;
    availcnt[1] = (avail[1] == 0) ? 1 : 0; 
    MPI_Allreduce(avail, gavail, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
    MPI_Allreduce(availcnt, gavailcnt, 2, MPI_INT, MPI_SUM, hgc->row_comm);

#ifdef _DEBUG
    if (gavailcnt[0] || gavailcnt[1])
        uprintf(hgc, "before adjustment, LMW[%.1lf, %.1lf]\n", lmax_weight[0], lmax_weight[1]);
#endif

    if (gavailcnt[0]) 
        lmax_weight[0] += gavail[0] / (double) gavailcnt[0];
    
    if (gavailcnt[1]) 
        lmax_weight[1] += gavail[1] / (double) gavailcnt[1];
    
    /* Our strategy is to stay close to the current local weight balance.
       We do not need the same local balance on each proc, as long as
       we achieve approximate global balance.                            */

#ifdef _DEBUG
    imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
    limbal = (ltargetw0==0.0) ? 0.0 : fabs(lweights[0]-ltargetw0)/ltargetw0;
    uprintf(hgc, "H(%d, %d, %d), FM2: W[%.1lf, %.1lf] MW:[%.1lf, %.1lf] I=%.3lf  LW[%.1lf, %.1lf] LMW[%.1lf, %.1lf] LI=%.3lf\n", hg->nVtx, hg->nEdge, hg->nPins, weights[0], weights[1], max_weight[0], max_weight[1], imbal, lweights[0], lweights[1], lmax_weight[0], lmax_weight[1], limbal);
#endif

    
    if ((hg->nEdge && (!(pins[0]    = (int*) ZOLTAN_MALLOC(2 * hg->nEdge * sizeof(int)))
                      || !(lpins[0] = (int*) ZOLTAN_CALLOC(2 * hg->nEdge, sizeof(int))))) ||
        (hg->nVtx && (!(moves   = (int*)   ZOLTAN_MALLOC(hg->nVtx * sizeof(int)))
                     || !(lgain = (float*) ZOLTAN_MALLOC(hg->nVtx * sizeof(float))))))
        MEMORY_ERROR;

    if (hg->nEdge) {
        pins[1] = &(pins[0][hg->nEdge]);
        lpins[1] = &(lpins[0][hg->nEdge]);
    }

    if (hgc->myProc_y==rootRank) { /* only root needs mark, adj, gain and heaps*/
        Zoltan_Heap_Init(zz, &heap[0], hg->nVtx);
        Zoltan_Heap_Init(zz, &heap[1], hg->nVtx);  
        if (hg->nVtx &&
            (!(mark     = (int*)   ZOLTAN_CALLOC(hg->nVtx, sizeof(int)))
             || !(adj   = (int*)   ZOLTAN_MALLOC(hg->nVtx * sizeof(int)))   
             || !(gain  = (float*) ZOLTAN_MALLOC(hg->nVtx * sizeof(float)))))
            MEMORY_ERROR;
    }

    /* Initial calculation of the local pin distribution (sigma in UVC's papers)  */
    if (detail_timing)         
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfpins, hgc->Communicator);                        
    for (i = 0; i < hg->nEdge; ++i)
        for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j){
            ++(lpins[part[hg->hvertex[j]]][i]);
        }
    if (detail_timing)         
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpins, hgc->Communicator);                    
    

#ifdef HANDLE_ISOLATED_VERTICES        
    /* first compute vertex degree to find any isolated vertices
       we use lgain and gain, as ldeg, deg.*/
    if (hg->nVtx) {
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);        
        ldeg = (int *) lgain;
        deg = (int *) gain; /* null for non-root but that is fine */
        for (i = 0; i < hg->nVtx; ++i)
            ldeg[i] = hg->vindex[i+1] - hg->vindex[i];
        MPI_Reduce(ldeg, deg, hg->nVtx, MPI_INT, MPI_SUM, rootRank,
                   hg->comm->col_comm);

        if (hgc->myProc_y==rootRank) { /* root marks isolated vertices */
            for (i=0; i<hg->nVtx; ++i)
                if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
                    if (!deg[i]) {
                        moves[--isocnt] = i;
                        part[i] = -(part[i]+1); /* remove those vertices from that part*/
                    }
                }
        }   
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);        

    }
#endif
    
    do {
        int v=1, movecnt=0, neggaincnt=0, from, to;
        int maxneggain = (hgp->fm_max_neg_move < 0) ? hg->nVtx : hgp->fm_max_neg_move;
        int notfeasible=(weights[0]>max_weight[0]) || (weights[1]>max_weight[1]);
    
        /* now compute global pin distribution */
        if (hg->nEdge) {
            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfpins, hgc->Communicator);                    
            MPI_Allreduce(lpins[0], pins[0], 2*hg->nEdge, MPI_INT, MPI_SUM, 
                          hgc->row_comm);
            if (detail_timing)         
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpins, hgc->Communicator);                    
        }

        /* now we can compute actual cut */
        best_cutsizeat=0;
        cutsize = 0.0;
        for (i=0; i < hg->nEdge; ++i) {
            if (pins[0][i] && pins[1][i])
                cutsize += (hg->ewgt ? hg->ewgt[i] : 1.0);
        }
        MPI_Allreduce(&cutsize, &best_cutsize, 1, MPI_DOUBLE, MPI_SUM, hgc->col_comm);
        cutsize = best_cutsize;

        imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;        
        best_limbal = limbal = (ltargetw0==0.0) ? 0.0
            : fabs(lweights[0]-ltargetw0)/ltargetw0;

        /* UVCUVC: it looks like instead of moving always from overloaded
           part, alternating the 'from' part gives better results.
           Hence if the imbal is not really bad (2x worse) we use that approach  */
        if (imbal > BADBALANCE*(bal_tol-1.0) ) /* decide which way the moves will be in this pass */
            from = (weights[0] < targetw0) ? 1 : 0;
        else 
            from = passcnt % 2; 
        /* we want to be sure that everybody!!! picks the same source */
        MPI_Bcast(&from, 1, MPI_INT, 0, hgc->Communicator); 

        to = 1-from;
        
#ifdef _DEBUG
        /* Just for debugging */
        best_cutsize = Zoltan_PHG_Compute_NetCut(hgc, hg, part);
        if (best_cutsize!=cutsize) {
            errexit("%s: Initial cutsize=%.2lf Verify: total=%.2lf\n", uMe(hgc), cutsize,
                    best_cutsize);
        }
        if (hgc->myProc_y==rootRank)
            for (i = 0; i< hg->nVtx; ++i)
                if (mark[i])
                    errexit("mark[%d]=%d", i, mark[i]);
        /* debuggging code ends here */
#endif

        /* compute only the gains of the vertices from 'from' part */
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfgain, hgc->Communicator);                    
        
        for (i = 0; i < hg->nVtx; ++i) {
            lgain[i] = 0.0;
            if ((part[i]==from) && (!hgp->UseFixedVtx || hg->fixed_part[i]<0))
                for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
                    int edge = hg->vedge[j];
                    if ((pins[0][edge]+pins[1][edge])>1) { /* if they have at least 2 pins :) */
                        if (pins[part[i]][edge] == 1)
                            lgain[i] += (hg->ewgt ? hg->ewgt[edge] : 1.0);
                        else if (pins[1-part[i]][edge] == 0)
                            lgain[i] -= (hg->ewgt ? hg->ewgt[edge] : 1.0);
                    }
                }
        }
        /* now sum up all gains on only root proc */
        if (hg->nVtx)
            MPI_Reduce(lgain, gain, hg->nVtx, MPI_FLOAT, MPI_SUM, rootRank, 
                       hgc->col_comm);
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfgain, hgc->Communicator);                    
        

        if (hgp->output_level >= PHG_DEBUG_ALL) {
            imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
            printf("%s FM Pass %d (%d->%d) Cut=%.2f W[%5.0f, %5.0f] I= %.2f LW[%5.0f, %5.0f] LI= %.2f\n", uMe(hgc), passcnt, from, to, cutsize, weights[0], weights[1], imbal, lweights[0], lweights[1], limbal);
        }

        if (hgc->myProc_y==rootRank) {
            /* those are the lucky ones; each proc in column-group
               could have compute the same moves concurrently; but for this
               version we'll do it in the root procs and broadcast */

#ifdef HANDLE_ISOLATED_VERTICES
            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);                    
            lwadjust[0] = lwadjust[1] = 0.0;
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int   u=moves[i], pno=-part[u]-1;
                float w=(hg->vwgt ? hg->vwgt[u*hg->VtxWeightDim] : 1.0);

                if (pno<0 || pno>1)
                    errexit("heeeey pno=%d", pno);
                /* let's remove it from its part */
                lwadjust[pno] -= w;                
            }
            lweights[0] += lwadjust[0];
            lweights[1] += lwadjust[1];
            if (detail_timing)         
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);                    
#endif

            if (detail_timing)         
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfheap, hgc->Communicator);                    
            
            /* Initialize the heaps and fill them with the gain values */
            Zoltan_Heap_Clear(&heap[from]);  
            for (i = 0; i < hg->nVtx; ++i)
                if ((part[i]==from) && (!hgp->UseFixedVtx || hg->fixed_part[i]<0))
                    Zoltan_Heap_Input(&heap[from], i, gain[i]);
            Zoltan_Heap_Make(&heap[from]);
            if (detail_timing) {
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfheap, hgc->Communicator);
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfpass, hgc->Communicator);
            }

            while ((neggaincnt < maxneggain) && ((lweights[to]+minvw) <= lmax_weight[to]) ) {
                if (Zoltan_Heap_Empty(&heap[from])) { /* too bad it is empty */
                    v = -1;
                    break;
                }
                
                v = Zoltan_Heap_Extract_Max(&heap[from]);    
                
#ifdef _DEBUG
                if (from != part[v])
                    errexit("hooop from=%d part[%d]=%d", from, v, part[v]);
#endif

                /* Mark vertex we picked from the heap so it is "locked". 
                   For the current strategy, moving only one direction 
                   at a time, the mark information is not critical.
                   Note that the mark array is also used in the move/update 
                   routine so don't remove it! */
                ++mark[v];
                if (lweights[to]+((hg->vwgt)?hg->vwgt[v*hg->VtxWeightDim]:1.0) > lmax_weight[to]) {
#ifdef _DEBUG2                    
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d [%4.0lf, %4.0lf] NF\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1]);
#endif
                    /* Negative value in moves array means we have examined 
                       the vertex but couldn't move it. Note offset by one,
                       otherwise zero would be ambiguous. */
                    moves[movecnt++] = -(v+1);
                    continue;
                } 

                    
                moves[movecnt] = v;
                ++neggaincnt;
                cutsize -= gain[v];

                fm2_move_vertex_oneway(v, hg, part, gain, heap, pins, lpins, weights, lweights, mark, adj);
                imbal = (targetw0==0.0) ? 0.0
                    : fabs(weights[0]-targetw0)/targetw0;
                limbal = (ltargetw0==0.0) ? 0.0
                    : fabs(lweights[0]-ltargetw0)/ltargetw0;

                if (notfeasible || (cutsize<best_cutsize) ||
                                   (cutsize==best_cutsize && limbal < best_limbal)) {
#ifdef _DEBUG2                    
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d W[%4.0lf, %4.0lf] I:%.2lf LW[%4.0lf, %4.0lf] LI:%.2lf C:%.1lf<-- Best\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1], imbal, lweights[0], lweights[1], limbal, cutsize); /* after move gain is -oldgain */
#endif
                    notfeasible = weights[from]>max_weight[from];
                    best_cutsize = cutsize;
                    best_cutsizeat = movecnt+1;
                    best_limbal = limbal;
                    neggaincnt = 0;
                }
#ifdef _DEBUG2                
                else
                    printf("%s %4d: %6d (g: %5.1lf), p:%2d [%4.0lf, %4.0lf] %.1lf\n", uMe(hgc), movecnt, v, gain[v], from, weights[0], weights[1], cutsize);
#endif
                ++movecnt;
            }

            
            if (detail_timing) {
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfpass, hgc->Communicator);
                ZOLTAN_TIMER_START(zz->ZTime, timer->rfroll, hgc->Communicator);
            }

#ifdef _DEBUG
	    if (v<0)
                uprintf(hgc, "EOLB @ %d there was no vertex to select: v=%d\n", movecnt, v);
	    else if (neggaincnt >= maxneggain) 
                uprintf(hgc, "EOLB @ %d max neg move reached neggaincnt(%d) >= maxneggain\n", movecnt, neggaincnt, maxneggain);
	    else 
                uprintf(hgc, "EOLB @ %d balance constraint LW[%.1lf, %.1lf] and MAXW[%.1lf, %.1lf]\n", movecnt, lweights[0], lweights[1], lmax_weight[0], lmax_weight[1]);
#endif
            
            /* roll back the moves without any improvement */
            for (i=movecnt-1; i>=best_cutsizeat; --i) {
                int vv = moves[i];
                if (vv<0)
                    vv = -vv-1;
                else /* we don't need to roll pins, or weights etc; rolling local ones suffices */
                    fm2_move_vertex_oneway_nonroot(vv, hg, part, lpins, lweights);
                mark[vv] = 0;
            }
            for (i=0; i<best_cutsizeat; ++i){
                int vv = (moves[i] < 0 ) ? -moves[i] - 1 : moves[i];
                mark[vv] = 0;
            }
            if (detail_timing) 
                ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfroll, hgc->Communicator);            
        }

        if (detail_timing) 
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfnonroot, hgc->Communicator);            
        
        /* now root bcast moves to column procs */
        MPI_Bcast(&best_cutsizeat, 1, MPI_INT, rootRank, hgc->col_comm);
        MPI_Bcast(moves, best_cutsizeat, MPI_INT, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) { /* now non-root does move simulation */
            for (i=0; i<best_cutsizeat; ++i) {
                int vv = moves[i];
                if (vv>=0)
                    fm2_move_vertex_oneway_nonroot(vv, hg, part, lpins, lweights);
            }
        }
        if (detail_timing) 
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfnonroot, hgc->Communicator);            

        
#ifdef _DEBUG
        for (i = 0; i < hg->nEdge; ++i) {
            int lp[2];

            lp[0] = lp[1] = 0;
            for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j)
                ++(lp[part[hg->hvertex[j]]]);
            if ((lp[0] != lpins[0][i]) || (lp[1] != lpins[1][i]))
                errexit("for net %d -- lp=[%d, %d] lpins[%d, %d]", i, lp[0], lp[1], lpins[0][i], lpins[1][i]);
        }
#endif


#ifdef HANDLE_ISOLATED_VERTICES
        if (detail_timing)         
            ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);        
        
#if 0
        MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);        
        best_imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        if (hgc->myProc_y==rootRank)             
            uprintf(hgc, "BEFORE ISOLATED VERTEX HANDLING WE *THINK* GLOBAL IMBALANCE is %.3lf\n", best_imbal);
#endif
        
        if (hgc->myProc_y==rootRank) {
            best_limbal = (ltargetw0==0.0) ? 0.0
                : fabs(lweights[0]-ltargetw0)/ltargetw0;
            
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int u = moves[i], npno;
                float w=(hg->vwgt ? hg->vwgt[u*hg->VtxWeightDim] : 1.0);

                npno = (lweights[0] < ltargetw0) ? 0 : 1;
                lweights[npno] += w;
                lwadjust[npno] += w;
                part[u] = -(npno+1); /* move to npno (might be same as pno;
                                        so it may not be a real move */
            }
            limbal = (ltargetw0==0.0) ? 0.0
                : fabs(lweights[0]-ltargetw0)/ltargetw0;
#if 0           
            uprintf(hgc, "before binpacking of %d isolated vertices balance was: %.3lf now: %.3lf\n", hg->nVtx-isocnt, best_limbal, limbal);
#endif
        }

        MPI_Bcast(lwadjust, 2, MPI_DOUBLE, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) {
            lweights[0] += lwadjust[0];
            lweights[1] += lwadjust[1];
        }
        if (detail_timing)         
            ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);                
#endif        
        
        MPI_Allreduce(lweights, weights, 2, MPI_DOUBLE, MPI_SUM, hgc->row_comm);
#if 0       
        best_imbal = (targetw0==0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        if (hgc->myProc_y==rootRank)             
            uprintf(hgc, "NEW GLOBAL IMBALANCE is %.3lf\n", best_imbal);
#endif
        
        if (weights[0]==0.0) 
            ltargetw0 = lmax_weight[0] = 0.0;
        else {
            lmax_weight[0] = lweights[0] +
                (max_weight[0] - weights[0]) * ( lweights[0] / weights[0] );
            ltargetw0 = targetw0 * ( lweights[0] / weights[0] ); /* local target weight */
        }
        lmax_weight[1] = (weights[1]==0.0) ? 0.0 : lweights[1] +
            (max_weight[1] - weights[1]) * ( lweights[1] / weights[1] );
        
        cont = 0;
        MPI_Allreduce(&best_cutsizeat, &cont, 1, MPI_INT, MPI_LOR, hgc->row_comm);

        /* since we're only moving in one direction; make sure two successive
           pass didn't produce any improvement before terminating */
        if (!cont)
            ++successivefails; 
        else
            successivefails = 0; 
#ifdef _DEBUG
        /* Just for debugging */
        best_cutsize = Zoltan_PHG_Compute_NetCut(hgc, hg, part);
        imbal = (targetw0 == 0.0) ? 0.0 : fabs(weights[0]-targetw0)/targetw0;
        printf("%s End of Pass %d Comp.Cut=%.2lf RealCut=%.2lf W[%5.0lf, %5.0lf] Imbal=%.2lf\n", uMe(hgc), passcnt, cutsize, best_cutsize, weights[0], weights[1], imbal);
        /* debuggging code ends here */
#endif
    } while (successivefails<2 &&  (++passcnt < hgp->fm_loop_limit));


#ifdef HANDLE_ISOLATED_VERTICES
    if (detail_timing)         
        ZOLTAN_TIMER_START(zz->ZTime, timer->rfiso, hgc->Communicator);            
    /* now root sneds the final part no's of isolated vertices; if any */
    MPI_Bcast(&isocnt, 1, MPI_INT, rootRank, hgc->col_comm);
    if (isocnt<hg->nVtx) {
        deg = (int *) lgain; /* we'll use for part no's of isolated vertices */
        if (hgc->myProc_y==rootRank) 
            for (i=isocnt; i < hg->nVtx; ++i) { /* go over isolated vertices */
                int u = moves[i];
                deg[i] = part[u] = -part[u]-1; 
            }
            
        MPI_Bcast(&moves[isocnt], hg->nVtx-isocnt, MPI_INT, rootRank, hgc->col_comm);
        MPI_Bcast(&deg[isocnt], hg->nVtx-isocnt, MPI_INT, rootRank, hgc->col_comm);
        if (hgc->myProc_y!=rootRank) 
            for (i=isocnt; i < hg->nVtx; ++i)  /* go over isolated vertices */
                part[moves[i]] = deg[i];
    }
    if (detail_timing)         
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfiso, hgc->Communicator);            
#endif
 End:    

    if (hgc->myProc_y==rootRank) { /* only root needs mark, adj, gain and heaps*/        
        Zoltan_Multifree(__FILE__,__LINE__, 3, &mark, &adj, &gain);
        Zoltan_Heap_Free(&heap[0]);
        Zoltan_Heap_Free(&heap[1]);        
    }
    
    Zoltan_Multifree(__FILE__, __LINE__, 4, &pins[0], &lpins[0], &moves, &lgain);

    if (do_timing) 
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->rfrefine, hgc->Communicator);
    
    
    ZOLTAN_TRACE_EXIT(zz, yo);
    return ierr;
}
コード例 #2
0
int
Zoltan_Matrix_Build (ZZ* zz, Zoltan_matrix_options *opt, Zoltan_matrix* matrix)
{
  static char *yo = "Zoltan_Matrix_Build";
  int ierr = ZOLTAN_OK;
  int nX;
  int  *xGNO = NULL;
  ZOLTAN_ID_PTR xLID=NULL;
  ZOLTAN_ID_PTR xGID=NULL;
  ZOLTAN_ID_PTR yGID=NULL;
  ZOLTAN_ID_PTR pinID=NULL;
  float *xwgt = NULL;
  int * Input_Parts=NULL;
  struct Zoltan_DD_Struct *dd = NULL;
  int *proclist = NULL;
  int *xpid = NULL;
  int i;

  ZOLTAN_TRACE_ENTER(zz, yo);

  memset (matrix, 0, sizeof(Zoltan_matrix)); /* Set all fields to 0 */
  memcpy (&matrix->opts, opt, sizeof(Zoltan_matrix_options));

  /**************************************************/
  /* Obtain vertex information from the application */
  /**************************************************/

  ierr = Zoltan_Get_Obj_List(zz, &nX, &xGID, &xLID,
			     zz->Obj_Weight_Dim, &xwgt,
			     &Input_Parts);

  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error getting object data");
    goto End;
  }
  ZOLTAN_FREE(&Input_Parts);
  ZOLTAN_FREE(&xwgt);

  /*******************************************************************/
  /* Assign vertex consecutive numbers (gnos)                        */
  /*******************************************************************/

  if (matrix->opts.speed == MATRIX_FULL_DD) { /* Zoltan computes a translation */
    if (nX) {
      xGNO = (int*) ZOLTAN_MALLOC(nX*sizeof(int));
      if (xGNO == NULL)
	MEMORY_ERROR;
    }
    ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, xGNO, nX, matrix->opts.randomize, &matrix->globalX);

    if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error assigning global numbers to vertices");
      goto End;
    }

    ierr = Zoltan_DD_Create (&dd, zz->Communicator, zz->Num_GID, 1,
			     0, nX, 0);
    CHECK_IERR;

    /* Make our new numbering public */
    Zoltan_DD_Update (dd, xGID, (ZOLTAN_ID_PTR) xGNO, NULL,  NULL, nX);
  }
  else { /* We don't want to use the DD */
    xGNO = (int *) xGID;
    MPI_Allreduce(&nX, &matrix->globalX, 1, MPI_INT, MPI_SUM, zz->Communicator);
  }

  /* I store : xGNO, xGID, xpid,  */
  ierr = Zoltan_DD_Create (&matrix->ddX, zz->Communicator, 1, zz->Num_GID,
			   1, matrix->globalX/zz->Num_Proc, 0);
  CHECK_IERR;

  /* Hope a linear assignment will help a little */
  Zoltan_DD_Set_Neighbor_Hash_Fn1(matrix->ddX, matrix->globalX/zz->Num_Proc);
  /* Associate all the data with our xGNO */
  xpid = (int*)ZOLTAN_MALLOC(nX*sizeof(int));
  if (nX >0 && xpid == NULL) MEMORY_ERROR;
  for (i = 0 ; i < nX ; ++i)
    xpid[i] = zz->Proc;

  Zoltan_DD_Update (matrix->ddX, (ZOLTAN_ID_PTR)xGNO, xGID, (ZOLTAN_ID_PTR) xpid, NULL, nX);
  ZOLTAN_FREE(&xpid);

  if (matrix->opts.pinwgt)
    matrix->pinwgtdim = zz->Edge_Weight_Dim;
  else
    matrix->pinwgtdim = 0;

  ierr = matrix_get_edges(zz, matrix, &yGID, &pinID, nX, &xGID, &xLID, &xGNO, &xwgt);
  CHECK_IERR;
  matrix->nY_ori = matrix->nY;

  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)){
    goto End;
  }

  if (matrix->opts.enforceSquare && matrix->redist) {
    /* Convert yGID to yGNO using the same translation as x */
    /* Needed for graph : rowID = colID */
    /* y and x may have different distributions */
    matrix->yGNO = (int*)ZOLTAN_MALLOC(matrix->nY * sizeof(int));
    if (matrix->nY && matrix->yGNO == NULL)
      MEMORY_ERROR;
    ierr = Zoltan_DD_Find (dd, yGID, (ZOLTAN_ID_PTR)(matrix->yGNO), NULL, NULL,
		    matrix->nY, NULL);
    if (ierr != ZOLTAN_OK) {
      ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Hyperedge GIDs don't match.\n");
      ierr = ZOLTAN_FATAL;
      goto End;
    }
  }

  if (matrix->opts.local) { /* keep only local edges */
    proclist = (int*) ZOLTAN_MALLOC(matrix->nPins*sizeof(int));
    if (matrix->nPins && proclist == NULL) MEMORY_ERROR;
  }
  else
    proclist = NULL;

  /* Convert pinID to pinGNO using the same translation as x */
  if (matrix->opts.speed == MATRIX_FULL_DD) {
    matrix->pinGNO = (int*)ZOLTAN_MALLOC(matrix->nPins* sizeof(int));
    if ((matrix->nPins > 0) && (matrix->pinGNO == NULL)) MEMORY_ERROR;

    ierr = Zoltan_DD_Find (dd, pinID, (ZOLTAN_ID_PTR)(matrix->pinGNO), NULL, NULL,
			   matrix->nPins, proclist);
    if (ierr != ZOLTAN_OK) {
      ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Undefined GID found.\n");
      ierr = ZOLTAN_FATAL;
      goto End;
    }
    ZOLTAN_FREE(&pinID);
    Zoltan_DD_Destroy(&dd);
    dd = NULL;
  }
  else {
    matrix->pinGNO = (int *) pinID;
    pinID = NULL;
  }

/*   if (matrix->opts.local) {  /\* keep only local edges *\/ */
/*     int *nnz_list; /\* nnz offset to delete *\/ */
/*     int nnz;       /\* number of nnz to delete *\/ */
/*     int i; */

/*     nnz_list = (int*) ZOLTAN_MALLOC(matrix->nPins*sizeof(int)); */
/*     if (matrix->nPins && nnz_list == NULL) MEMORY_ERROR; */
/*     for (i = 0, nnz=0 ; i < matrix->nPins ; ++i) { */
/*       if (proclist[i] == zz->Proc) continue; */
/*       nnz_list[nnz++] = i; */
/*     } */
/*     ZOLTAN_FREE(&proclist); */
/*     Zoltan_Matrix_Delete_nnz(zz, matrix, nnz, nnz_list); */
/*   } */

  if (!matrix->opts.enforceSquare) {
    /* Hyperedges name translation is different from the one of vertices */
    matrix->yGNO = (int*)ZOLTAN_CALLOC(matrix->nY, sizeof(int));
    if (matrix->nY && matrix->yGNO == NULL) MEMORY_ERROR;

    /*     int nGlobalEdges = 0; */
    ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, matrix->yGNO, matrix->nY,
					     matrix->opts.randomize, &matrix->globalY);
    CHECK_IERR;

/*     /\**************************************************************************************** */
/*      * If it is desired to remove dense edges, divide the list of edges into */
/*      * two lists.  The ZHG structure will contain the removed edges (if final_output is true), */
/*      * and the kept edges will be returned. */
/*      ****************************************************************************************\/ */
/*     totalNumEdges = zhg->globalHedges; */

/*     ierr = remove_dense_edges_matrix(zz, zhg, edgeSizeThreshold, final_output, */
/*				     &nLocalEdges, &nGlobalEdges, &nPins, */
/*				     &edgeGNO, &edgeSize, &edgeWeight, &pinGNO, &pinProcs); */

/*     if (nGlobalEdges < totalNumEdges){ */
/*       /\* re-assign edge global numbers if any edges were removed *\/ */
/*       ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, edgeGNO, nLocalEdges, */
/*					       randomizeInitDist, &totalNumEdges); */
/*       if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) { */
/*	ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error reassigning global numbers to edges"); */
/*	goto End; */
/*       } */
/*     } */

      /* We have to define ddY : yGNO, yGID, ywgt */
      ierr = Zoltan_DD_Create (&matrix->ddY, zz->Communicator, 1, zz->Num_GID,
			       0, matrix->globalY/zz->Num_Proc, 0);
      /* Hope a linear assignment will help a little */
      Zoltan_DD_Set_Neighbor_Hash_Fn1(matrix->ddY, matrix->globalY/zz->Num_Proc);
      /* Associate all the data with our yGNO */
      Zoltan_DD_Update (matrix->ddY, (ZOLTAN_ID_PTR)matrix->yGNO,
			yGID, NULL, NULL, matrix->nY);
  }

 End:
  ZOLTAN_FREE(&xpid);
  ZOLTAN_FREE(&xLID);
  ZOLTAN_FREE(&xGNO);
  ZOLTAN_FREE(&xGID);
  ZOLTAN_FREE(&xwgt);
  ZOLTAN_FREE(&Input_Parts);
  ZOLTAN_FREE(&proclist);
  if (dd != NULL)
    Zoltan_DD_Destroy(&dd);
  /* Already stored in the DD */
  ZOLTAN_FREE(&yGID);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
コード例 #3
0
ファイル: lb_migrate.c プロジェクト: haripandey/trilinos
int Zoltan_Migrate(
    ZZ *zz,                      /* Zoltan structure.                  */
    int num_import,              /* Number of non-local objects assigned to the
                                  processor in the new decomposition.        */
    ZOLTAN_ID_PTR import_global_ids, /* Array of global IDs for non-local objects
                                  assigned to this processor in the new
                                  decomposition; this field can be NULL if
                                  the application doesn't provide import IDs.*/
    ZOLTAN_ID_PTR import_local_ids,  /* Array of local IDs for non-local objects
                                  assigned to the processor in the new
                                  decomposition; this field can be NULL if the
                                  application does not provide import IDs.   */
    int *import_procs,           /* Array of processor IDs of processors owning
                                  the non-local objects that are assigned to
                                  this processor in the new decomposition; this
                                  field can be NULL if the application does
                                  not provide import IDs.                    */
    int *import_to_part,         /* Array of partition numbers to which imported
                                  objects should be assigned.                */
    int num_export,              /* Number of objs to be exported
                                  to other processors to establish the new
                                  decomposition.                             */
    ZOLTAN_ID_PTR export_global_ids, /* Array of global IDs of
                                  objects to be exported to other processors
                                  to establish the new decomposition.        */
    ZOLTAN_ID_PTR export_local_ids,  /* Array of local IDs of
                                  objects to be exported to other processors
                                  to establish the new decomposition.        */
    int *export_procs,           /* Array of processor IDs
                                  to which objects will be exported
                                  to establish the new decomposition.        */
    int *export_to_part          /* Array of partition numbers to which exported
                                  objects should be assigned.                */
)
{
    /*
     *  Routine to help perform migration.  If migration pre-processing routine
     *  (ZOLTAN_PRE_MIGRATE_FN) is specified, this routine first calls that fn.
     *  It then calls a function to obtain the size of the migrating objects
     *  (ZOLTAN_OBJ_SIZE_FN).  The routine next calls an application-specified
     *  object packing routine (ZOLTAN_PACK_OBJ_FN) for each object
     *  to be exported.  It develops the needed communication map to move the
     *  objects to other processors.  It performs the communication according
     *  to the map, and then calls an application-specified object unpacking
     *  routine (ZOLTAN_UNPACK_OBJ_FN) for each object imported.
     */

    char *yo = "Zoltan_Migrate";
    int num_gid_entries, num_lid_entries;  /* lengths of global & local ids */
    int *sizes = NULL;       /* sizes (in bytes) of the object data for export. */
    int id_size;             /* size (in bytes) of ZOLTAN_GID + padding for
                            alignment                                       */
    int tag_size;            /* size (in bytes) of ZOLTAN_GID + one int
                            (for message size) */
    char *export_buf = NULL; /* buffer for packing export data.                 */
    char *import_buf = NULL; /* buffer for receiving imported data.             */
    char *tmp;               /* temporary pointer into buffers.                 */
    int i;                   /* loop counter.                                   */
    int tmp_size;            /* size of a single object's data.                 */
    int *idx = NULL;         /* index used for multi-fn packs and unpacks.      */
    int idx_cnt = 0;         /* index counter for idx array.                    */
    ZOLTAN_ID_PTR tmp_id = NULL; /* pointer to storage for a global ID in comm
                                buf  */
    ZOLTAN_ID_PTR lid;       /* temporary pointer to a local ID; used to pass
                            NULL to query functions when NUM_LID_ENTRIES=0. */
    ZOLTAN_COMM_OBJ *imp_plan = NULL; /* Comm obj built from import lists. */
    ZOLTAN_COMM_OBJ *exp_plan = NULL; /* Comm obj built from export lists. */
    int msgtag, msgtag2;     /* Tags for communication routines                 */
    int total_send_size;     /* Total size of outcoming message (in #items)     */
    int total_recv_size;     /* Total size of incoming message (in #items)      */
    int aligned_int;         /* size of an int padded for alignment             */
    int dest;                /* temporary destination partition.                */
    int include_parts = 0;   /* flag indicating whether partition info is
                            provided */
    int ierr = ZOLTAN_OK;
    int actual_num_exp = 0;
    int actual_exp_allocated = 0;
    ZOLTAN_ID_PTR actual_exp_gids = NULL;    /* Arrays containing only objs to  */
    ZOLTAN_ID_PTR actual_exp_lids = NULL;    /* actually be packed.  Objs that  */
    int *actual_exp_procs = NULL;            /* are changing partition but not  */
    int *actual_exp_to_part = NULL;          /* processor may not be included.  */
    int actual_num_imp = 0;
    int actual_imp_allocated = 0;
    ZOLTAN_ID_PTR actual_imp_gids = NULL;    /* Arrays containing only objs to  */
    ZOLTAN_ID_PTR actual_imp_lids = NULL;    /* actually be imported. Objs that  */
    int *actual_imp_procs = NULL;            /* are changing partition but not  */
    int *actual_imp_to_part = NULL;          /* processor may not be included.  */

    ZOLTAN_TRACE_ENTER(zz, yo);

    /*
     *  Return if this processor is not in the Zoltan structure's
     *  communicator.
     */

    if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz)) {
        goto End;
    }

    /*
     *  Check that all procs use the same id types.
     */

    ierr = check_input(zz,
                       ((num_export >= 0 && export_to_part) ||
                        (num_import >= 0 && import_to_part)),
                       &include_parts);
    if (ierr != ZOLTAN_OK)
        goto End;

    num_gid_entries = zz->Num_GID;
    num_lid_entries = zz->Num_LID;

    /*
     *  Check that all necessary query functions are available.
     */

    if (zz->Get_Obj_Size == NULL && zz->Get_Obj_Size_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_OBJ_SIZE_FN or ZOLTAN_OBJ_SIZE_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Pack_Obj == NULL && zz->Pack_Obj_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_PACK_OBJ_FN or ZOLTAN_PACK_OBJ_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Unpack_Obj == NULL && zz->Unpack_Obj_Multi == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Must register a "
                           "ZOLTAN_UNPACK_OBJ_FN or ZOLTAN_UNPACK_OBJ_MULTI_FN function "
                           "to use the migration-help tools.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }


    if (num_export >= 0) {

        /* Build the actual export arrays */
        ierr = actual_arrays(zz, num_gid_entries, num_lid_entries,
                             num_export, export_global_ids, export_local_ids,
                             export_procs, export_to_part,
                             &actual_num_exp, &actual_exp_gids, &actual_exp_lids,
                             &actual_exp_procs, &actual_exp_to_part,
                             &actual_exp_allocated);
        if (ierr < 0)
            goto End;

        /* Compute communication map based on actual exports.  */

        msgtag = 32767;
        ierr = Zoltan_Comm_Create(&exp_plan, actual_num_exp, actual_exp_procs,
                                  zz->Communicator, msgtag, &actual_num_imp);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Error returned from Zoltan_Comm_Create.");
            goto End;
        }
    }

    else if (num_import >= 0) {

        /* Build the actual import arrays */
        ierr = actual_arrays(zz, num_gid_entries, num_lid_entries,
                             num_import, import_global_ids, import_local_ids,
                             import_procs, import_to_part,
                             &actual_num_imp, &actual_imp_gids, &actual_imp_lids,
                             &actual_imp_procs, &actual_imp_to_part,
                             &actual_imp_allocated);
        if (ierr < 0)
            goto End;

        /* Compute communication map based on imports.  */
        msgtag = 32767;
        ierr = Zoltan_Comm_Create(&imp_plan, actual_num_imp, actual_imp_procs,
                                  zz->Communicator, msgtag, &actual_num_exp);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Error returned from Zoltan_Comm_Create.");
            goto End;
        }

        /* Compute actual export lists for packing objects */
        if (actual_num_exp > 0) {
            actual_exp_allocated = 1;
            actual_exp_gids = ZOLTAN_MALLOC_GID_ARRAY(zz, actual_num_exp);
            actual_exp_lids = ZOLTAN_MALLOC_LID_ARRAY(zz, actual_num_exp);
            actual_exp_procs = (int *) ZOLTAN_MALLOC(sizeof(int) * actual_num_exp);
            if (include_parts)
                actual_exp_to_part = (int *) ZOLTAN_MALLOC(sizeof(int)*actual_num_exp);
            if (actual_exp_gids == NULL ||
                    (num_lid_entries && actual_exp_lids == NULL) ||
                    actual_exp_procs == NULL ||
                    (import_to_part != NULL && actual_exp_to_part == NULL)) {
                Zoltan_Multifree(__FILE__, __LINE__, 4,
                                 &actual_exp_gids, &actual_exp_lids,
                                 &actual_exp_procs, &actual_exp_to_part);
                ierr = ZOLTAN_MEMERR;
                goto End;
            }
        }

        msgtag2 = 32766;
        ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_gids,
                              (int) (sizeof(ZOLTAN_ID_TYPE)*(num_gid_entries)),
                              (char *) actual_exp_gids);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
            goto End;
        }

        if (num_lid_entries) {
            msgtag2--;
            ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_lids,
                                  (int) (sizeof(ZOLTAN_ID_TYPE)*num_lid_entries),
                                  (char *) actual_exp_lids);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
                goto End;
            }
        }

        Zoltan_Comm_Info(imp_plan, NULL, NULL, NULL, NULL, NULL, NULL,
                         NULL, NULL, NULL, NULL, NULL, actual_exp_procs, NULL);

        if (include_parts) {
            msgtag2--;
            ierr = Zoltan_Comm_Do(imp_plan, msgtag2, (char *) actual_imp_to_part,
                                  (int) sizeof(int), (char *) actual_exp_to_part);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
                goto End;
            }
        }

        /* Create inverse plan (i.e., plan based on exports) so can set
         * variable sizes.
         * (Zoltan_Comm_Do_Reverse(imp_plan, ...) allows sending variable
         * but does not tell how large to allocate receive buffer.
         */
        ierr = Zoltan_Comm_Invert_Plan(&imp_plan);
        exp_plan = imp_plan;
        imp_plan = NULL;
    }
    else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Import or export lists needed.");
        ierr = ZOLTAN_FATAL;
        goto End;
    }

    if (zz->Migrate.Pre_Migrate_PP != NULL) {
        zz->Migrate.Pre_Migrate_PP(zz->Migrate.Pre_Migrate_PP_Data,
                                   num_gid_entries, num_lid_entries,
                                   num_import, import_global_ids,
                                   import_local_ids, import_procs, import_to_part,
                                   num_export, export_global_ids,
                                   export_local_ids, export_procs, export_to_part,
                                   &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_PRE_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Pre_Migrate != NULL) {
        zz->Migrate.Pre_Migrate(zz->Migrate.Pre_Migrate_Data,
                                num_gid_entries, num_lid_entries,
                                num_import, import_global_ids,
                                import_local_ids, import_procs,
                                num_export, export_global_ids,
                                export_local_ids, export_procs,
                                &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_PRE_MIGRATE_FN function.");
            goto End;
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done pre-migration processing");

    id_size = Zoltan_Align(num_gid_entries * sizeof(ZOLTAN_ID_TYPE));
    /* Note that alignment is not strictly necessary
       when ZOLTAN_ID_TYPE is int or unsigned int. */
    aligned_int = Zoltan_Align(sizeof(int));
    tag_size = id_size + aligned_int;

    /*
     * For each object, allow space for its global ID and its data plus
     * one int (for the object data size).
     * Zoltan will pack the global IDs; the application must pack the data
     * through the pack routine.  Zoltan needs the global IDs for unpacking,
     * as the order of the data received during communication is not
     * necessarily the same order as import_global_ids[].
     * Zoltan also needs to communicate the sizes of the objects because
     * only the sender knows the size of each object.
     */
    if (actual_num_exp > 0) {
        sizes = (int *) ZOLTAN_MALLOC(actual_num_exp * sizeof(int));
        if (!sizes) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }

        if (zz->Get_Obj_Size_Multi != NULL) {
            zz->Get_Obj_Size_Multi(zz->Get_Obj_Size_Multi_Data,
                                   num_gid_entries, num_lid_entries, actual_num_exp,
                                   actual_exp_gids, actual_exp_lids, sizes, &ierr);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_OBJ_SIZE_MULTI function.");
                goto End;
            }
        }
        else {
            for (i = 0; i < actual_num_exp; i++) {
                lid = (num_lid_entries ? &(actual_exp_lids[i*num_lid_entries]) : NULL);
                sizes[i] = zz->Get_Obj_Size(zz->Get_Obj_Size_Data,
                                            num_gid_entries, num_lid_entries,
                                            &(actual_exp_gids[i*num_gid_entries]),
                                            lid, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_OBJ_SIZE function.");
                    goto End;
                }
            }
        }

        total_send_size = 0;

        for (i = 0; i < actual_num_exp; i++) {
            sizes[i] = Zoltan_Align(sizes[i]);
            total_send_size += sizes[i] + tag_size;
        }
        export_buf = (char *) ZOLTAN_CALLOC(total_send_size, sizeof(char));
        if (!export_buf) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }

        if (zz->Pack_Obj_Multi != NULL) {
            /* Allocate an index array for ZOLTAN_PACK_OBJ_MULTI_FN. */
            idx = (int *) ZOLTAN_MALLOC(actual_num_exp * sizeof(int));
            if (!idx) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
                ierr = ZOLTAN_MEMERR;
                goto End;
            }
        }

        /*
         *  Pack the objects for export.
         */

        idx_cnt = 0;
        tmp = export_buf;
        for (i = 0; i < actual_num_exp; i++) {

            /* Pack the object's global ID */
            tmp_id = (ZOLTAN_ID_PTR) tmp;
            ZOLTAN_SET_GID(zz, tmp_id, &(actual_exp_gids[i*num_gid_entries]));
            tmp += id_size;

            /* Pack the object's size */
            *((int *)tmp) = sizes[i];
            tmp += aligned_int;

            /* If using ZOLTAN_PACK_OBJ_MULTI_FN, build the index array. */
            idx_cnt += tag_size;
            if (idx != NULL) {
                idx[i] = idx_cnt;
            }
            tmp += sizes[i];
            idx_cnt += sizes[i];
        }

        if (zz->Pack_Obj_Multi != NULL) {
            if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                printf("[%1d] DEBUG in %s: Packing objects with multi-pack\n",
                       zz->Proc, yo);
            }
            zz->Pack_Obj_Multi(zz->Pack_Obj_Multi_Data,
                               num_gid_entries, num_lid_entries, actual_num_exp,
                               actual_exp_gids, actual_exp_lids,
                               (actual_exp_to_part!=NULL ? actual_exp_to_part
                                : actual_exp_procs),
                               sizes, idx, export_buf, &ierr);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_PACK_OBJ_MULTI function.");
                goto End;
            }
        }
        else {
            tmp = export_buf + tag_size;
            for (i = 0; i < actual_num_exp; i++) {
                if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                    printf("[%1d] DEBUG in %s: Packing object with gid ", zz->Proc, yo);
                    ZOLTAN_PRINT_GID(zz, &(actual_exp_gids[i*num_gid_entries]));
                    printf("size = %d bytes\n", sizes[i]);
                }

                /* Pack the object's data */
                lid = (num_lid_entries ? &(actual_exp_lids[i*num_lid_entries]) : NULL);
                dest = (actual_exp_to_part != NULL ? actual_exp_to_part[i]
                        : actual_exp_procs[i]);
                zz->Pack_Obj(zz->Pack_Obj_Data,
                             num_gid_entries, num_lid_entries,
                             &(actual_exp_gids[i*num_gid_entries]), lid, dest,
                             sizes[i], tmp, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_PACK_OBJ function.");
                    goto End;
                }
                tmp += sizes[i] + tag_size;
            }
        }
        ZOLTAN_FREE(&idx);
        tmp_id = NULL;
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done packing objects");


    /* Modify sizes[] to contain message sizes, not object sizes */
    for (i=0; i<actual_num_exp; i++) {
        sizes[i] += tag_size;
    }

    msgtag--;
    ierr = Zoltan_Comm_Resize(exp_plan, sizes, msgtag, &total_recv_size);
    if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Resize.");
        goto End;
    }

    if (actual_num_imp > 0) {
        import_buf = (char *) ZOLTAN_MALLOC(total_recv_size);
        if (!import_buf) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
            ierr = ZOLTAN_MEMERR;
            goto End;
        }
    }

    /*
     *  Send the export data using the communication plan.
     */

    msgtag2 = 32765;
    ierr = Zoltan_Comm_Do(exp_plan, msgtag2, export_buf, 1, import_buf);
    if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from Zoltan_Comm_Do.");
        goto End;
    }

    /*
     *  Free whatever memory we can.
     */

    Zoltan_Comm_Destroy(&exp_plan);
    ZOLTAN_FREE(&export_buf);
    ZOLTAN_FREE(&sizes);

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done communication");

    /*
     *  Perform application-specified processing before unpacking the data.
     */
    if (zz->Migrate.Mid_Migrate_PP != NULL) {
        zz->Migrate.Mid_Migrate_PP(zz->Migrate.Mid_Migrate_PP_Data,
                                   num_gid_entries, num_lid_entries,
                                   num_import, import_global_ids,
                                   import_local_ids, import_procs, import_to_part,
                                   num_export, export_global_ids,
                                   export_local_ids, export_procs, export_to_part,
                                   &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_MID_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Mid_Migrate != NULL) {
        zz->Migrate.Mid_Migrate(zz->Migrate.Mid_Migrate_Data,
                                num_gid_entries, num_lid_entries,
                                num_import, import_global_ids,
                                import_local_ids, import_procs,
                                num_export, export_global_ids,
                                export_local_ids, export_procs,
                                &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_MID_MIGRATE_FN function.");
            goto End;
        }
    }

    /*
     *  Unpack the object data.
     */

    if (actual_num_imp > 0) {

        if (zz->Unpack_Obj_Multi != NULL) {

            /* Allocate and fill input arrays for Unpack_Obj_Multi. */
            sizes = (int *) ZOLTAN_MALLOC(actual_num_imp * sizeof(int));
            tmp_id = (ZOLTAN_ID_PTR) ZOLTAN_MALLOC_GID_ARRAY(zz, actual_num_imp);
            idx = (int *) ZOLTAN_MALLOC(actual_num_imp * sizeof(int));
            if (!sizes || !tmp_id || !idx) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
                ierr = ZOLTAN_MEMERR;
                goto End;
            }

            tmp = import_buf;
            idx_cnt = 0;
            for (i = 0; i < actual_num_imp; i++) {

                /* Unpack the object's global ID */
                ZOLTAN_SET_GID(zz, &(tmp_id[i*num_gid_entries]), (ZOLTAN_ID_PTR) tmp);
                tmp += id_size;

                /* Unpack the object's size */
                sizes[i] = *((int *)tmp);
                tmp += aligned_int;

                /* If using ZOLTAN_UNPACK_OBJ_MULTI_FN, build the index array. */
                idx_cnt += tag_size;
                if (idx != NULL) {
                    idx[i] = idx_cnt;
                }

                tmp += sizes[i];
                idx_cnt += sizes[i];
            }

            if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                printf("[%1d] DEBUG in %s: Unpacking objects with multi-fn\n",
                       zz->Proc,yo);
            }
            zz->Unpack_Obj_Multi(zz->Unpack_Obj_Multi_Data, num_gid_entries,
                                 actual_num_imp, tmp_id, sizes, idx, import_buf, &ierr);
            ZOLTAN_FREE(&import_buf);
            ZOLTAN_FREE(&sizes);
            ZOLTAN_FREE(&tmp_id);
            ZOLTAN_FREE(&idx);
            if (ierr < 0) {
                ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                   "ZOLTAN_UNPACK_OBJ_MULTI_FN.");
                goto End;
            }
        }
        else {
            tmp = import_buf;
            for (i = 0; i < actual_num_imp; i++) {
                tmp_size = *((int *)(tmp + id_size));
                if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL) {
                    printf("[%1d] DEBUG in %s: Unpacking object with gid ", zz->Proc, yo);
                    ZOLTAN_PRINT_GID(zz, (ZOLTAN_ID_PTR)tmp);
                    printf("size = %d bytes\n", tmp_size);
                }

                /* Unpack the object's data */

                zz->Unpack_Obj(zz->Unpack_Obj_Data, num_gid_entries,
                               (ZOLTAN_ID_PTR) tmp, tmp_size,
                               tmp + tag_size, &ierr);
                if (ierr < 0) {
                    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                                       "ZOLTAN_UNPACK_OBJ_FN.");
                    goto End;
                }
                tmp += (tmp_size + tag_size);
            }
            ZOLTAN_FREE(&import_buf);
        }
    }

    ZOLTAN_TRACE_DETAIL(zz, yo, "Done unpacking objects");

    if (zz->Migrate.Post_Migrate_PP != NULL) {
        zz->Migrate.Post_Migrate_PP(zz->Migrate.Post_Migrate_PP_Data,
                                    num_gid_entries, num_lid_entries,
                                    num_import, import_global_ids,
                                    import_local_ids, import_procs, import_to_part,
                                    num_export, export_global_ids,
                                    export_local_ids, export_procs, export_to_part,
                                    &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_POST_MIGRATE_PP_FN function.");
            goto End;
        }
    }

    if (zz->Migrate.Post_Migrate != NULL) {
        zz->Migrate.Post_Migrate(zz->Migrate.Post_Migrate_Data,
                                 num_gid_entries, num_lid_entries,
                                 num_import, import_global_ids,
                                 import_local_ids, import_procs,
                                 num_export, export_global_ids,
                                 export_local_ids, export_procs,
                                 &ierr);
        if (ierr < 0) {
            ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from "
                               "ZOLTAN_POST_MIGRATE_FN function.");
            goto End;
        }
    }

End:

    if (actual_exp_allocated) {
        Zoltan_Multifree(__FILE__, __LINE__, 4,
                         &actual_exp_gids, &actual_exp_lids,
                         &actual_exp_procs, &actual_exp_to_part);
    }
    if (actual_imp_allocated) {
        Zoltan_Multifree(__FILE__, __LINE__, 4,
                         &actual_imp_gids, &actual_imp_lids,
                         &actual_imp_procs, &actual_imp_to_part);
    }

    if (ierr < 0) {
        if (exp_plan) Zoltan_Comm_Destroy(&exp_plan);
        Zoltan_Multifree(__FILE__, __LINE__, 5,
                         &import_buf, &tmp_id, &sizes, &idx, &export_buf);
    }
    ZOLTAN_TRACE_EXIT(zz, yo);
    return (ierr);
}
コード例 #4
0
static int greedy_grow_part (
  ZZ *zz,
  HGraph *hg,		/* Hypergraph. */
  int start_vtx,	/* Start the ordering from this vertex. */
  int p,		/* Number of partitions (must be 2). */
  float *part_sizes,    /* Array of length p containing the percentages of
                           work to be assigned to each partition. */
  Partition part,	/* Output: Partition array. */
  PHGPartParams *hgp    /* Partitioning parameters. */
)
{
  int i, j, vtx, edge, edgesize;
  int *cut[2];
  double *gain = NULL;
  int vwgtdim = hg->VtxWeightDim;
  int part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
  double weight_sum, part_sum;
  double cutoff;
  double psize_sum= 0.0;
  HEAP h[2];
  static char *yo = "greedy_grow_part";
  int err=ZOLTAN_OK;

  /* Allocate arrays. */
  if (!(gain  = (double*) ZOLTAN_CALLOC (hg->nVtx, sizeof (double)))){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    err =  ZOLTAN_MEMERR;
    goto End;
  }

  /* Initially put all vertices in part 0, except fixed ones. */
  for (i=0; i<hg->nVtx; i++)
    part[i] = 0;   
  if (hgp->UsePrefPart){
    for (i=0; i<hg->nVtx; i++)
      if ((hg->bisec_split >= 0) && (hg->pref_part[i] >= hg->bisec_split))
        part[i] = 1;   
  }
 
  cut[0]  = (int*) ZOLTAN_CALLOC (2*hg->nEdge, sizeof (int));
  if ((hg->nEdge > 0 && cut[0] == NULL) ) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
    err = ZOLTAN_MEMERR;
    goto End;
  }
  cut[1] = &(cut[0][hg->nEdge]);

  /* Initialize cut values. */
  for (i=0; i<hg->nEdge; i++)
    for (j=hg->hindex[i]; j<hg->hindex[i+1]; j++)
      (cut[part[hg->hvertex[j]]][i])++;

  /* Initialize gain values. */
  for (i=0; i<hg->nVtx; i++){
    /* compute gain only if vertex is free */
    if (!hgp->UseFixedVtx || (hg->fixed_part[i] < 0)) 
      for (j=hg->vindex[i]; j<hg->vindex[i+1]; j++) {
        edge = hg->vedge[j];
        edgesize = cut[0][edge]+cut[1][edge];
        /* if edge is not cut by fixed vertices, update gain value */
        if (MIN(cut[0][edge],cut[1][edge])==0)
          gain[i] -= (hg->ewgt ? (hg->ewgt[edge]) : 1.0);
        if (edgesize>1 && cut[part[i]][edge]==1)
          gain[i] += (hg->ewgt ? (hg->ewgt[edge]) : 1.0);
      }
  }

  /* Sum total weights. (No multi-weights yet) */
  weight_sum = 0.;
  part_sum = 0.0;  /* Weight in the growing partition (1) */
  for (i=0; i<hg->nVtx; i++){
    weight_sum += hg->vwgt[i*vwgtdim];
    if (part[i]>0) 
      part_sum  += hg->vwgt[i*vwgtdim];
  }

  /* Set cutoff for growing partition (1) */
  psize_sum = part_sizes[0] + part_sizes[part_dim];
  cutoff = weight_sum*part_sizes[part_dim]/psize_sum;

  if (hgp->output_level >= PHG_DEBUG_ALL)
    printf("Debug: Starting new greedy growing at vertex %d, part=%2d\n", start_vtx, p);

  /* Initialize heap. */
  if (!hgp->UseFixedVtx) 
    gain[start_vtx] = 1e10;      /* Make start_vtx max value in heap. */
                                 /* All other values should be negative. */
  Zoltan_Heap_Init(zz, &h[0], hg->nVtx);
  Zoltan_Heap_Init(zz, &h[1], 0);       /* Dummy heap, not used. */
  for (i=0; i<hg->nVtx; i++){
    /* Insert all non-fixed vertices into heap. */
    if (!hgp->UseFixedVtx || (hg->fixed_part[i] < 0))
      Zoltan_Heap_Input(h, i, gain[i]);
  }
  Zoltan_Heap_Make(h);


  while (part_sum < cutoff) {

    /* Get next vertex from heap */
    vtx = Zoltan_Heap_Extract_Max(h);

    if (vtx < 0) {
      /* Empty heap: This can only happen if all vertices are fixed. */
      break;
    }

    part_sum += hg->vwgt[vtx*vwgtdim];
    part[vtx] = 1;
    if (hgp->output_level >= PHG_DEBUG_PRINT)
      printf("COARSE_PART vtx=%2d, part[%2d]=%2d, part_sum=%f\n",
              vtx,vtx,part[vtx],part_sum);

    /* Move vertex from part=0 to part=1. */
    /* Update gain values for nbors. */
    /* We use Zoltan_HG_move_vertex from the refinement code. */
    Zoltan_HG_move_vertex(hg, vtx, 0, 1, part, cut, gain, h);

  }

End:
  ZOLTAN_FREE (&gain);
  if (cut[0])    ZOLTAN_FREE (&cut[0]);
  Zoltan_Heap_Free (&h[0]);
  Zoltan_Heap_Free( &h[1]);
  return err;
}
コード例 #5
0
int Zoltan_RB_Box_Assign(
ZZ             *zz,             /* The Zoltan structure */
double          xmin,           /* lower x extent of box */
double          ymin,           /* lower y extent of box */
double          zmin,           /* lower z extent of box */
double          xmax,           /* upper x extent of box */
double          ymax,           /* upper y extent of box */
double          zmax,           /* upper z extent of box */
int            *procs,          /* list of procs that box intersects */
int            *numprocs,       /* number of processors in proc list */
int            *parts,          /* list of parts that box intersects */
int            *numparts)       /* number of partitions in part list */
{
/* Determine which partitions and processors a box intersects.
   Currently assumes that partitioning has used RCB or RIB, but should be
   modified to return an error message if other method was used */

     static char       *yo = "Zoltan_RB_Box_Assign";
     RCB_STRUCT        *rcb;    /* Pointer to data structures for RCB. */
     struct rcb_tree   *treept; /* tree of RCB cuts */
     RIB_STRUCT        *rib;    /* Pointer to data structures for RIB. */
     struct rib_tree   *itree;  /* tree of RIB cuts */
     struct rcb_box    box;     /* box data structure */
     int               *proc_array = NULL;  
                                /* Array of size zz->Num_Proc; initialized
                                   to 0; entry i incremented each time 
                                   a found partition is on processor i. 
                                   Added to support 
                                   !zz->LB.Single_Proc_Per_Part. */
     int               include_procs = (procs != NULL);
     int               include_parts = (parts != NULL);
     int               ierr = ZOLTAN_OK;
     int               i;
     double            p[8][3];

     if (zz->LB.Data_Structure == NULL) {
        ZOLTAN_PRINT_ERROR(-1, yo, 
          "No Decomposition Data available; use KEEP_CUTS parameter.");
        ierr = ZOLTAN_FATAL;
        goto End;
     }

     if (include_procs) {
        proc_array = (int *) ZOLTAN_CALLOC(zz->Num_Proc, sizeof(int));
        if (!proc_array) {
           ierr = ZOLTAN_MEMERR;
           goto End;
        }
     }

     *numprocs = *numparts = 0;

     if (zz->LB.Method == RCB) {
        rcb = (RCB_STRUCT *) (zz->LB.Data_Structure);
        treept = rcb->Tree_Ptr;
        if (treept[0].dim < 0) {     /* RCB tree was never created. */
           ZOLTAN_PRINT_ERROR(zz->Proc, yo, "No RCB tree saved; "
             " Must set parameter KEEP_CUTS to 1.");
           ierr = ZOLTAN_FATAL;
           goto End;
        }

        box.lo[0] = xmin;
        box.lo[1] = ymin;
        box.lo[2] = zmin;
        box.hi[0] = xmax;
        box.hi[1] = ymax;
        box.hi[2] = zmax;

        if (rcb->Tran.Target_Dim > 0){
          /* 
           * Degenerate geometry, transform box to the lower dimensional
           * space that the partitioning occured in.  Our new box may
           * encompass more partitions, but it won't miss any.
           */
          Zoltan_Transform_Box(box.lo, box.hi, rcb->Tran.Transformation, 
                    rcb->Tran.Permutation, rcb->Num_Dim, rcb->Tran.Target_Dim);
        }

        Box_Assign(zz, treept, &box, include_procs, include_parts, 
                   proc_array, parts, numparts, treept[0].right_leaf);
     }
     else if (zz->LB.Method == RIB) {
        rib = (RIB_STRUCT *) (zz->LB.Data_Structure);
        itree = rib->Tree_Ptr;
        if (itree[0].right_leaf < 0) { /* RIB tree was never created. */
           ZOLTAN_PRINT_ERROR(zz->Proc, yo, "No RIB tree saved;"
             " Must set parameter KEEP_CUTS to 1.");
           ierr = ZOLTAN_FATAL;
           goto End;
        }


        switch (rib->Num_Geom) {
           case 3:
              box.lo[0] = xmin;
              box.lo[1] = ymin;
              box.lo[2] = zmin;
              box.hi[0] = xmax;
              box.hi[1] = ymax;
              box.hi[2] = zmax;

              if (rib->Tran.Target_Dim <= 0){
                Box_Assign3(zz, itree, &box, include_procs, include_parts, 
                          proc_array, parts, numparts, itree[0].right_leaf);
              }
              else{ /* degenerate geometry, Target_Dim is 2 or 1 */

                Zoltan_Transform_Box_Points(box.lo, box.hi,
                  rib->Tran.Transformation, rib->Tran.Permutation, 3, 
                  rib->Tran.Target_Dim, p);

                if (rib->Tran.Target_Dim == 1){  /* box -> line */

                  box.lo[0] = box.hi[0] = p[0][0];
                  for (i=1; i<8; i++){
                    if (p[i][0] < box.lo[0]) box.lo[0] = p[i][0]; 
                    else if (p[i][0] > box.hi[0]) box.hi[0] = p[i][0]; 
                  }
                  Box_Assign1(zz, itree, &box, include_procs, include_parts,
                          proc_array, parts, numparts, itree[0].right_leaf);
                }
                else{     /* box -> plane, no longer axis-aligned */

                  Transformed_Box_Assign(zz, itree, p, rib->Tran.Target_Dim,
                    include_procs, include_parts, proc_array, parts, numparts, 
                    itree[0].right_leaf);
                }
              }

              break;
           case 2:
              box.lo[0] = xmin;
              box.lo[1] = ymin;
              box.hi[0] = xmax;
              box.hi[1] = ymax;

              if (rib->Tran.Target_Dim <= 0){
                Box_Assign2(zz, itree, &box, include_procs, include_parts,
                            proc_array, parts, numparts, itree[0].right_leaf);
              }
              else{

                /* degenerate geometry, Target_Dim is 1 */

                Zoltan_Transform_Box_Points(box.lo, box.hi,
                  rib->Tran.Transformation, rib->Tran.Permutation, 2, 1, p);

                box.lo[0] = box.hi[0] = p[0][0];
                for (i=1; i<4; i++){
                  if (p[i][0] < box.lo[0]) box.lo[0] = p[i][0]; 
                  else if (p[i][0] > box.hi[0]) box.hi[0] = p[i][0]; 
                }

                Box_Assign1(zz, itree, &box, include_procs, include_parts,
                          proc_array, parts, numparts, itree[0].right_leaf);
              }

              break;
           case 1:
              box.lo[0] = xmin;
              box.hi[0] = xmax;

              Box_Assign1(zz, itree, &box, include_procs, include_parts,
                          proc_array, parts, numparts, itree[0].right_leaf);

              break;
        }
     }
     else {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
              "Valid only when load-balancing method is RCB and RIB.");
        ierr = ZOLTAN_FATAL;
        goto End;
     }

     if (include_procs) {
        for (i = 0; i < zz->Num_Proc; i++)
           if (proc_array[i] > 0)
              procs[(*numprocs)++] = i;
     }

End:
     ZOLTAN_FREE(&proc_array);

     if (ierr != ZOLTAN_OK) {
        if (include_procs) *procs = -1;
        *numprocs = 0;
        if (include_parts) *parts = -1;
        *numparts = 0;
     }
     return ierr;
}
コード例 #6
0
ファイル: zz_map.c プロジェクト: haripandey/trilinos
/*****************************************************************
 * Create a new map.
 * Return the map number to be used in subsequent calls.
 * (Return NULL on error)
 */
ZOLTAN_MAP* Zoltan_Map_Create(ZZ *zz,     /* just need this for error messages */
		      int hash_range_max, /* > 0: maximum hash value */
					  /*   0: Zoltan_Map_Create will choose value */
		      int num_id_entries, /* length of a key */
		      int store_keys,     /* 1 - keep a copy of each key */
					  /* 0 - keep a pointer to caller's key */
		      int num_entries)    /* > 0: number of keys that will be added */
					  /*   0: dynamically allocate entries */
{
  char *yo = "Zoltan_Map_Create";
  ZOLTAN_ENTRY *top = NULL;
  ZOLTAN_ENTRY **entries = NULL;
  ZOLTAN_MAP* map = NULL;
  int *keys = NULL;
  int my_range ;

  if ((num_id_entries < 1) || (num_entries < 0) || (hash_range_max < 0)){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Bad parameters\n");
    return NULL;
  }

  map = (ZOLTAN_MAP*) ZOLTAN_MALLOC(sizeof(ZOLTAN_MAP));

  if (num_entries > 0){

    /* we create storage for the entries in advance */

    top = (ZOLTAN_ENTRY *)ZOLTAN_CALLOC(num_entries, sizeof(ZOLTAN_ENTRY));
    if (!top){
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory\n");
      return NULL;
    }
    if (store_keys) {
      keys = (int*)ZOLTAN_CALLOC(num_entries, sizeof(int)*num_id_entries);
      if (!keys) {
	ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory\n");
	return NULL;
      }
    }
  }

  /* hash table */

  if (hash_range_max == 0){
    if (num_entries > 0){
      /* SRSR : This hash range maximum will be a prime number closer to
      num_entries for smaller problems and closer to num_entries/2 for larger
      problems. For very small problems hash_range_max can be larger than
      num_entries, but choosing hash_range_max = num_entries may lead to
      too many collisions. */
      my_range = (int) pow((double)num_entries, 0.90) ;
      hash_range_max = Zoltan_Recommended_Hash_Size(2*my_range) ;
    }
    else{
      /* This could be a performance bottleneck depending on num_entries.  */
      hash_range_max = 1000;
    }
  }

  entries = (ZOLTAN_ENTRY **)ZOLTAN_CALLOC(hash_range_max+1, sizeof(ZOLTAN_ENTRY*));

  if (!entries){
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory\n");
    return NULL;
  }


  map->entries     = entries;
  map->top         = top;
  map->id_size     = num_id_entries;
  map->max_index   = hash_range_max;
  map->max_entries = num_entries;
  map->prev_index      = -1;
  map->prev_hash_index = -1;
  map->prev            = NULL;
  map->used            = 1;
  map->dynamicEntries  = (num_entries == 0);
  map->copyKeys        = store_keys;
  map->entry_count     = 0;
  map->keys            = keys;

  return map;
}
コード例 #7
0
ファイル: matrix_build.c プロジェクト: agrippa/Trilinos
int
Zoltan_Matrix_Build (ZZ* zz, Zoltan_matrix_options *opt, Zoltan_matrix* matrix,
  int request_GNOs,                /* Input:  Flag indicating calling code 
                                              needs translation of extra GIDs
                                              to GNOs; partial 2D coloring
                                              needs this feature. */
  int num_requested,               /* Input:  Local # of GIDs needing 
                                              translation to GNOs. */
  ZOLTAN_ID_PTR requested_GIDs,    /* Input:  Calling code requests the 
                                              GNOs for these GIDs */
  ZOLTAN_GNO_TYPE *requested_GNOs  /* Output: Return GNOs of 
                                              the requested GIDs.  */
)  
{
  static char *yo = "Zoltan_Matrix_Build";
  int ierr = ZOLTAN_OK;
  int nX;
  ZOLTAN_GNO_TYPE tmp;
  ZOLTAN_GNO_TYPE *xGNO = NULL;
  ZOLTAN_ID_PTR xLID=NULL;
  ZOLTAN_ID_PTR xGID=NULL;
  ZOLTAN_ID_PTR yGID=NULL;
  ZOLTAN_ID_PTR pinID=NULL;
  float *xwgt = NULL;
  int * Input_Parts=NULL;
  struct Zoltan_DD_Struct *dd = NULL;
  int *proclist = NULL;
  int *xpid = NULL;
  int i;
  int gno_size_for_dd;
  MPI_Datatype zoltan_gno_mpi_type;
  int use_full_dd = (opt->speed == MATRIX_FULL_DD);
  int fast_build_base = opt->fast_build_base;
  matrix->opts.speed = opt->speed;  
  matrix->opts.fast_build_base = opt->fast_build_base;

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (num_requested && (!requested_GIDs || !requested_GNOs)) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                       "Error in requested input; needed arrays are NULL.\n");
  }

  /* ZOLTAN_GNO_TYPE is >= ZOLTAN_ID_TYPE */
  gno_size_for_dd = sizeof(ZOLTAN_GNO_TYPE) / sizeof(ZOLTAN_ID_TYPE);

  zoltan_gno_mpi_type = Zoltan_mpi_gno_type();

  memset (matrix, 0, sizeof(Zoltan_matrix)); /* Set all fields to 0 */
  memcpy (&matrix->opts, opt, sizeof(Zoltan_matrix_options));

  /**************************************************/
  /* Obtain vertex information from the application */
  /**************************************************/

  ierr = Zoltan_Get_Obj_List(zz, &nX, &xGID, &xLID,
			     zz->Obj_Weight_Dim, &xwgt,
			     &Input_Parts);

  if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error getting object data");
    goto End;
  }
  ZOLTAN_FREE(&Input_Parts);
  ZOLTAN_FREE(&xwgt);

  /*******************************************************************/
  /* Assign vertex consecutive numbers (gnos)                        */
  /*******************************************************************/

  if (use_full_dd) {
    /* Zoltan computes a translation */
    /* Have to use Data Directory if request_GNOs is true. */
    if (nX) {
      xGNO = (ZOLTAN_GNO_TYPE*) ZOLTAN_MALLOC(nX*sizeof(ZOLTAN_GNO_TYPE));
      if (xGNO == NULL)
	MEMORY_ERROR;
    }
    ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, xGNO, nX, matrix->opts.randomize, &matrix->globalX);

    if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error assigning global numbers to vertices");
      goto End;
    }

    ierr = Zoltan_DD_Create (&dd, zz->Communicator, zz->Num_GID, gno_size_for_dd, 0, nX, 0);
    CHECK_IERR;

    /* Make our new numbering public */
    Zoltan_DD_Update (dd, xGID, (ZOLTAN_ID_PTR) xGNO, NULL,  NULL, nX);
    if (request_GNOs) {
      Zoltan_DD_Find(dd, requested_GIDs, (ZOLTAN_ID_PTR) requested_GNOs,
                     NULL, NULL, num_requested, NULL);
    }
  }
  else { /* We don't want to use the DD */
     /*
     * KDDKDD 2/10/11  This code cannot work when NUM_GID_ENTRIES>1.
     * KDDKDD 2/10/11  The assumption is that, if a user sets the
     * KDDKDD 2/10/11  appropriate parameter to enable this code, the user
     * KDDKDD 2/10/11  knows that his GIDs are compatible with integers.
     */
    if (sizeof(ZOLTAN_GNO_TYPE) != sizeof(ZOLTAN_ID_TYPE)){
      xGNO = (ZOLTAN_GNO_TYPE*) ZOLTAN_MALLOC(nX*sizeof(ZOLTAN_GNO_TYPE));
      if (nX && xGNO == NULL)
        MEMORY_ERROR;
      for (i=0; i < nX; i++)
        xGNO[i] = (ZOLTAN_GNO_TYPE)xGID[i] - fast_build_base;
    }
    else {
      xGNO = (ZOLTAN_GNO_TYPE *)xGID;
      if (fast_build_base)
        for (i = 0; i < nX; i++)
          xGNO[i] -= fast_build_base;
    }

    for (i = 0; i < num_requested; i++)
      requested_GNOs[i] = (ZOLTAN_GNO_TYPE)requested_GIDs[i]
                        - fast_build_base;
     
    tmp = (ZOLTAN_GNO_TYPE)nX; 
    MPI_Allreduce(&tmp, &matrix->globalX, 1, zoltan_gno_mpi_type, MPI_SUM, zz->Communicator);
  }

  /* I store : xGNO, xGID, xpid,  */

  ierr = Zoltan_DD_Create (&matrix->ddX, zz->Communicator, gno_size_for_dd, zz->Num_GID,
			   sizeof(int), matrix->globalX/zz->Num_Proc, 0);
  CHECK_IERR;

  /* Hope a linear assignment will help a little */
  if (matrix->globalX/zz->Num_Proc)
    Zoltan_DD_Set_Neighbor_Hash_Fn1(matrix->ddX, matrix->globalX/zz->Num_Proc);
  /* Associate all the data with our xGNO */
  xpid = (int*)ZOLTAN_MALLOC(nX*sizeof(int));
  if (nX >0 && xpid == NULL) MEMORY_ERROR;
  for (i = 0 ; i < nX ; ++i)
    xpid[i] = zz->Proc;

  Zoltan_DD_Update (matrix->ddX, (ZOLTAN_ID_PTR)xGNO, xGID, (char *)xpid, NULL, nX);
  ZOLTAN_FREE(&xpid);

  if (matrix->opts.pinwgt)
    matrix->pinwgtdim = zz->Edge_Weight_Dim;
  else
    matrix->pinwgtdim = 0;

  ierr = matrix_get_edges(zz, matrix, &yGID, &pinID, nX, &xGID, &xLID, &xGNO, &xwgt, use_full_dd);
  CHECK_IERR;
  matrix->nY_ori = matrix->nY;

  if ((ierr != ZOLTAN_OK) && (ierr != ZOLTAN_WARN)){
    goto End;
  }

  if (matrix->opts.enforceSquare && matrix->redist) {
    /* Convert yGID to yGNO using the same translation as x */
    /* Needed for graph : rowID = colID */
    /* y and x may have different distributions */
    matrix->yGNO = (ZOLTAN_GNO_TYPE*)ZOLTAN_MALLOC(matrix->nY * sizeof(ZOLTAN_GNO_TYPE));
    if (matrix->nY && matrix->yGNO == NULL) {
      ZOLTAN_FREE(&pinID);
      MEMORY_ERROR;
    }
    ierr = Zoltan_DD_Find (dd, yGID, (ZOLTAN_ID_PTR)(matrix->yGNO), NULL, NULL,
		    matrix->nY, NULL);
    if (ierr != ZOLTAN_OK) {
      ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Hyperedge GIDs don't match.\n");
      ierr = ZOLTAN_FATAL;
      ZOLTAN_FREE(&pinID);
      goto End;
    }
  }

  if (matrix->opts.local) { /* keep only local edges */
    proclist = (int*) ZOLTAN_MALLOC(matrix->nPins*sizeof(int));
    if (matrix->nPins && proclist == NULL) { 
      ZOLTAN_FREE(&pinID);
      MEMORY_ERROR;
    }
  }
  else
    proclist = NULL;

  /* Convert pinID to pinGNO using the same translation as x */
  if (use_full_dd) {
    matrix->pinGNO = (ZOLTAN_GNO_TYPE*)ZOLTAN_MALLOC(matrix->nPins* sizeof(ZOLTAN_GNO_TYPE));
    if ((matrix->nPins > 0) && (matrix->pinGNO == NULL)) {
        ZOLTAN_FREE(&pinID);
        MEMORY_ERROR;
    }

    ierr = Zoltan_DD_Find (dd, pinID, (ZOLTAN_ID_PTR)(matrix->pinGNO), NULL, NULL,
			   matrix->nPins, proclist);
    if (ierr != ZOLTAN_OK) {
      ZOLTAN_PRINT_ERROR(zz->Proc,yo,"Undefined GID found.\n");
      ierr = ZOLTAN_FATAL;
      goto End;
    }
    ZOLTAN_FREE(&pinID);
    Zoltan_DD_Destroy(&dd);
    dd = NULL;
  }
  else {
    if (sizeof(ZOLTAN_GNO_TYPE) != sizeof(ZOLTAN_ID_TYPE)){
      matrix->pinGNO = (ZOLTAN_GNO_TYPE *)ZOLTAN_MALLOC(matrix->nPins * sizeof(ZOLTAN_GNO_TYPE));
      if (matrix->nPins && !matrix->pinGNO){
        ZOLTAN_FREE(&pinID);
        MEMORY_ERROR;
      }
      for (i=0; i < matrix->nPins; i++)
        matrix->pinGNO[i] = (ZOLTAN_GNO_TYPE)pinID[i] - fast_build_base;
      
      ZOLTAN_FREE(&pinID);
    }
    else{
      matrix->pinGNO = (ZOLTAN_GNO_TYPE *) pinID;
      if (fast_build_base)
        for (i=0; i < matrix->nPins; i++)
          matrix->pinGNO[i] -= fast_build_base;
      pinID = NULL;
    }
  }

/*   if (matrix->opts.local) {  /\* keep only local edges *\/ */
/*     int *nnz_list; /\* nnz offset to delete *\/ */
/*     int nnz;       /\* number of nnz to delete *\/ */
/*     int i; */

/*     nnz_list = (int*) ZOLTAN_MALLOC(matrix->nPins*sizeof(int)); */
/*     if (matrix->nPins && nnz_list == NULL) MEMORY_ERROR; */
/*     for (i = 0, nnz=0 ; i < matrix->nPins ; ++i) { */
/*       if (proclist[i] == zz->Proc) continue; */
/*       nnz_list[nnz++] = i; */
/*     } */
/*     ZOLTAN_FREE(&proclist); */
/*     Zoltan_Matrix_Delete_nnz(zz, matrix, nnz, nnz_list); */
/*   } */

  if (!matrix->opts.enforceSquare) {
    /* Hyperedges name translation is different from the one of vertices */
    matrix->yGNO = (ZOLTAN_GNO_TYPE*)ZOLTAN_CALLOC(matrix->nY, sizeof(ZOLTAN_GNO_TYPE));
    if (matrix->nY && matrix->yGNO == NULL) MEMORY_ERROR;

    /*     int nGlobalEdges = 0; */
    ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, matrix->yGNO, matrix->nY,
					     matrix->opts.randomize, &matrix->globalY);
    CHECK_IERR;

/*     /\**************************************************************************************** */
/*      * If it is desired to remove dense edges, divide the list of edges into */
/*      * two lists.  The ZHG structure will contain the removed edges (if final_output is true), */
/*      * and the kept edges will be returned. */
/*      ****************************************************************************************\/ */
/*     totalNumEdges = zhg->globalHedges; */

/*     ierr = remove_dense_edges_matrix(zz, zhg, edgeSizeThreshold, final_output, */
/*				     &nLocalEdges, &nGlobalEdges, &nPins, */
/*				     &edgeGNO, &edgeSize, &edgeWeight, &pinGNO, &pinProcs); */

/*     if (nGlobalEdges < totalNumEdges){ */
/*       /\* re-assign edge global numbers if any edges were removed *\/ */
/*       ierr = Zoltan_PHG_GIDs_to_global_numbers(zz, edgeGNO, nLocalEdges, */
/*					       randomizeInitDist, &totalNumEdges); */
/*       if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN) { */
/*	ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error reassigning global numbers to edges"); */
/*	goto End; */
/*       } */
/*     } */

      /* We have to define ddY : yGNO, yGID, ywgt */
      ierr = Zoltan_DD_Create (&matrix->ddY, zz->Communicator, gno_size_for_dd, zz->Num_GID,
			       0, matrix->globalY/zz->Num_Proc, 0);
      /* Hope a linear assignment will help a little */
      if (matrix->globalY/zz->Num_Proc)
        Zoltan_DD_Set_Neighbor_Hash_Fn1(matrix->ddY, matrix->globalY/zz->Num_Proc);
      /* Associate all the data with our yGNO */
      Zoltan_DD_Update (matrix->ddY, (ZOLTAN_ID_PTR)matrix->yGNO, yGID, NULL, NULL, matrix->nY);
  }

 End:
  ZOLTAN_FREE(&xpid);
  ZOLTAN_FREE(&xLID);
  ZOLTAN_FREE(&xGNO);
  ZOLTAN_FREE(&xGID);
  ZOLTAN_FREE(&xwgt);
  ZOLTAN_FREE(&Input_Parts);
  ZOLTAN_FREE(&proclist);
  if (dd != NULL)
    Zoltan_DD_Destroy(&dd);
  /* Already stored in the DD */
  ZOLTAN_FREE(&yGID);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
コード例 #8
0
static int gather_and_build_remap(
  ZZ *zz, 
  int *new_map,               /* Upon return, flag indicating whether parts
                                 assignments were changed due to remap. */
  int HEcnt,                  /* # of HEs allocated. */
  int *HEinfo                 /* Array of HE info; for each HE, two pins and 
                                 one edge weight. Stored as a single vector
                                 to minimize communication calls.  */
)
{
char *yo = "gather_and_remap";
int ierr = ZOLTAN_OK;
int i, uidx, tmp;
int *each_size = NULL;        /* sizes (# HEs * HEINFO_ENTRIES) for each proc */
int *recvbuf = NULL;          /* Receive buffer for gatherv */
int *displs = NULL;           /* Displacement buffer for gatherv */
int send_size;                /* Local # HEs * HEINFO_ENTRIES */
int total_size;               /* Total # ints in gatherv */
int total_HEcnt;              /* Total (across all procs) number of HEs. */
int max0, max1;               /* Max values of pin 0 and pin 1 for each HE. */
int *match = NULL;            /* Vector describing the matching. 
                                 match[i] = j ==> match[j] = i ==> 
                                 vertices i and j are matched. */
int *used = NULL;             /* Vector indicating which partitions are used
                                 in the matching. */
int limit;                    /* Maximum number of matches that are allowed */
HGraph hg;                    /* Hypergraph for matching */
float before_remap = 0,       /* Amount of data that overlaps between old and */
      after_remap = 0;        /* new decomposition before and after remapping, 
                                 respectively. */
float with_oldremap = 0;      /* Amount of data that overlaps between old and
                                 new decomposition using the OldRemap vector
                                 (remapping from the previous decomposition). */


  /* Gather HEs from each processor into a local complete HG. */

  each_size = (int *) ZOLTAN_MALLOC(zz->Num_Proc * sizeof(int));
  if (!each_size) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }
  send_size = HEcnt * HEINFO_ENTRIES;
  MPI_Allgather(&send_size, 1, MPI_INT, each_size, 1, MPI_INT,
                zz->Communicator);

  for (total_size = 0, i = 0; i < zz->Num_Proc; i++) {
    total_size += each_size[i];
  }

  recvbuf = (int *) ZOLTAN_MALLOC((zz->Num_Proc + total_size) * sizeof(int));
  displs = recvbuf + total_size;
  if (!recvbuf) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }

  displs[0] = 0;
  for (i = 1; i < zz->Num_Proc; i++)
    displs[i] = displs[i-1] + each_size[i-1];

  MPI_Allgatherv(HEinfo, send_size, MPI_INT, 
                 recvbuf, each_size, displs, MPI_INT, zz->Communicator);

  total_HEcnt = total_size / HEINFO_ENTRIES;
  for (max0 = -1, max1 = -1, i = 0; i < total_HEcnt; i++) {
    tmp = i * HEINFO_ENTRIES;
    if (recvbuf[tmp] > max0) max0 = recvbuf[tmp];
    if (recvbuf[tmp+1] > max1) max1 = recvbuf[tmp+1];
  }
  /* Increment max0 and max1 so that they are the maximum number of unique
     pin values for pin0 and pin1 respectively; i.e., allow pin value == 0. */
  max0++;
  max1++;
  
  /* Sanity check */
  /* Ideally, max1 should equal LB.Num_Global_Parts, but ParMETIS3 sometimes
   * does not return the correct number of non-empty partitions, allowing
   * max1 to be less than LB.Num_Global_Parts. 
   * (e.g., ewgt.adaptive-partlocal1-v3.4.?).
   */
  if (max1 > zz->LB.Num_Global_Parts) 
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Unexpected value for max1.");

  /* Set up global HG */

  Zoltan_HG_HGraph_Init(&hg);
  if (total_HEcnt) {
    hg.nVtx = max0 + zz->LB.Num_Global_Parts;  
    hg.nEdge = total_HEcnt;
    hg.nPins = total_HEcnt * 2;   /* two pins per HE */
    hg.EdgeWeightDim = 1;
    hg.ewgt = (float *) ZOLTAN_MALLOC(total_HEcnt * sizeof(float));
    hg.hindex = (int *) ZOLTAN_MALLOC((total_HEcnt + 1) * sizeof(int));
    hg.hvertex = (int *) ZOLTAN_MALLOC((hg.nPins) * sizeof(int));
    if (!hg.ewgt || !hg.hindex || !hg.hvertex) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < total_HEcnt; i++) {
      tmp = i * HEINFO_ENTRIES;
      hg.hindex[i] = i+i; 
      hg.hvertex[i+i] = recvbuf[tmp];
      hg.hvertex[i+i+1] = recvbuf[tmp+1]+max0;
      hg.ewgt[i] = recvbuf[tmp+2];
    }
    hg.hindex[total_HEcnt] = total_HEcnt + total_HEcnt;

    ierr = Zoltan_HG_Create_Mirror(zz, &hg);
    if (ierr < 0) goto End;
  }

  before_remap = measure_stays(zz, &hg, max0, NULL, "BEFORE");

  /* Compute the amount of overlap when using the old remap vector. */

  with_oldremap = measure_stays(zz, &hg, max0, zz->LB.OldRemap, "WITHOLD");

  /* Do matching */

  match = (int *) ZOLTAN_CALLOC(hg.nVtx + zz->LB.Num_Global_Parts, sizeof(int));
  used = match + hg.nVtx;
  if (hg.nVtx && !match) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
    ierr = ZOLTAN_MEMERR;
    goto End;
  }

  /* Max # matches allowed */
  limit = (max0 < zz->LB.Num_Global_Parts ? max0 : zz->LB.Num_Global_Parts); 
  do_match(zz, &hg, match, limit);

      
  /* Build remapping vector, if non-trivial matching was returned. */

  *new_map = 0;
  for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
    if (match[i+max0] != i+max0) {
      *new_map = 1;
      break;
    }

  if (*new_map) {

    zz->LB.Remap = (int *) ZOLTAN_MALLOC(zz->LB.Num_Global_Parts * sizeof(int));
    if (!(zz->LB.Remap)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }


    /* First, process all parts that were matched. Mark matched parts as used.*/

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      zz->LB.Remap[i] = -1; 
      tmp = match[i+max0];
      if (tmp != i+max0) {
        zz->LB.Remap[i] = tmp;
        used[tmp] = 1;
      }
    }

    /* Second, process unmatched parts; if possible, keep same part number. */

    for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      if (!used[i]) {  /* Keep the same part number if it is not used */
        zz->LB.Remap[i] = i;
        used[i] = 1;
      }
    }
  
    /* Third, process remaining unmatched parts; assign them to 
       unused partitions.*/
  
    for (uidx = 0, i = 0; i < zz->LB.Num_Global_Parts; i++) {
      if (zz->LB.Remap[i] > -1) continue;  /* Already processed part i */
      /* match[i+max0] == i+max0 */
      while (used[uidx]) uidx++;   /* Find next unused partition */
      zz->LB.Remap[i] = uidx;
      used[uidx] = 1;
    }
  }

  if (*new_map) 
    after_remap = measure_stays(zz, &hg, max0, zz->LB.Remap, "AFTER ");

  if ((before_remap >= after_remap) && (before_remap >= with_oldremap)) {
    /* No benefit from remapping; don't keep it! */
    ZOLTAN_FREE(&zz->LB.Remap);
    ZOLTAN_FREE(&zz->LB.OldRemap);
    *new_map = 0;
  }
  else if (with_oldremap >= after_remap) {
    /* The old remap vector is better than the new one; keep the old one. */
    ZOLTAN_FREE(&zz->LB.Remap);
    zz->LB.Remap = zz->LB.OldRemap;
    zz->LB.OldRemap = NULL;
    *new_map = 1;
  }
  else {
    /* Going to use the new remap vector; free the old one. */
    ZOLTAN_FREE(&zz->LB.OldRemap);
  }

  if (zz->Debug_Level >= ZOLTAN_DEBUG_ALL && zz->Proc == zz->Debug_Proc &&
      zz->LB.Remap) 
    for (i = 0; i < zz->LB.Num_Global_Parts; i++) 
      printf("%d REMAP Part %d to Part %d\n", zz->Proc, i, zz->LB.Remap[i]);

End:
  ZOLTAN_FREE(&match);
  ZOLTAN_FREE(&each_size);
  ZOLTAN_FREE(&recvbuf);
  Zoltan_HG_HGraph_Free(&hg);
  return ierr;
}
コード例 #9
0
/* if !copy, inmat is not usable after this call */
int
Zoltan_Matrix2d_Distribute (ZZ* zz, Zoltan_matrix inmat, /* Cannot be const as we can share it inside outmat */
			    Zoltan_matrix_2d *outmat, int copy)
{
  static char *yo = "Zoltan_Matrix2d_Distribute";
  int ierr = ZOLTAN_OK;
  int nProc_x, nProc_y;
  int myProc_x, myProc_y;
  int i, j, cnt;
  int *proclist = NULL;
  Zoltan_Arc *nonzeros= NULL, *sendbuf= NULL;
  ZOLTAN_GNO_TYPE *perm_y = NULL;
  float *wgtarray = NULL;
  float *tmpwgtarray = NULL;
  int msg_tag = 1021982;
  ZOLTAN_COMM_OBJ *plan;
  MPI_Comm communicator = MPI_COMM_NULL;
  int nProc;
  ZOLTAN_GNO_TYPE *yGNO = NULL;
  ZOLTAN_GNO_TYPE *pinGNO = NULL;
  ZOLTAN_GNO_TYPE tmp_gno;
  void *partdata = NULL;
  MPI_Datatype zoltan_gno_mpi_type;

  ZOLTAN_TRACE_ENTER(zz, yo);

  zoltan_gno_mpi_type = Zoltan_mpi_gno_type();

  memcpy(&outmat->mtx, &inmat, sizeof(Zoltan_matrix));
  if(copy) {
    /* TODO: We need to copy the arrays also */
    Zoltan_Matrix_Reset (&outmat->mtx);
    /* Copy also directories */
    outmat->mtx.ddX = Zoltan_DD_Copy (inmat.ddX);
    if (inmat.ddY == inmat.ddX)
      outmat->mtx.ddY = outmat->mtx.ddX;
    else
      outmat->mtx.ddY = Zoltan_DD_Copy (inmat.ddY);
  }

  communicator = outmat->comm->Communicator;
  nProc = outmat->comm->nProc;

  nProc_x = outmat->comm->nProc_x;
  nProc_y = outmat->comm->nProc_y;
  myProc_x = outmat->comm->myProc_x;
  myProc_y = outmat->comm->myProc_y;

KDDKDDKDD(zz->Proc, "    Zoltan_Matrix_Remove_Duplicates");
  ierr = Zoltan_Matrix_Remove_Duplicates(zz, outmat->mtx, &outmat->mtx);

/* KDDKDDKDD  FIX INDENTATION OF THIS BLOCK */
if (inmat.opts.speed != MATRIX_NO_REDIST) {
  if (outmat->hashDistFct == (distFnct *)&Zoltan_Distribute_Origin) {
    /* I need to know the original distribution */
    if (outmat->mtx.ddX != outmat->mtx.ddY) { /* No initial distribution */
      outmat->hashDistFct = (distFnct *)&Zoltan_Distribute_Linear;
    }
    else {
      int *cmember = NULL;

      cmember = (int*)ZOLTAN_MALLOC(outmat->mtx.nY*sizeof(int));
      if (outmat->mtx.nY > 0 && cmember == NULL) MEMORY_ERROR;
      Zoltan_DD_Find (outmat->mtx.ddY, (ZOLTAN_ID_PTR)outmat->mtx.yGNO, NULL, (char *)cmember, NULL,
		      outmat->mtx.nY, NULL);
KDDKDDKDD(zz->Proc, "    Zoltan_Distribute_Partition_Register");
      partdata = Zoltan_Distribute_Partition_Register(zz, outmat->mtx.nY, outmat->mtx.yGNO,
						      cmember, zz->Num_Proc, zz->Num_Proc);
      ZOLTAN_FREE(&cmember);
      Zoltan_Distribute_Set(outmat, (distFnct *)&Zoltan_Distribute_Origin, partdata);
    }
  }

  /*
   * Build comm plan for sending non-zeros to their target processors in
   * 2D data distribution.
   */
  /* TRICK: create fake arc (edgeno, -1) for empty Y. Upper bound for size might be nPins + nY */
  proclist = (int *)ZOLTAN_MALLOC((outmat->mtx.nPins+outmat->mtx.nY) *sizeof(int));
  sendbuf = (Zoltan_Arc*) ZOLTAN_MALLOC((outmat->mtx.nPins +outmat->mtx.nY)* sizeof(Zoltan_Arc));

  if ((outmat->mtx.nPins + outmat->mtx.nY >0) && (proclist == NULL || sendbuf == NULL)) MEMORY_ERROR;

  wgtarray = (float*) ZOLTAN_MALLOC((outmat->mtx.nPins+outmat->mtx.nY)*outmat->mtx.pinwgtdim*sizeof(float));

  if (outmat->mtx.nPins*outmat->mtx.pinwgtdim && !wgtarray) MEMORY_ERROR;

  yGNO = outmat->mtx.yGNO;
  pinGNO = outmat->mtx.pinGNO;

KDDKDDKDD(zz->Proc, "    CommPlan Hash");
  cnt = 0;
  for (i = 0; i < outmat->mtx.nY; i++) {
    ZOLTAN_GNO_TYPE edge_gno=-1;
    /* processor row for the edge */
    edge_gno = yGNO[i];

    for (j = outmat->mtx.ystart[i]; j < outmat->mtx.yend[i]; j++) {
      ZOLTAN_GNO_TYPE vtx_gno=-1;
      /* processor column for the vertex */
      vtx_gno = pinGNO[j];

      proclist[cnt] = (*outmat->hashDistFct)(edge_gno, vtx_gno, outmat->hashDistData,
					  &sendbuf[cnt].part_y);
      if (proclist[cnt] < 0) /* Discard this nnz */
        continue;
      sendbuf[cnt].GNO[0] = edge_gno;
      sendbuf[cnt].GNO[1] = vtx_gno;
      memcpy(wgtarray+cnt*outmat->mtx.pinwgtdim, outmat->mtx.pinwgt+j*outmat->mtx.pinwgtdim,
               outmat->mtx.pinwgtdim*sizeof(float));
      cnt++;
    }
    if(outmat->mtx.ystart[i] == outmat->mtx.yend[i]) {
      proclist[cnt] = (*outmat->hashDistFct)(edge_gno, -1, outmat->hashDistData,
					  &sendbuf[cnt].part_y);
      if (proclist[cnt] < 0) /* Discard this nnz */
        continue;
      sendbuf[cnt].GNO[0] = edge_gno;
      sendbuf[cnt].GNO[1] = -1;
      memset(wgtarray+cnt*outmat->mtx.pinwgtdim, 0,outmat->mtx.pinwgtdim*sizeof(float));
      cnt++;
    }
  }

  if (outmat->hashDistFct == (distFnct *)&Zoltan_Distribute_Origin)
    Zoltan_Distribute_Partition_Free(&outmat->hashDistData);

  if (outmat->mtx.yend != outmat->mtx.ystart + 1)
    ZOLTAN_FREE(&outmat->mtx.yend);
  outmat->mtx.yend = NULL;
  ZOLTAN_FREE(&outmat->mtx.ystart);
  ZOLTAN_FREE(&outmat->mtx.yGNO);
  ZOLTAN_FREE(&outmat->mtx.pinGNO);
  ZOLTAN_FREE(&outmat->mtx.pinwgt);
  ZOLTAN_FREE(&outmat->mtx.yGID);


  /*
   * Send pins to their target processors.
   * They become non-zeros in the 2D data distribution.
   */

KDDKDDKDD(zz->Proc, "    CommPlan Create");
  msg_tag--;
  ierr = Zoltan_Comm_Create(&plan, cnt, proclist, communicator, msg_tag, &outmat->mtx.nPins);
  ZOLTAN_FREE(&proclist);

  nonzeros = (Zoltan_Arc *) ZOLTAN_MALLOC((outmat->mtx.nPins) * sizeof(Zoltan_Arc));
  if (outmat->mtx.nPins && nonzeros == NULL) MEMORY_ERROR;

  msg_tag--;
  Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(Zoltan_Arc), (char *) nonzeros);
  ZOLTAN_FREE(&sendbuf);

  if (outmat->mtx.pinwgtdim) { /* We have to take care about weights */
    tmpwgtarray = (float*) ZOLTAN_MALLOC(outmat->mtx.nPins*outmat->mtx.pinwgtdim*sizeof(float));
    if (outmat->mtx.nPins && tmpwgtarray == NULL) MEMORY_ERROR;

    msg_tag--;
    Zoltan_Comm_Do(plan, msg_tag, (char *) wgtarray, outmat->mtx.pinwgtdim*sizeof(float),
		   (char *) tmpwgtarray);
    ZOLTAN_FREE(&wgtarray);
  }
  Zoltan_Comm_Destroy(&plan);

  /* Unpack the non-zeros received. */

KDDKDDKDD(zz->Proc, "    Zoltan_Matrix_Remove_DupArcs");
  /* TODO: do take care about singletons */
  Zoltan_Matrix_Remove_DupArcs(zz, outmat->mtx.nPins, (Zoltan_Arc*)nonzeros, tmpwgtarray,
			       &outmat->mtx);
}

  /* Now we just have to change numbering */
  outmat->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC((nProc_y+1), sizeof(ZOLTAN_GNO_TYPE));
  outmat->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC((nProc_x+1), sizeof(ZOLTAN_GNO_TYPE));
  if (outmat->dist_y == NULL || outmat->dist_x == NULL) MEMORY_ERROR;

  /* FIXME: Work only in 1D */
  tmp_gno = (ZOLTAN_GNO_TYPE)outmat->mtx.nY;
  MPI_Allgather(&tmp_gno, 1, zoltan_gno_mpi_type, outmat->dist_y+1, 1, zoltan_gno_mpi_type, communicator);
  for (i = 1 ; i <= nProc_y ; i ++) {
    outmat->dist_y[i] += outmat->dist_y[i-1];
  }
  outmat->dist_x[1] = outmat->mtx.globalX;

  perm_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(outmat->mtx.nY * sizeof(ZOLTAN_GNO_TYPE));
  if (outmat->mtx.nY > 0 && perm_y == NULL) MEMORY_ERROR;
  for (i = 0 ; i < outmat->mtx.nY ; ++i){
    perm_y[i] = i + outmat->dist_y[myProc_y];
  }

KDDKDDKDD(zz->Proc, "    Zoltan_Matrix_Permute");
  Zoltan_Matrix_Permute(zz, &outmat->mtx, perm_y);

KDDKDDKDD(zz->Proc, "    Zoltan_Matrix_Permute done");
 End:
  ZOLTAN_FREE(&perm_y);
  ZOLTAN_FREE(&proclist);
  ZOLTAN_FREE(&sendbuf);
  ZOLTAN_FREE(&nonzeros);
  ZOLTAN_FREE(&tmpwgtarray);


  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
コード例 #10
0
ファイル: phg_distrib.c プロジェクト: agrippa/Trilinos
static int Zoltan_PHG_Redistribute_Hypergraph(
    ZZ *zz, 
    PHGPartParams *hgp,     /* Input:  parameters; used only for UseFixedVtx */
    HGraph  *ohg,           /* Input:  Local part of distributed hypergraph */
    int     firstproc,      /* Input:  rank (in ocomm) of the first proc of 
                                       the ncomm*/
    int     *v2Col,         /* Input:  Vertex to processor Column Mapping */
    int     *n2Row,         /* Input:  Net to processor Row Mapping */
    PHGComm *ncomm,         /* Input:  communicators of new distribution */
    HGraph  *nhg,           /* Output: Newly redistributed hypergraph */
    int     **vmap,         /* Output: allocated with the size nhg->nVtx and
                               vertex map from nhg to ohg's local vertex number*/
    int     **vdest         /* Output: allocated with the size nhg->nVtx and
                               stores dest proc in ocomm */
    )
{
    char * yo = "Zoltan_PHG_Redistribute_Hypergraph";
    PHGComm *ocomm = ohg->comm;
    int ierr=ZOLTAN_OK;
    int i, v, n, nPins, nsend, elemsz, nVtx, nEdge;
    int msg_tag = 9999;
    int *proclist=NULL, *cnt=NULL; 
    int *intbuf;
    ZOLTAN_GNO_TYPE *sendbuf=NULL;
    ZOLTAN_GNO_TYPE *vno=NULL, *nno=NULL, *dist_x=NULL, *dist_y=NULL,
        *vsn=NULL, *nsn=NULL, *pins=NULL;
    ZOLTAN_COMM_OBJ *plan;    
    MPI_Datatype zoltan_gno_mpi_type;

    zoltan_gno_mpi_type = Zoltan_mpi_gno_type();
    
    Zoltan_HG_HGraph_Init (nhg);
    nhg->comm = ncomm;
    
    nhg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(ZOLTAN_GNO_TYPE));
    nhg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(ZOLTAN_GNO_TYPE));
    dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(ZOLTAN_GNO_TYPE));
    dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(ZOLTAN_GNO_TYPE));
    vsn = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_x+1, sizeof(ZOLTAN_GNO_TYPE));
    nsn = (ZOLTAN_GNO_TYPE *) ZOLTAN_CALLOC(ncomm->nProc_y+1, sizeof(ZOLTAN_GNO_TYPE));
    vno = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(ohg->nVtx * sizeof(ZOLTAN_GNO_TYPE));
    nno = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(ohg->nEdge * sizeof(ZOLTAN_GNO_TYPE));

    if (!nhg->dist_x || !nhg->dist_y || !dist_x || !dist_y ||
        !vsn || !nsn || (ohg->nVtx && !vno) || (ohg->nEdge && !nno) ) {
        uprintf(ocomm, " new comm nProcx=%d nProcy=%d nvtx=%d nedge=%d", ncomm->nProc_x, ncomm->nProc_y, ohg->nVtx, ohg->nEdge);
        MEMORY_ERROR;
    }
      
    for (v = 0; v < ohg->nVtx; ++v)
        ++dist_x[v2Col[v]];
    for (n = 0; n < ohg->nEdge; ++n)
        ++dist_y[n2Row[n]];

    /* UVCUVC: CHECK ASSUMPTION
       This code assumes that the objects in the receive buffer of
       Zoltan_Comm_Do function are
         1- in the increasing processor order,
         2- order of the items send by a processor is preserved.
     */
    

    /* compute prefix sum to find new vertex start numbers; for each processor */
    MPI_Scan(dist_x, vsn, ncomm->nProc_x, zoltan_gno_mpi_type, MPI_SUM, ocomm->row_comm);
    /* All reduce to compute how many each processor will have */ 
    MPI_Allreduce(dist_x, &(nhg->dist_x[1]), ncomm->nProc_x, zoltan_gno_mpi_type, MPI_SUM, 
                  ocomm->row_comm);
    nhg->dist_x[0] = 0;    
    for (i=1; i<=ncomm->nProc_x; ++i) 
        nhg->dist_x[i] += nhg->dist_x[i-1];
    
    MPI_Scan(dist_y, nsn, ncomm->nProc_y, zoltan_gno_mpi_type, MPI_SUM, ocomm->col_comm);

    MPI_Allreduce(dist_y, &(nhg->dist_y[1]), ncomm->nProc_y, zoltan_gno_mpi_type, MPI_SUM, ocomm->col_comm);
    nhg->dist_y[0] = 0;
    for (i=1; i<=ncomm->nProc_y; ++i)
        nhg->dist_y[i] += nhg->dist_y[i-1];

#ifdef _DEBUG1
    PrintArr(ocomm, "vsn", vsn, ncomm->nProc_x);
    PrintArr(ocomm, "nsn", nsn, ncomm->nProc_y);
#endif
    
    /* find mapping of current LOCAL vertex no (in my node)
       to "new" vertex no LOCAL to dest node*/
    for (v = ohg->nVtx-1; v>=0; --v)
        vno[v] = --vsn[v2Col[v]];
    for (n = ohg->nEdge-1; n>=0; --n)
        nno[n] = --nsn[n2Row[n]];

    nsend = MAX(MAX(ohg->nPins, ohg->nVtx), ohg->nEdge);
    elemsz = MAX(MAX(2, ohg->VtxWeightDim), ohg->EdgeWeightDim);
    elemsz = (sizeof(float)>sizeof(ZOLTAN_GNO_TYPE)) ? sizeof(float)*elemsz : sizeof(ZOLTAN_GNO_TYPE)*elemsz;

    proclist = (int *) ZOLTAN_MALLOC(nsend * sizeof(int));
    sendbuf = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(nsend * elemsz);

    if (nsend && (!proclist || !sendbuf)) MEMORY_ERROR;

    /* first communicate pins */
    nPins = 0;
    for (v = 0; v < ohg->nVtx; ++v) { 
        for (i = ohg->vindex[v]; i < ohg->vindex[v+1]; ++i) {
#ifdef _DEBUG1
            if ((n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])<0 ||
                (n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v])>=ocomm->nProc)
                errexit("vertex %d vedge[%d]=%d n2Row=%d #Proc_x=%d v2Col=%d", i, ohg->vedge[i], n2Row[ohg->vedge[i]], ncomm->nProc_x , v2Col[v]);
#endif
            proclist[nPins]   = firstproc + n2Row[ohg->vedge[i]] * ncomm->nProc_x + v2Col[v];
            sendbuf[2*nPins]  = vno[v];
            sendbuf[2*nPins+1]= nno[ohg->vedge[i]];
            ++nPins; 
        }
    }
#ifdef _DEBUG1
    if (nPins!=ohg->nPins) {
        uprintf(ocomm, "sanity check failed nPins(%d)!=hg->nPins(%d)\n", nPins, ohg->nPins);
        errexit("terminating");
    }
#endif

    --msg_tag;
    ierr |= Zoltan_Comm_Create(&plan, ohg->nPins, proclist, ocomm->Communicator, msg_tag, &nPins);

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nPins>1) { /* this processor is not in new comm but receiving data?*/
        uprintf(ocomm, "Something wrong; why I'm receiving data nPins=%d\n", nPins);
        errexit("terminating");
    }
#endif
    
    if (nPins && (pins = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(nPins * 2 * sizeof(ZOLTAN_GNO_TYPE)))==NULL) 
        MEMORY_ERROR;

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, 2*sizeof(ZOLTAN_GNO_TYPE), (char *) pins);
    Zoltan_Comm_Destroy(&plan);

    /* now communicate vertex map */
    nsend = 0;
    intbuf = (int *)sendbuf;
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) { 
            proclist[nsend] = firstproc+v2Col[v];
            intbuf[nsend++] = ohg->vmap[v];
        }
    }
        
    --msg_tag; 
    ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator, msg_tag, &nVtx); 

#ifdef _DEBUG1
    if (ncomm->myProc==-1 && nVtx>1) { /* this processor is not in new comm but receiving data?*/ 
        uprintf(ocomm, "Something wrong; why I'm receiving data nVtx=%d\n", nVtx);
        errexit("terminating");
    }
#endif

    /* those are only needed in the first row of ncomm */
    *vmap = *vdest = NULL;  
    if (!ncomm->myProc_y && nVtx &&
        (!(*vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int))) ||
         !(*vdest = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))))
        MEMORY_ERROR;
    

    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int), (char *) *vmap);

    intbuf = (int *)sendbuf;
    if (!ocomm->myProc_y) { /* only first row sends to the first row of ncomm */
        for (v = 0; v < ohg->nVtx; ++v) 
            intbuf[v] = ocomm->myProc;
    }
    --msg_tag;
    Zoltan_Comm_Do(plan, msg_tag, (char *) sendbuf, sizeof(int), (char *) *vdest);
        
    if (ncomm->myProc!=-1) { /* I'm in the new comm */
        /* ncomm's first row now bcast to other rows */
        MPI_Bcast(&nVtx, 1, MPI_INT, 0, ncomm->col_comm);
#ifdef _DEBUG1
        if (nVtx!=(int)(nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]))
            errexit("nVtx(%d)!= nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x](%d)", nVtx, nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]);
#endif
        if (nVtx && (nhg->vmap = (int *) ZOLTAN_MALLOC(nVtx * sizeof(int)))==NULL) 
            MEMORY_ERROR;
        for (i=0; i<nVtx; ++i)
            nhg->vmap[i] = i;
    }


    /* now communicate vertex weights */
    if (ohg->VtxWeightDim) {
        if (nVtx){
            nhg->vwgt = (float*) ZOLTAN_MALLOC(nVtx*ohg->VtxWeightDim*sizeof(float));
            if (!nhg->vwgt) MEMORY_ERROR;
        }
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->vwgt,
                       ohg->VtxWeightDim*sizeof(float), (char *) nhg->vwgt);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->vwgt, nVtx*ohg->VtxWeightDim, MPI_FLOAT, 0, ncomm->col_comm);
    }

    /* communicate coordinates */
    if (ohg->nDim) {
      if (nVtx) {
	nhg->coor = (double *) ZOLTAN_MALLOC(nVtx * ohg->nDim * sizeof(double));
	if (!nhg->coor) MEMORY_ERROR;
      }

      --msg_tag;
      Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->coor, ohg->nDim * sizeof(double),
		     (char *) nhg->coor);
      if (ncomm->myProc != -1) /* ncomm's first row bcast to other rows */
	MPI_Bcast(nhg->coor, nVtx * ohg->nDim, MPI_DOUBLE, 0, ncomm->col_comm);
    }
    
    /* communicate fixed vertices, if any */
    if (hgp->UseFixedVtx) {
        if (nVtx){
            nhg->fixed_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
            if (!nhg->fixed_part) MEMORY_ERROR;
        }
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->fixed_part,
                       sizeof(int), (char *) nhg->fixed_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->fixed_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }    
    /* communicate pref parts, if any */
    if (hgp->UsePrefPart) {
        if (nVtx){
            nhg->pref_part = (int *) ZOLTAN_MALLOC(nVtx*sizeof(int));
            if (!nhg->pref_part) MEMORY_ERROR;
        }
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->pref_part,
                       sizeof(int), (char *) nhg->pref_part);
        if (ncomm->myProc!=-1)  /* ncomm's first row now bcast to other rows */
            MPI_Bcast(nhg->pref_part, nVtx, MPI_INT, 0, ncomm->col_comm);
    }

    /* this comm plan is no longer needed. */
    Zoltan_Comm_Destroy(&plan);

    
    if (ohg->EdgeWeightDim) { /* now communicate edge weights */
        nsend = 0;
        if (!ocomm->myProc_x)  /* only first column sends to first column of ncomm */
            for (n = 0; n < ohg->nEdge; ++n) 
                proclist[nsend++] = firstproc + n2Row[n]*ncomm->nProc_x;
    
        --msg_tag;
        ierr |= Zoltan_Comm_Create(&plan, nsend, proclist, ocomm->Communicator,
                                   msg_tag, &nEdge);

#ifdef _DEBUG1
        if (ncomm->myProc==-1 && nEdge>1) { /* this processor is not in new comm but receiving data?*/
            uprintf(ocomm, "Something wrong; why I'm receiving data nEdge=%d\n", nEdge);
            errexit("terminating");
        }
#endif
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            MPI_Bcast(&nEdge, 1, MPI_INT, 0, ncomm->row_comm);
#ifdef _DEBUG1
            if (nEdge != (nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]))
            errexit("nEdge(%d)!=nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y](%d)", nEdge, nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
#endif
        }
        
        if (nEdge){
            nhg->ewgt = (float*) ZOLTAN_MALLOC(nEdge*ohg->EdgeWeightDim*sizeof(float));
            if (!nhg->ewgt) MEMORY_ERROR;
        }
    
        --msg_tag;
        Zoltan_Comm_Do(plan, msg_tag, (char *) ohg->ewgt,
                       ohg->EdgeWeightDim*sizeof(float), (char *) nhg->ewgt);
        if (ncomm->myProc!=-1) { /* if we're in the new comm */
            /* ncomm's first column now bcast to other columns */
            if (nEdge) 
                MPI_Bcast(nhg->ewgt, nEdge*ohg->EdgeWeightDim, MPI_FLOAT, 0, 
                          ncomm->row_comm);
        }

        Zoltan_Comm_Destroy(&plan);
    } else 
        nEdge = (ncomm->myProc==-1) 
                ? 0 
                : (int)(nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
    

    if (ncomm->myProc==-1) {
#ifdef _DEBUG1
        if (nPins || nVtx || nEdge)
            errexit("I should not have any data: hey nPins=%d  nVtx=%d  nEdge=%d\n", nPins, nVtx, nEdge);
#endif
        nhg->nEdge = nhg->nVtx = nhg->nPins = 0;
    } else {
        nhg->nEdge = (int)(nhg->dist_y[ncomm->myProc_y+1] - nhg->dist_y[ncomm->myProc_y]);
        nhg->nVtx = (int)(nhg->dist_x[ncomm->myProc_x+1] - nhg->dist_x[ncomm->myProc_x]);
        nhg->nPins = nPins;
    
        /* Unpack the pins received. */
        cnt = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vindex = (int *) ZOLTAN_CALLOC(nhg->nVtx + 1, sizeof(int));
        nhg->vedge = (int *) ZOLTAN_MALLOC(nhg->nPins * sizeof(int));
        
        if (!cnt || !nhg->vindex || (nPins && !nhg->vedge))
            MEMORY_ERROR;

        /* Count the number of pins per vertex */
        for (i = 0; i < nPins; ++i)
            ++cnt[pins[2*i]];
        
        /* Compute prefix sum to represent hindex correctly. */
        for (i = 0; i < nhg->nVtx; ++i)  {
            nhg->vindex[i+1] = nhg->vindex[i] + cnt[i];
            cnt[i] = nhg->vindex[i];
        }

        for (i = 0; i < nPins; ++i) 
            nhg->vedge[cnt[pins[2*i]]++] = pins[2*i+1];
        
        nhg->info               = ohg->info;
	nhg->nDim               = ohg->nDim;
        nhg->VtxWeightDim       = ohg->VtxWeightDim;
        nhg->EdgeWeightDim      = ohg->EdgeWeightDim;

        ierr = Zoltan_HG_Create_Mirror(zz, nhg);
        if (ierr != ZOLTAN_OK && ierr != ZOLTAN_WARN)
            MEMORY_ERROR;
    }

 End:
    Zoltan_Comm_Destroy(&plan);  /* Needed if goto End on error condition */

    Zoltan_Multifree(__FILE__, __LINE__, 10,
                     &proclist, &sendbuf, &pins, &cnt,
                     &vno, &nno, &dist_x, &dist_y, &vsn, &nsn
        );
    
    return ierr;
}
コード例 #11
0
int       Zoltan_Comm_Do_AlltoAll(
ZOLTAN_COMM_OBJ * plan,		/* communication data structure */
char *send_data,		/* array of data I currently own */
int nbytes,			/* multiplier for sizes */
char *recv_data)		/* array of data I'll own after comm */
{
  static char *yo = "Zoltan_Comm_Do_AlltoAll";
  char *outbuf=NULL, *inbuf=NULL, *buf=NULL;
  int *outbufCounts=NULL, *outbufOffsets=NULL; 
  int *inbufCounts=NULL, *inbufOffsets=NULL;
  int nprocs, me, rc, i, j, k, p, sorted;
  int nSendMsgs, nSendItems, nRecvMsgs, nRecvItems;
  int length, offset, itemSize, outbufLen;

  int sm = (plan->self_msg > 0) ? 1 : 0;

  nSendMsgs = plan->nsends + sm;
  nRecvMsgs = plan->nrecvs + sm;

  for (i=0, nSendItems=0; i <nSendMsgs; i++){
    nSendItems += plan->lengths_to[i];
  }
  for (i=0, nRecvItems=0; i <nRecvMsgs; i++){
    nRecvItems += plan->lengths_from[i];
  }

  MPI_Comm_size(plan->comm, &nprocs);
  MPI_Comm_rank(plan->comm, &me);

  outbufCounts = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  outbufOffsets = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  inbufCounts = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));
  inbufOffsets = (int *) ZOLTAN_CALLOC(nprocs , sizeof(int));

  if (!outbufCounts || !outbufOffsets || !inbufCounts || !inbufOffsets){
    ZOLTAN_COMM_ERROR("memory error", yo, me);
  }

  /* The *_to fields of the plan refer to the items in the send_data buffer,
   * and how to pull out the correct items for each receiver.  The
   * *_from fields of the plan refer to the recv_data buffer.  Items 
   * arrive in process rank order, and these fields tell us where to
   * put them in the recv_data buffer.
   */

  /* CREATE SEND BUFFER */

  sorted = 0;
  if (plan->indices_to == NULL){
    sorted = 1;
    for (i=1; i< nSendMsgs; i++){
      if (plan->starts_to[i] < plan->starts_to[i-1]){
        sorted = 0;
        break;
      }
    }
  }

  if (plan->sizes_to){
    /*
     * Each message contains items for a process, and each item may be
     * a different size.
     */

    for (i=0, outbufLen=0; i < nSendMsgs; i++){
      outbufLen += plan->sizes_to[i];
    }

    if (plan->indices_to){
      /*
       * items are not grouped by message
       */
      buf = outbuf = (char *)ZOLTAN_MALLOC(outbufLen * nbytes);
      if (outbufLen && nbytes && !outbuf){
        ZOLTAN_COMM_ERROR("memory error", yo, me);
      }

      for (p=0, i=0, k=0; p < nprocs; p++){

        length = 0;

        if (i < nSendMsgs){
          if (plan->procs_to[i] == p){   /* procs_to is sorted */
  
            for (j=0; j < plan->lengths_to[i]; j++,k++){
              itemSize = plan->sizes[plan->indices_to[k]] * nbytes;
              offset = plan->indices_to_ptr[k] * nbytes;
  
              memcpy(buf, send_data + offset, itemSize);
  
              buf += itemSize;
              length += itemSize;
            }
            i++;
          }
        }
  
        outbufCounts[p] = length;
        if (p){
          outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
        }
      }
    }
    else{
      /*
       * items are stored contiguously for each message
       */

      if (!sorted || (plan->nvals > nSendItems) ){
        buf = outbuf = (char *)ZOLTAN_MALLOC(outbufLen * nbytes);
        if (outbufLen && nbytes && !outbuf){
          ZOLTAN_COMM_ERROR("memory error", yo, me);
        }
      }
      else{
        /* All items in send_data are being sent, and they are sorted
         * in process rank order.
         */
        outbuf = send_data;
      }

      for (p=0, i=0; p < nprocs; p++){

        length = 0;

        if (i < nSendMsgs){
          if (plan->procs_to[i] == p){   /* procs_to is sorted */
            length = plan->sizes_to[i] * nbytes;
            offset = plan->starts_to_ptr[i] * nbytes;
  
            if ((!sorted || (plan->nvals > nSendItems)) && length){
              memcpy(buf, send_data + offset, length);
              buf += length;
            }
            i++;
          }
        }
  
        outbufCounts[p] = length;
        if (p){
          outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
        }
      }
    }
  }
  else if (plan->indices_to){
    /*
     * item sizes are constant, however the items belonging in a given
     * message may not be contiguous in send_data
     */

    buf = outbuf = (char *)ZOLTAN_MALLOC(nSendItems * nbytes);
    if (nSendMsgs && nbytes && !outbuf){
      ZOLTAN_COMM_ERROR("memory error", yo, me);
    }

    for (p=0, i=0, k=0; p < nprocs; p++){

      length = 0;
     
      if (i < nSendMsgs){
        if (plan->procs_to[i] == p){   /* procs_to is sorted */
          for (j=0; j < plan->lengths_to[i]; j++,k++){
            offset = plan->indices_to[k] * nbytes;
            memcpy(buf, send_data + offset, nbytes);
            buf += nbytes;
          }
          length = plan->lengths_to[i] * nbytes;
          i++;
        }
      }

      outbufCounts[p] = length;
      if (p){
        outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
      }
    }
  }
  else{                          

    /* item sizes are constant, and items belonging to a
     * given message are always stored contiguously in send_data
     */

    if (!sorted || (plan->nvals > nSendItems)){
      buf = outbuf = (char *)ZOLTAN_MALLOC(nSendItems * nbytes);
      if (nSendItems && nbytes && !outbuf){
        ZOLTAN_COMM_ERROR("memory error", yo, me);
      }
    }
    else{
      /* send_data is sorted by process, and we don't skip
       * any of the data in the buffer, so we can use send_data 
       * in the alltoall call
       */
      outbuf = send_data;
    }

    for (p=0,i=0; p < nprocs; p++){

      length = 0;
     
      if (i < nSendMsgs){
        if (plan->procs_to[i] == p){    /* procs_to is sorted */
          offset = plan->starts_to[i] * nbytes;
          length = plan->lengths_to[i] * nbytes;
  
          if ((!sorted || (plan->nvals > nSendItems)) && length){
            memcpy(buf, send_data + offset, length);
            buf += length;
          }
          i++;
        }
      }

      outbufCounts[p] = length;
      if (p){
        outbufOffsets[p] = outbufOffsets[p-1] + outbufCounts[p-1];
      }
    }
  }

  /* CREATE RECEIVE BUFFER */

  sorted = 0;
  if (plan->indices_from == NULL){
    sorted = 1;
    for (i=1; i< nRecvMsgs; i++){
      if (plan->starts_from[i] < plan->starts_from[i-1]){
        sorted = 0;
        break;
      }
    }
  }

  if (sorted){
    /* Caller already expects received data to be ordered by
     * the sending process rank.
     */
    inbuf = recv_data;
  }
  else{
    inbuf = (char *)ZOLTAN_MALLOC(plan->total_recv_size * nbytes);
    if (plan->total_recv_size && nbytes && !inbuf){
      ZOLTAN_COMM_ERROR("memory error", yo, me);
    }
  }

  for (p=0, i=0; p < nprocs; p++){
    length = 0;

    if (i < nRecvMsgs){
      if (plan->procs_from[i] == p){
  
        if (plan->sizes == NULL){
          length = plan->lengths_from[i] * nbytes;
        }
        else{
          length = plan->sizes_from[i] * nbytes;
        }  
        i++;
      }
    }

    inbufCounts[p] = length;
    if (p){
      inbufOffsets[p] = inbufOffsets[p-1] + inbufCounts[p-1];
    }
  }

  /* EXCHANGE DATA */

  rc = MPI_Alltoallv(outbuf, outbufCounts, outbufOffsets, MPI_BYTE,
                     inbuf, inbufCounts, inbufOffsets, MPI_BYTE,
                     plan->comm);

  if (outbuf != send_data){
    ZOLTAN_FREE(&outbuf);
  }
  ZOLTAN_FREE(&outbufCounts);
  ZOLTAN_FREE(&outbufOffsets);

  /* WRITE RECEIVED DATA INTO USER'S BUFFER WHERE IT'S EXPECTED */

  if (!sorted){

    buf = inbuf;

    if (plan->sizes == NULL){

      /* each item in each message is nbytes long */

      if (plan->indices_from == NULL){
        for (i=0; i < nRecvMsgs; i++){
          offset = plan->starts_from[i] * nbytes;
          length = plan->lengths_from[i] * nbytes;
          memcpy(recv_data + offset, buf, length);
          buf += length;
        }
      }
      else{
        for (i=0,k=0; i < nRecvMsgs; i++){

          for (j=0; j < plan->lengths_from[i]; j++,k++){
            offset = plan->indices_from[k] * nbytes;
            memcpy(recv_data + offset, buf, nbytes);
            buf += nbytes;
          }
        }
      }
    }
    else{  /* (sizes!=NULL) && (indices_from!=NULL) not allowed by Zoltan_Comm_Resize */

      /* items can be different sizes */

      for (i=0; i < nRecvMsgs; i++){
        offset = plan->starts_from_ptr[i] * nbytes;
        length = plan->sizes_from[i] * nbytes;
        memcpy(recv_data + offset, buf, length);
        buf += length;
      }
    }

    ZOLTAN_FREE(&inbuf);
  }

  ZOLTAN_FREE(&inbufCounts);
  ZOLTAN_FREE(&inbufOffsets);

  return ZOLTAN_OK;
}
コード例 #12
0
ファイル: phg_gather.c プロジェクト: 00liujj/trilinos
int Zoltan_PHG_Gather_To_All_Procs(
  ZZ *zz, 
  HGraph *phg,           /* Input:   Local part of distributed hypergraph */
  PHGPartParams *hgp,        /* Input:   Hypergraph parameters */
  PHGComm *scomm,        /* Input:   Serial PHGComm for use by shg. */
  HGraph **gathered_hg   /* Output:  combined hypergraph combined to proc */
)
{
/* 
 * Function to gather distributed hypergraph onto each processor for
 * coarsest partitioning.
 * First hypergraph arrays for the hypergraph on a column of processors
 * are built using MPI_Allgathers down the processor columns.
 * These hypergraph arrays contain complete info about a subset of vertices.
 * Second the column hypergraphs are gathered along processor rows.
 * Each processor then has a complete description of the hypergraph.
 */
char *yo = "Zoltan_PHG_Gather_To_All_Procs";
int ierr = ZOLTAN_OK;
int i, tmp, sum;
int *each = NULL,
    *disp = NULL;      /* Size and displacement arrays for MPI_Allgatherv */
int *send_buf = NULL;    /* Buffer of values to be sent */
int send_size;           /* Size of buffer send_buf */
int *col_vedge = NULL;   /* vedge array for the proc-column hypergraph */
int *col_vindex = NULL;  /* vindex array for the proc-column hypergraph */
int *col_hvertex = NULL; /* hvertex array for the proc-column hypergraph */
int *col_hindex = NULL;  /* hindex array for the proc-column hypergraph */
int col_nVtx;            /* Number of vertices in processor column */
int col_nEdge;           /* Number of edges in processor column */
int col_nPin;            /* Number of pins in processor column */

int *recv_size = NULL;   /* nPins for each proc in col or row */

HGraph *shg;             /* Pointer to the serial hypergraph to be
                            returned by this function. */

int myProc_x = phg->comm->myProc_x;
int nProc_x = phg->comm->nProc_x;
int nProc_y = phg->comm->nProc_y;
int max_nProc_xy = MAX(nProc_x, nProc_y);

  if (phg->comm->nProc == 1) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Do not call this routine on one proc.");
    return ZOLTAN_FATAL;
  }

#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, phg, NULL, stdout, "GatherBefore");/* NULL parts for now;
                                                           add non-NULL later */
#endif

  /******************************************************************
   *  0. Allocate the hypergraph to be returned. 
   *  Set values that we already know. 
   ******************************************************************/

  shg = *gathered_hg = (HGraph *) ZOLTAN_MALLOC(sizeof(HGraph));
  if (!shg) MEMORY_ERROR;

  Zoltan_HG_HGraph_Init(shg);
  shg->nVtx = phg->dist_x[nProc_x];    /* TODO64 - can this exceed 2B? */
  shg->nEdge = phg->dist_y[nProc_y];

  shg->dist_x = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  shg->dist_y = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(2 * sizeof(ZOLTAN_GNO_TYPE));
  if (!shg->dist_x || !shg->dist_y) MEMORY_ERROR;

  shg->dist_x[0] = shg->dist_y[0] = 0;
  shg->dist_x[1] = shg->nVtx;
  shg->dist_y[1] = shg->nEdge;

  shg->comm = scomm;

  shg->EdgeWeightDim = phg->EdgeWeightDim;
  shg->VtxWeightDim = phg->VtxWeightDim;
  if (shg->VtxWeightDim && shg->nVtx)
    shg->vwgt = (float *) ZOLTAN_MALLOC(shg->nVtx * shg->VtxWeightDim 
                                                  * sizeof(float));
  if (shg->EdgeWeightDim && shg->nEdge)
    shg->ewgt = (float *) ZOLTAN_MALLOC(shg->nEdge * shg->EdgeWeightDim 
                                                  * sizeof(float));
  /* Fixed vertices */
  shg->bisec_split = phg->bisec_split;
  if (hgp->UseFixedVtx)
    shg->fixed_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  if (hgp->UsePrefPart)
    shg->pref_part = (int *) ZOLTAN_MALLOC(shg->nVtx * sizeof(int));
  
  /* Allocate arrays for use in gather operations */
  recv_size = (int *) ZOLTAN_MALLOC(3 * max_nProc_xy * sizeof(int));
  each = recv_size + max_nProc_xy;
  disp = each + max_nProc_xy;
 
  /* TODO64 - phg->dist_y[nProc_y] could exceed 2 Billion, NO? */
  send_size = MAX(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x], 
                  phg->dist_y[nProc_y]);
  send_buf = (int *) ZOLTAN_MALLOC(send_size * sizeof(int));
  

  if ((shg->VtxWeightDim && shg->nVtx && !shg->vwgt) ||
      (shg->EdgeWeightDim && shg->nEdge && !shg->ewgt) || !recv_size ||
      (send_size && !send_buf)) 
    MEMORY_ERROR;
  

  /*************************************************************
   *  1. Gather all non-zeros for vertices in processor column *
   *************************************************************/
  
  if (nProc_y == 1) {
    /* 
     * Don't need a gather; just set pointers appropriately for row-gather
     * in Step 2 below.
     */

    col_nVtx = phg->nVtx;
    col_nEdge = phg->nEdge;
    col_nPin = phg->nPins;
    col_vindex = phg->vindex;
    col_vedge = phg->vedge;
    col_hindex = phg->hindex;
    col_hvertex = phg->hvertex;

    for (i = 0; i < shg->EdgeWeightDim * shg->nEdge; i++)
      shg->ewgt[i] = phg->ewgt[i];
  }

  else {

    /* Gather local size info for each proc in column */

    MPI_Allgather(&(phg->nPins), 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->col_comm);
  
    /* Compute number of vtx, edge, and nnz in column */
    col_nVtx = (int)(phg->dist_x[myProc_x+1] - phg->dist_x[myProc_x]);
    col_nEdge = phg->dist_y[nProc_y];   /* SCHEMEA */
    col_nPin = 0;
    for (i = 0; i < nProc_y; i++) {
      col_nPin += recv_size[i];
    }
    
    /* Allocate arrays for column hypergraph */
    col_hindex = (int *) ZOLTAN_CALLOC((col_nEdge+1), sizeof(int));
    col_hvertex = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    col_vindex = (int *) ZOLTAN_CALLOC((col_nVtx+1), sizeof(int));
    col_vedge = (int *) ZOLTAN_MALLOC(col_nPin * sizeof(int));
  
    if (!col_vindex || !col_hindex || 
        (col_nPin && (!col_vedge || !col_hvertex)))
      MEMORY_ERROR;
    
    /* Gather hvertex data for all procs in column */
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex LNO to GNO here. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_y; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(phg->hvertex, phg->nPins, MPI_INT,
                   col_hvertex, recv_size, disp, MPI_INT, phg->comm->col_comm);
  
    /* SCHEMEA uses same vertex LNO on each proc in column. */
    /* SCHEMEB would require conversion from vertex GNO to LNO here */
  
    /* Gather hindex data for all procs in column */
  
    for (i = 0; i < phg->nEdge; i++)
      send_buf[i] = phg->hindex[i+1] - phg->hindex[i];
  
    /* SCHEMEA can assume a recv for each edge;
     * SCHEMEB needs to gather the number of edges recv'd from each proc. */
  
    for (i = 0; i < nProc_y; i++) 
      each[i] = phg->dist_y[i+1] - phg->dist_y[i];

    disp[0] = 0;  /* Can't use dist_y because it may not be sizeof(int) */
    for (i=1; i < nProc_y; i++){
      disp[i] = disp[i-1] + each[i-1];
    }
  
    /* SCHEMEA can use phg->dist_y for displacement array.
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, phg->nEdge, MPI_INT, 
                   col_hindex, each, disp, MPI_INT, phg->comm->col_comm);
  
    /* Perform prefix sum on col_hindex */
    sum = 0;
    for (i = 0; i < col_nEdge; i++) {
      tmp = col_hindex[i];
      col_hindex[i] = sum;
      sum += tmp;
    }
    col_hindex[col_nEdge] = sum;

    /* Sanity check */
    if (col_hindex[col_nEdge] != col_nPin) {
      printf("%d Sanity check failed:  "
             "col_hindex[col_nEdge] %d != col_nPin %d\n", 
              zz->Proc, col_hindex[col_nEdge], col_nPin);
      exit(-1);
    }
  
    /* Gather edge weights, if any. */
    if (shg->EdgeWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->EdgeWeightDim;
      for (i = 1; i < nProc_y; i++) {
        each[i] *= phg->EdgeWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->ewgt, phg->nEdge*phg->EdgeWeightDim, MPI_FLOAT, 
                     shg->ewgt, each, disp, MPI_FLOAT, phg->comm->col_comm);
    }
   
  
    Zoltan_HG_Mirror(col_nEdge, col_hindex, col_hvertex, 
                     col_nVtx, col_vindex, col_vedge);
  
  }  /* End column-gather */
  
  /*************************************************************
   *  2. Gather all non-zeros for edges in processor rows      *
   *  All processors in a processor column now have the same   *
   *  hypergraph; we now gather it across rows.                *
   *************************************************************/

  if (nProc_x == 1) {
    /* 
     * Don't need a gather across the row; just set pointers appropriately
     * in shg.
     */
    shg->vindex = col_vindex;
    shg->vedge = col_vedge;
    shg->hindex = col_hindex;
    shg->hvertex = col_hvertex;

    /* Copy vwgt and fixed arrays so shg owns this memory */
    for (i = 0; i < shg->VtxWeightDim*shg->nVtx; i++)
      shg->vwgt[i] = phg->vwgt[i];
    if (hgp->UseFixedVtx)
      for (i = 0; i < shg->nVtx; i++)
        shg->fixed_part[i] = phg->fixed_part[i];
    if (hgp->UsePrefPart)
      for (i = 0; i < shg->nVtx; i++)
        shg->pref_part[i] = phg->pref_part[i];
  }

  else {

    /* Gather info about size within the row */

    MPI_Allgather(&col_nPin, 1, MPI_INT, recv_size, 1, MPI_INT, 
                  phg->comm->row_comm);
  
    tmp = 0;
    for (i = 0; i < nProc_x; i++) 
      tmp += recv_size[i];

    shg->nPins = tmp;
  
    shg->vindex = (int *) ZOLTAN_CALLOC((shg->nVtx+1), sizeof(int));
    shg->vedge = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
    shg->hindex = (int *) ZOLTAN_CALLOC((shg->nEdge+1), sizeof(int));
    shg->hvertex = (int *) ZOLTAN_MALLOC(shg->nPins * sizeof(int));
   
    if (!shg->vindex || !shg->hindex ||
        (shg->nPins && (!shg->vedge || !shg->hvertex)))
      MEMORY_ERROR;
    
    /* Gather vedge data for all procs in row */
  
    /* SCHEMEA can send local edge numbers; 
       SCHEMEB requires edge LNO to GNO conversion. */
  
    disp[0] = 0;
    for (i = 1; i < nProc_x; i++)
      disp[i] = disp[i-1] + recv_size[i-1];
  
    MPI_Allgatherv(col_vedge, col_nPin, MPI_INT,
                   shg->vedge, recv_size, disp, MPI_INT, phg->comm->row_comm);
  
    /* Gather vindex data for all procs in row */
  
    for (i = 0; i < col_nVtx; i++)
      send_buf[i] = col_vindex[i+1] - col_vindex[i];
  
    /* SCHEMEA can assume a recv for each vertex;
     * SCHEMEB would need to gather the number of vtxs recv'd from each proc. */
  
    for (i = 0; i < nProc_x; i++) 
      each[i] = (int)(phg->dist_x[i+1] - phg->dist_x[i]);

    disp[0] = 0;  /* Can't use dist_x, may not be sizeof(int) */
    for (i = 1; i < nProc_x; i++) 
      disp[i] = disp[i-1] + each[i-1];

    /* SCHEMEA can use phg->dist_x as displacement array;
     * SCHEMEB requires separate displacement array. */

    MPI_Allgatherv(send_buf, col_nVtx, MPI_INT, 
                   shg->vindex, each, disp,
                   MPI_INT, phg->comm->row_comm);

    /* Perform prefix sum on shg->vindex */
    sum = 0;
    for (i = 0; i < shg->nVtx; i++) {
      tmp = shg->vindex[i];
      shg->vindex[i] = sum;
      sum += tmp;
    }
    shg->vindex[shg->nVtx] = sum;
  
    /* Sanity check */
    if (shg->vindex[shg->nVtx] != shg->nPins) {
      printf("%d Sanity check failed:  "
             "shg->vindex %d != nPins %d\n", 
              zz->Proc, shg->vindex[shg->nVtx], shg->nPins);
      exit(-1);
    }
  
    /* Gather fixed array, if any  */
    if (hgp->UseFixedVtx){
  
#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather before gather. phg->fixed =");
      for (i=0; i<phg->nVtx; i++){
        printf(" %d ", phg->fixed_part[i]);
      }
      printf("\n");
#endif

      /* Can use the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->fixed_part, phg->nVtx, MPI_FLOAT, 
                     shg->fixed_part, each, disp, MPI_FLOAT, phg->comm->row_comm);

#ifdef DEBUG_
      uprintf(phg->comm, "Debug in PHG_gather after gather. shg->fixed =");
      for (i=0; i<shg->nVtx; i++){
        printf(" %d ", shg->fixed_part[i]);
      }
      printf("\n");
#endif
    }
    /* Gather pref part array, if any  */
    if (hgp->UsePrefPart){
      /* Can use the same each array. */
      /* Need to compute new disp array. */
      disp[0] = 0;
      for (i = 1; i < nProc_x; i++) {
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->pref_part, phg->nVtx, MPI_FLOAT, 
                     shg->pref_part, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
    
    /* Gather vertex weights, if any. */
    if (shg->VtxWeightDim) {
  
      /* Can use nearly the same each array. */
      /* Need to compute new disp array. */
  
      disp[0] = 0;
      each[0] *= phg->VtxWeightDim;
      for (i = 1; i < nProc_x; i++) {
        each[i] *= phg->VtxWeightDim;
        disp[i] = disp[i-1] + each[i-1];
      }
      
      MPI_Allgatherv(phg->vwgt, phg->nVtx*phg->VtxWeightDim, MPI_FLOAT, 
                     shg->vwgt, each, disp, MPI_FLOAT, phg->comm->row_comm);
    }
  
    Zoltan_HG_Mirror(shg->nVtx, shg->vindex, shg->vedge, 
                     shg->nEdge, shg->hindex, shg->hvertex);

  }  /* End row gather */
  
#ifdef KDDKDD_CHECK
  Zoltan_HG_Print(zz, shg, NULL, stdout, "GatherAfter");/* NULL parts for now;
                                                           add non-NULL later */
  Zoltan_PHG_Plot_2D_Distrib(zz, phg);
  Zoltan_PHG_Plot_2D_Distrib(zz, shg);
#endif

End:

  if (ierr < 0) {
    Zoltan_HG_HGraph_Free(*gathered_hg);
    ZOLTAN_FREE(gathered_hg);
  }

  Zoltan_Multifree(__FILE__, __LINE__, 2, &send_buf, 
                                          &recv_size);

  if (nProc_x > 1 && nProc_y > 1) 
    Zoltan_Multifree(__FILE__, __LINE__, 4, &col_vedge,
                                            &col_vindex,
                                            &col_hvertex,
                                            &col_hindex);
  return ierr;
}
コード例 #13
0
int Zoltan_PHG_CoarsePartition(
  ZZ *zz, 
  HGraph *phg,         /* Input:  coarse hypergraph -- distributed! */
  int numPart,         /* Input:  number of partitions to generate. */
  float *part_sizes,   /* Input:  array of size numPart listing target sizes
                                  (% of work) for the partitions */
  Partition part,      /* Input:  array of initial partition assignments.
                          Output: array of computed partition assignments.   */
  PHGPartParams *hgp   /* Input:  parameters to use.  */
)
{
/* 
 * Zoltan_PHG_CoarsePartition computes a partitioning of a hypergraph.
 * Typically, this routine is called at the bottom level in a
 * multilevel scheme (V-cycle).
 * It gathers the distributed hypergraph to each processor and computes
 * a decomposition of the serial hypergraph.  
 * It computes a different partition on each processor
 * using different random numbers (and possibly also
 * different algorithms) and selects the best.
 */
char *yo = "Zoltan_PHG_CoarsePartition";
int ierr = ZOLTAN_OK;
int i, si, j;
static PHGComm scomm;          /* Serial communicator info */
static int first_time = 1;
HGraph *shg = NULL;            /* Serial hypergraph gathered from phg */
int *spart = NULL;             /* Partition vectors for shg. */
int *new_part = NULL;          /* Ptr to new partition vector. */
float *bestvals = NULL;        /* Best cut values found so far */
int worst, new_cand;
float bal, cut, worst_cut;
int fine_timing = (hgp->use_timers > 2);
struct phg_timer_indices *timer = Zoltan_PHG_LB_Data_timers(zz);
int local_coarse_part = hgp->LocalCoarsePartition;

/* Number of iterations to try coarse partitioning on each proc. */
/* 10 when p=1, and 1 when p is large. */
const int num_coarse_iter = 1 + 9/zz->Num_Proc; 


  ZOLTAN_TRACE_ENTER(zz, yo);

  if (fine_timing) {
    if (timer->cpgather < 0)
      timer->cpgather = Zoltan_Timer_Init(zz->ZTime, 1, "CP Gather");
    if (timer->cprefine < 0)
      timer->cprefine = Zoltan_Timer_Init(zz->ZTime, 0, "CP Refine");
    if (timer->cpart < 0)
      timer->cpart = Zoltan_Timer_Init(zz->ZTime, 0, "CP Part");

    ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
  }


  /* Force LocalCoarsePartition if large global graph */
#define LARGE_GRAPH_VTX   64000
#define LARGE_GRAPH_PINS 256000
  if (phg->dist_x[phg->comm->nProc_x] > LARGE_GRAPH_VTX){
    /* TODO: || (global_nPins > LARGE_GRAPH_PINS) */
    local_coarse_part = 1;
  }

  /* take care of all special cases first */

  if (!strcasecmp(hgp->coarsepartition_str, "no")
      || !strcasecmp(hgp->coarsepartition_str, "none")) {
    /* Do no coarse partitioning. */
    /* Do a sanity test and  mapping to parts [0,...,numPart-1] */
    int first = 1;
    PHGComm *hgc=phg->comm;    

    Zoltan_Srand_Sync (Zoltan_Rand(NULL), &(hgc->RNGState_col), hgc->col_comm);
    if (hgp->UsePrefPart) {
        for (i = 0; i < phg->nVtx; i++) {
            /* Impose fixed vertex/preferred part constraints. */
            if (phg->pref_part[i] < 0) { /* Free vertex in fixedvertex partitioning or repart */
                /* randomly assigned to a part */
                part[i] = Zoltan_Rand_InRange(&(hgc->RNGState_col), numPart);
            } else {
                if (phg->bisec_split < 0)
                    /* direct k-way, use part numbers directly */
                    part[i] = phg->pref_part[i];
                else
                    /* recursive bisection, map to 0-1 part numbers */
                    part[i] = (phg->pref_part[i] < phg->bisec_split ? 0 : 1);
            }            
        }
    } else {
        for (i = 0; i < phg->nVtx; i++) {
            if (part[i] >= numPart || part[i]<0) {
                if (first) {
                    ZOLTAN_PRINT_WARN(zz->Proc, yo, "Initial part number > numParts.");
                    first = 0;
                    ierr = ZOLTAN_WARN;
                }
                part[i] = ((part[i]<0) ? -part[i] : part[i]) % numPart;
            }        
        }
    }
  }
  else if (numPart == 1) {            
    /* everything goes in the one partition */
    for (i =  0; i < phg->nVtx; i++)
      part[i] = 0;
  }
  else if (!hgp->UsePrefPart && numPart >= phg->dist_x[phg->comm->nProc_x]) { 
    /* more partitions than vertices, trivial answer */
    for (i = 0; i < phg->nVtx; i++)
      part[i] = phg->dist_x[phg->comm->myProc_x]+i;
  }
  else if (local_coarse_part) {
    /* Apply local partitioner to each column */
    ierr = local_coarse_partitioner(zz, phg, numPart, part_sizes, part, hgp,
                                    hgp->CoarsePartition);
  }
  else {
    /* Normal case:
     * Gather distributed HG to each processor;
     * compute different partitioning on each processor;
     * select the "best" result.
     */
    ZOLTAN_PHG_COARSEPARTITION_FN *CoarsePartition;

    /* Select different coarse partitioners for processors here. */

    CoarsePartition = hgp->CoarsePartition;
    if (CoarsePartition == NULL) { /* auto */
      /* Select a coarse partitioner from the array of coarse partitioners */
      CoarsePartition = CoarsePartitionFns[phg->comm->myProc % 
                                           NUM_COARSEPARTITION_FNS];
    }


    if (phg->comm->nProc == 1) {
      /* Serial and parallel hgraph are the same. */
      shg = phg;
    }
    else {
      /* Set up a serial communication struct for gathered HG */

      if (first_time) {
        scomm.nProc_x = scomm.nProc_y = 1;
        scomm.myProc_x = scomm.myProc_y = 0;
        scomm.Communicator = MPI_COMM_SELF;
        scomm.row_comm = MPI_COMM_SELF;
        scomm.col_comm = MPI_COMM_SELF;
        scomm.myProc = 0;
        scomm.nProc = 1;
        first_time = 0;
      }
      scomm.RNGState = Zoltan_Rand(NULL);
      scomm.RNGState_row = Zoltan_Rand(NULL);
      scomm.RNGState_col = Zoltan_Rand(NULL);
      scomm.zz = zz;

      /* 
       * Gather parallel hypergraph phg to each processor, creating
       * serial hypergraph shg.
       */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpgather, phg->comm->Communicator);
      }

      ierr = Zoltan_PHG_Gather_To_All_Procs(zz, phg, hgp, &scomm, &shg);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Error returned from gather.");
        goto End;
      }

      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpgather, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

    }

    /* 
     * Allocate partition array spart for the serial hypergraph shg
     * and partition shg.
     */
    spart = (int *) ZOLTAN_CALLOC(shg->nVtx * (NUM_PART_KEEP+1),
                                    sizeof(int));
    bestvals = (float *) ZOLTAN_MALLOC((NUM_PART_KEEP+1)*sizeof(int)); 
    if ((!spart) || (!bestvals)) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Out of memory.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }
    
    /* Compute several coarse partitionings. */
    /* Keep the NUM_PART_KEEP best ones around. */
    /* Currently, only the best one is used. */

    /* Set RNG so different procs compute different parts. */
    Zoltan_Srand(Zoltan_Rand(NULL) + zz->Proc, NULL);

    new_cand = 0;
    new_part = spart;

    for (i=0; i< num_coarse_iter; i++){
      int savefmlooplimit=hgp->fm_loop_limit;
        
      /* Overwrite worst partition with new candidate. */
      ierr = CoarsePartition(zz, shg, numPart, part_sizes, 
               new_part, hgp);
      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from CoarsePartition.");
        goto End;
      }

      /* time refinement step in coarse partitioner */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cprefine, phg->comm->Communicator);
      }

      /* UVCUVC: Refine new candidate: only one pass is enough. */
      hgp->fm_loop_limit = 1;
      Zoltan_PHG_Refinement(zz, shg, numPart, part_sizes, new_part, hgp);
      hgp->fm_loop_limit = savefmlooplimit;
      
      /* stop refinement timer */
      if (fine_timing) {
        ZOLTAN_TIMER_STOP(zz->ZTime, timer->cprefine, phg->comm->Communicator);
        ZOLTAN_TIMER_START(zz->ZTime, timer->cpart, phg->comm->Communicator);
      }

      /* Decide if candidate is in the top tier or not. */
      /* Our objective is a combination of cuts and balance */

      bal = Zoltan_PHG_Compute_Balance(zz, shg, part_sizes, 0, 
                                       numPart, new_part); 
      cut = Zoltan_PHG_Compute_ConCut(shg->comm, shg, new_part, numPart, &ierr);
      
      /* Use ratio-cut as our objective. There are many other options! */
      bestvals[new_cand] = cut/(MAX(2.-bal, 0.0001)); /* avoid divide-by-0 */

      if (ierr < 0) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                         "Error returned from Zoltan_PHG_Compute_ConCut.");
        goto End;
      }
      if (i<NUM_PART_KEEP)
        new_cand = i+1;
      else {
        /* find worst partition vector, to overwrite it */
        /* future optimization: keep bestvals sorted */
        worst = 0;
        worst_cut = bestvals[0];
        for (j=1; j<NUM_PART_KEEP+1; j++){
          if (worst_cut < bestvals[j]){
            worst_cut = bestvals[j];
            worst = j;
          }
        }
        new_cand = worst;
      }
      new_part = spart+new_cand*(shg->nVtx);
    }
    /* Copy last partition vector such that all the best ones
       are contiguous starting at spart.                     */
    for (i=0; i<shg->nVtx; i++){
      new_part[i] = spart[NUM_PART_KEEP*(shg->nVtx)+i];
    }
    /* Also update bestvals */
    bestvals[new_cand] = bestvals[NUM_PART_KEEP];

    /* Evaluate and select the best. */
    /* For now, only pick the best one, in the future we pick the k best. */

    ierr = pick_best(zz, hgp, phg->comm, shg, numPart, 
              MIN(NUM_PART_KEEP, num_coarse_iter), spart,
              bestvals);
    if (ierr < 0) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, 
                        "Error returned from pick_best.");
      goto End;
    }
  
    if (phg->comm->nProc > 1) {
      /* Map gathered partition back to 2D distribution */
      for (i = 0; i < phg->nVtx; i++) {
        /* KDDKDD  Assume vertices in serial HG are ordered by GNO of phg */
        si = VTX_LNO_TO_GNO(phg, i);
        part[i] = spart[si];
      }

      Zoltan_HG_HGraph_Free(shg);
      ZOLTAN_FREE(&shg);
    } 
    else { /* single processor */
      for (i = 0; i < phg->nVtx; i++)
        part[i] = spart[i];
    }
    ZOLTAN_FREE(&spart);
    ZOLTAN_FREE(&bestvals);
  }
  
End:
  if (fine_timing) 
    ZOLTAN_TIMER_STOP(zz->ZTime, timer->cpart, phg->comm->Communicator);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ierr;
}
コード例 #14
0
static int local_HEs_from_import_lists(
  ZZ *zz,
  int remap_type,      /* type of remapping to do:  parts, procs, or none. */
  int nobj,            /* # objs the processor knows about (keep + imports) */
  int *proc,           /* On input, old processor assignment for each obj; 
                          Upon return, remapped new proc assignment for
                          each obj. */
  int *old_part,       /* old partition assignments for each objs */
  int *new_part,       /* On input, new partition assignments for each objs.
                          Upon return, remapped new partition assignments */
  int *HEcnt,          /* # of HEs allocated. */
  int **HEinfo         /* Array of HE info; for each HE, two pins and 
                          one edge weight. Stored as a single vector
                          to minimize communication calls.  */
)
{
/*  Routine to remap partitions (to new processors or new partition numbers)
 *  to reduce data movement.
 *  This routine assumes the load-balancing algorithm built import lists.
 *  Objects described are those that ENDED UP on my_proc due to load balancing.
 *  For all these objects, new_proc == my_proc.
 */
char *yo = "local_HEs_from_import_lists";
int ierr = ZOLTAN_OK;
int i, cnt, tmp;
int *tmp_HEinfo;
int old_size;                 /* # of old entries to remap to. If remapping
                                 parts to processors, old_size = Num_Procs;
                                 if renumbering partitions, old_size = old 
                                 num parts. */
int fp;                       /* First partition on this processor in new
                                 decomposition. */
int np;                       /* # of partitions on this processor in new
                                 decomposition. */
int my_proc = zz->Proc;       /* This processor's rank. */
int minp, maxp;               /* Lowest and highest partition numbers on this
                                 processor in old decomposition;
                                 partition numbers are assumed to be dense,
                                 but no particular distribution is assumed. */
int HEwgt_size;               /* # of HE weights allocated. */
int *HEwgt = NULL;            /* Array of HE weights.  Initially includes
                                 zero weights; later zero-weights are removed.*/


  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {

    /* Renumber new processors to minimize changes in proc assignment. */

    HEwgt_size = zz->Num_Proc;
    HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int));
    if (!HEwgt) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < nobj; i++) 
      HEwgt[proc[i]]++;    /* At this point, proc has old proc assignments */

    *HEcnt = 0;
    for (i = 0; i < HEwgt_size; i++)
      if (HEwgt[i] != 0) (*HEcnt)++;
 
    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0) 
      goto End;
    tmp_HEinfo = *HEinfo;

    cnt = 0;
    for (i = 0; i < HEwgt_size; i++) {
      if (HEwgt[i] != 0) {
        tmp = cnt * HEINFO_ENTRIES;
        tmp_HEinfo[tmp] = i;               /* Old processor number */
        tmp_HEinfo[tmp+1] = my_proc;       /* New processor number */
        tmp_HEinfo[tmp+2] = HEwgt[i];      /* shift non-zero weights down. */
        cnt++;
      }
    }
  }

  else {  /* ZOLTAN_LB_REMAP_PARTS */

    /* Renumber new partitions to minimize changes in partition assignment */

    for (minp = INT_MAX, maxp = 0, i = 0; i < nobj; i++) {
      if (old_part[i] < minp) minp = old_part[i];
      if (old_part[i] > maxp) maxp = old_part[i];
    }

    /* Don't include old partition numbers that are greater than 
     * zz->LB.Num_Global_Parts - 1; they are not valid values for 
     * remapping of new partition numbers. 
     */
    if (minp >= zz->LB.Num_Global_Parts)
      minp = zz->LB.Num_Global_Parts-1;
    if (maxp >= zz->LB.Num_Global_Parts)
      maxp = zz->LB.Num_Global_Parts-1;

    old_size = maxp - minp + 1; 

    Zoltan_LB_Proc_To_Part(zz, my_proc, &np, &fp);
    HEwgt_size = np * old_size;
    if (HEwgt_size > 0) {
      HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int));
      if (!HEwgt) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
        ierr = ZOLTAN_MEMERR;
        goto End;
      }
    }

    for (i = 0; i < nobj; i++) {
      if (old_part[i] < zz->LB.Num_Global_Parts) {  
        /* Include only HEs to old partitions numbered 
         * 0 to zz->LB.Num_Global_Parts-1; these are the only valid
         * remapping values for the new partition numbers.
         */
        tmp = (new_part[i]-fp) * old_size;
        HEwgt[tmp + (old_part[i]-minp)]++;
      }
    }

    *HEcnt = 0;
    for (i = 0; i < HEwgt_size; i++)
      if (HEwgt[i] != 0) (*HEcnt)++;
   
    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0) 
      goto End;
    tmp_HEinfo = *HEinfo;

    cnt = 0;
    for (i = 0; i < HEwgt_size; i++) {
      if (HEwgt[i] != 0) {
        tmp = cnt * HEINFO_ENTRIES;
        tmp_HEinfo[tmp] = i%old_size + minp;  /* Old partition number */
        tmp_HEinfo[tmp+1] = i/old_size + fp;  /* New partition number */
        tmp_HEinfo[tmp+2] = HEwgt[i];         /* shift non-zero weights down. */
        cnt++;
      }
    }
  }

End:
  
  if (HEwgt) ZOLTAN_FREE(&HEwgt);

  return ierr;
}    
コード例 #15
0
ファイル: rib_util.c プロジェクト: 00liujj/trilinos
int Zoltan_RIB_Build_Structure(ZZ *zz, int *num_obj, int *max_obj, int wgtflag,
                               double overalloc, int use_ids, int gen_tree)
{
/* Function to build the geometry-based data structures for RIB method. */
char           *yo = "Zoltan_RIB_Build_Structure";
RIB_STRUCT     *rib;                  /* Data structure for RIB.             */
struct rib_tree *treeptr;
int            i, ierr = 0;

  /* Allocate an RIB data structure for this Zoltan structure.
     If the previous data structure is still there, free the Dots and IDs first;
     the other fields can be reused. */

  if (zz->LB.Data_Structure == NULL) {
    rib = (RIB_STRUCT *) ZOLTAN_MALLOC(sizeof(RIB_STRUCT));
    if (rib == NULL) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
      return(ZOLTAN_MEMERR);
    }
    zz->LB.Data_Structure = (void *) rib;
    rib->Tree_Ptr = NULL;
    rib->Global_IDs = NULL;
    rib->Local_IDs = NULL;

    Zoltan_Initialize_Transformation(&(rib->Tran));

    if (gen_tree) {
      rib->Tree_Ptr = (struct rib_tree *)
                ZOLTAN_CALLOC(zz->LB.Num_Global_Parts, sizeof(struct rib_tree));
      if (rib->Tree_Ptr == NULL) {
        ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
        Zoltan_RIB_Free_Structure(zz);
        return(ZOLTAN_MEMERR);
      }
      /* initialize Tree_Ptr */
      for (i = 0; i < zz->LB.Num_Global_Parts; i++) {
        treeptr = &(rib->Tree_Ptr[i]);
        treeptr->cm[0] = treeptr->cm[1] = treeptr->cm[2] = 0.0;
        treeptr->ev[0] = treeptr->ev[1] = treeptr->ev[2] = 0.0;
        treeptr->cut = 0.0;
        treeptr->parent = treeptr->left_leaf = treeptr->right_leaf = 0;
      }
    }
  }
  else {
    rib = (RIB_STRUCT *) zz->LB.Data_Structure;
    ZOLTAN_FREE(&(rib->Global_IDs));
    ZOLTAN_FREE(&(rib->Local_IDs));
    ZOLTAN_FREE(&(rib->Dots));
  }

  ierr = Zoltan_RB_Build_Structure(zz, &(rib->Global_IDs), &(rib->Local_IDs),
                               &(rib->Dots), num_obj, max_obj, &(rib->Num_Geom),
                               wgtflag, overalloc, use_ids, 0);
  if (ierr) {
    ZOLTAN_PRINT_ERROR(zz->Proc, yo,
                       "Error returned from Zoltan_RB_Build_Structure.");
    Zoltan_RIB_Free_Structure(zz);
    return(ierr);
  }

  return(ZOLTAN_OK);
}
コード例 #16
0
static int local_HEs_from_export_lists(
  ZZ *zz,
  int remap_type,      /* type of remapping to do:  parts, procs, or none. */
  int nobj,            /* # objs the processor knows about (keep + exports) */
  int *new_proc,       /* On input, new processor assignment for each obj; 
                          Upon return, remapped new proc assignment for
                          each obj. */
  int *old_part,       /* old partition assignments for each objs */
  int *new_part,       /* On input, new partition assignments for each objs.
                          Upon return, remapped new partition assignments */
  int *HEcnt,          /* # of HEs allocated. */
  int **HEinfo         /* Array of HE info; for each HE, two pins and 
                          one edge weight. Stored as a single vector
                          to minimize communication calls.  */
) 
{
/*  Routine to remap partitions (to new processors or new partition numbers)
 *  to reduce data movement.
 *  This routine assumes the load-balancing algorithm built export lists.
 *  Objects described are those that STARTED on zz->Proc due to load balancing.
 *  For all these objects, old_proc == zz->Proc.
 */
char *yo = "local_HEs_from_export_lists";
int ierr = ZOLTAN_OK;
int i, cnt, tmp;
int *tmp_HEinfo;
int my_proc = zz->Proc;       /* This processor's rank. */

int nimp = 0;
int *imp_proc = NULL,         /* Temporary arrays if inversion of export to */
    *imp_old_part = NULL,     /* import lists is needed. */
    *imp_new_part = NULL;

int HEwgt_size;               /* # of HE weights allocated. */
int *HEwgt = NULL;            /* Array of HE weights.  Initially includes
                                 zero weights; later zero-weights are removed.*/

  if (remap_type == ZOLTAN_LB_REMAP_PROCESSORS) {
    /* Build HEs based on processor assignment.
     * We know the old processor for all objects we are keeping and all
     * export objects -- it is my_proc!
     * We also know the new processor number for all objects initially on
     * my_proc (since we built export lists.)
     * This case is a special case of partition remapping; it is easy to 
     * build the hyperedges in this special case.
     */

    HEwgt_size = zz->Num_Proc;
    HEwgt = (int *) ZOLTAN_CALLOC(HEwgt_size, sizeof(int));
    if (!HEwgt) {
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      ierr = ZOLTAN_MEMERR;
      goto End;
    }

    for (i = 0; i < nobj; i++) 
      HEwgt[new_proc[i]]++;

    *HEcnt = 0;
    for (i = 0; i < HEwgt_size; i++)
      if (HEwgt[i] != 0) (*HEcnt)++;
   
    ierr = malloc_HEinfo(zz, *HEcnt, HEinfo);
    if (ierr < 0) 
      goto End;
    tmp_HEinfo = *HEinfo;

    cnt = 0;
    for (i = 0; i < HEwgt_size; i++) {
      if (HEwgt[i] != 0) {
        tmp = cnt * HEINFO_ENTRIES;
        tmp_HEinfo[tmp] = my_proc;    /* Old processor number */
        tmp_HEinfo[tmp+1] = i;        /* New processor number */
        tmp_HEinfo[tmp+2] = HEwgt[i]; /* shift non-zero weights down. */
        cnt++;
      }
    }
  }

  else {  /* ZOLTAN_LB_REMAP_PARTS */
    /* Cannot renumber partitions given export lists without summing HE weights
     * across processors.  This summation is not straightforward.  Also, a 
     * potentially large number of HEs may exist 
     * (max_old_partition_number * zz->Num_Global_Parts).  Rather than build
     * this large matrix, just compute import lists from the export lists
     * and run the import-list algorithm.
     */
    ZOLTAN_COMM_OBJ *plan;
    int msg_tag = 22345;

    ierr = Zoltan_Comm_Create(&plan, nobj, new_proc, zz->Communicator,
                              msg_tag, &nimp);

    if (nimp > 0) {
      imp_proc = (int *) ZOLTAN_MALLOC(3 * nimp * sizeof(int));
      imp_old_part = imp_proc + nimp;
      imp_new_part = imp_old_part + nimp;
      if (!imp_proc) {
        ierr = ZOLTAN_MEMERR;
        ZOLTAN_PRINT_ERROR(my_proc, yo, "Memory error.");
        goto End;
      }
    }

    ierr = Zoltan_Comm_Info(plan, NULL, NULL, NULL, NULL, NULL, NULL, 
                            NULL, NULL, NULL, NULL, NULL, imp_proc, NULL);

    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) old_part, sizeof(int),
                          (char *) imp_old_part);

    msg_tag++;
    ierr = Zoltan_Comm_Do(plan, msg_tag, (char *) new_part, sizeof(int),
                          (char *) imp_new_part);

    Zoltan_Comm_Destroy(&plan);

    ierr = local_HEs_from_import_lists(zz, remap_type, nimp, imp_proc,
                                       imp_old_part, imp_new_part,
                                       HEcnt, HEinfo);
  }

End:

  if (HEwgt) ZOLTAN_FREE(&HEwgt);
  if (imp_proc) ZOLTAN_FREE(&imp_proc);

  return ierr;
}
コード例 #17
0
ファイル: color_test.c プロジェクト: 00liujj/trilinos
int Zoltan_Color_Test(
    ZZ *zz,                   /* Zoltan structure */
    int *num_gid_entries,     /* # of entries for a global id */
    int *num_lid_entries,     /* # of entries for a local id */
    int num_obj,              /* Input: number of objects */
    ZOLTAN_ID_PTR global_ids, /* Input: global ids of the vertices */
			      /* The application must allocate enough space */
    ZOLTAN_ID_PTR local_ids,  /* Input: local ids of the vertices */
			      /* The application must allocate enough space */
    int *color_exp            /* Input: Colors assigned to local vertices */
)
{
  static char *yo = "color_test_fn";
  int nvtx = num_obj;               /* number of vertices */
  int i, j;
  int ierr = ZOLTAN_OK;
  int ferr = ZOLTAN_OK;             /* final error signal */
  char coloring_problem;   /* Input: which coloring to perform;
			   currently only supports D1, D2 coloring and variants */
  char coloring_problemStr[MAX_PARAM_STRING_LEN]; /* string version coloring problem name */
  int ss=100;
  char comm_pattern='S', coloring_order='I', coloring_method='F';
  int comm[2],gcomm[2];
  int *color=NULL;

  int *adjproc=NULL, *xadj=NULL;
  ZOLTAN_GNO_TYPE gvtx;                         /* number of global vertices */
  ZOLTAN_GNO_TYPE *vtxdist=NULL, *adjncy=NULL;
  ZG graph;
  ZOLTAN_GNO_TYPE *requested_GNOs = NULL;  /* Return GNOs of 
                                              the requested GIDs.  */
  int *loc_partialD2 = NULL;    /* local binary array showing which vertices to be colored */
  int *partialD2 = NULL;        /* global binary array showing which vertices to be colored */
  struct Zoltan_DD_Struct *dd_color;  /* DDirectory for colors */
  ZOLTAN_GNO_TYPE *local_GNOs = NULL;
  ZOLTAN_GNO_TYPE *global_GNOs = NULL;
  
  memset (&graph, 0, sizeof(ZG));

  /* PARAMETER SETTINGS */
  Zoltan_Bind_Param(Color_params, "COLORING_PROBLEM",   (void *) &coloring_problemStr);
  Zoltan_Bind_Param(Color_params, "SUPERSTEP_SIZE",     (void *) &ss);
  Zoltan_Bind_Param(Color_params, "COMM_PATTERN",       (void *) &comm_pattern);
  Zoltan_Bind_Param(Color_params, "VERTEX_VISIT_ORDER", (void *) &coloring_order);
  Zoltan_Bind_Param(Color_params, "COLORING_METHOD",    (void *) &coloring_method);

  strncpy(coloring_problemStr, "distance-1", MAX_PARAM_STRING_LEN);

  Zoltan_Assign_Param_Vals(zz->Params, Color_params, zz->Debug_Level, zz->Proc,
			   zz->Debug_Proc);

  /* Check validity of parameters - they should be consistent with Zoltan_Color */
  if (!strcasecmp(coloring_problemStr, "distance-1"))
      coloring_problem = '1';
  else if (!strcasecmp(coloring_problemStr, "distance-2"))
      coloring_problem = '2';
  else if (!strcasecmp(coloring_problemStr, "partial-distance-2")
      || !strcasecmp(coloring_problemStr, "bipartite"))
      coloring_problem = 'P';
  else {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Unknown coloring requested. Using Distance-1 coloring.");
      coloring_problem = '1';
  }
  if (ss == 0) {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Invalid superstep size. Using default value 100.");
      ss = 100;
  }
  if (comm_pattern != 'S' && comm_pattern != 'A') {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Invalid communication pattern. Using synchronous communication (S).");
      comm_pattern = 'S';
  }
  if (comm_pattern == 'A' && (coloring_problem == '2' || coloring_problem == 'P')) {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Asynchronous communication pattern is not implemented for distance-2 coloring and its variants. Using synchronous communication (S).");
      comm_pattern = 'S';
  }
  if (coloring_order != 'I' && coloring_order != 'B' && coloring_order != 'U' && coloring_order != 'L' && coloring_order != 'N' && coloring_order != 'S') {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Invalid coloring order. Using internal first coloring order (I).");
      coloring_order = 'I';
  }
  if (coloring_order == 'U' && (coloring_problem == '2' || coloring_problem == 'P')) {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Interleaved coloring order is not implemented for distance-2 coloring and its variants. Using internal first coloring order (I).");
      coloring_order = 'I';
  }
  if (coloring_method !='F') {
      ZOLTAN_PRINT_WARN(zz->Proc, yo, "Invalid coloring method. Using first fit method (F).");
      coloring_method = 'F';
  }

  /* Compute Max number of array entries per ID over all processors.
     This is a sanity-maintaining step; we don't want different
     processors to have different values for these numbers. */
  comm[0] = zz->Num_GID;
  comm[1] = zz->Num_LID;
  MPI_Allreduce(comm, gcomm, 2, MPI_INT, MPI_MAX, zz->Communicator);
  zz->Num_GID = *num_gid_entries = gcomm[0];
  zz->Num_LID = *num_lid_entries = gcomm[1];

  /* Return if this processor is not in the Zoltan structure's
     communicator. */
  if (ZOLTAN_PROC_NOT_IN_COMMUNICATOR(zz))
      return ZOLTAN_OK;

  /* BUILD THE GRAPH */
  /* Check that the user has allocated space for the return args. */
  if (!color_exp)
      ZOLTAN_COLOR_ERROR(ZOLTAN_FATAL, "Color argument is NULL. Please give colors of local vertices.");


  requested_GNOs = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(num_obj *
                                                     sizeof(ZOLTAN_GNO_TYPE));
  Zoltan_ZG_Build (zz, &graph, 0, 1, num_obj, 
                   global_ids, requested_GNOs);

  Zoltan_ZG_Export (zz, &graph,
		    &gvtx, &nvtx, NULL, NULL, 
                    &vtxdist, &xadj, &adjncy, &adjproc,
		    NULL, NULL);

  if (gvtx > (ZOLTAN_GNO_TYPE)INT_MAX){
      if (zz->Proc == 0){
          fprintf(stderr,
                  "Zoltan_Color_Test assumes number of vertices (%ld) is less than INT_MAX\n",gvtx);
      }
      ierr = ZOLTAN_FATAL;
      goto End;
  }

KDDKDDKDD(zz->Proc, "COLORTEST DD");
  /* Exchange global color information */
  if (vtxdist[zz->Num_Proc] && !(color = (int *) ZOLTAN_CALLOC(vtxdist[zz->Num_Proc], sizeof(int))))
      MEMORY_ERROR;

  if (nvtx && !(local_GNOs = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(nvtx * sizeof(ZOLTAN_GNO_TYPE))))
      MEMORY_ERROR;
  for (i=0; i<nvtx; ++i)
      local_GNOs[i] = i+vtxdist[zz->Proc];
  if (vtxdist[zz->Num_Proc] && !(global_GNOs = (ZOLTAN_GNO_TYPE *) ZOLTAN_MALLOC(vtxdist[zz->Num_Proc] * sizeof(ZOLTAN_GNO_TYPE))))
      MEMORY_ERROR;
  for (i=0; i<vtxdist[zz->Num_Proc]; ++i)
      global_GNOs[i] = i;

  ierr = Zoltan_DD_Create (&dd_color, zz->Communicator, 
                           sizeof(ZOLTAN_GNO_TYPE)/sizeof(ZOLTAN_ID_TYPE), 0, 0, num_obj, 0);
  if (ierr != ZOLTAN_OK)
      ZOLTAN_COLOR_ERROR(ierr, "Cannot construct DDirectory.");
  /* Put req obs with 1 but first inialize the rest with 0 */
  ierr = Zoltan_DD_Update (dd_color, (ZOLTAN_ID_PTR)local_GNOs, NULL,
                           NULL, color, nvtx);
  if (ierr != ZOLTAN_OK)
      ZOLTAN_COLOR_ERROR(ierr, "Cannot update DDirectory.");
  ierr = Zoltan_DD_Update (dd_color, (ZOLTAN_ID_PTR)requested_GNOs, NULL,
                           NULL, color_exp, num_obj);
  if (ierr != ZOLTAN_OK)
      ZOLTAN_COLOR_ERROR(ierr, "Cannot update DDirectory.");
  /* Get requested colors from the DDirectory. */
  ierr = Zoltan_DD_Find (dd_color, (ZOLTAN_ID_PTR)global_GNOs, NULL, NULL,
                         color, vtxdist[zz->Num_Proc], NULL);

  if (ierr != ZOLTAN_OK)
      ZOLTAN_COLOR_ERROR(ierr, "Cannot find object in DDirectory.");
  /* Free DDirectory */
  Zoltan_DD_Destroy(&dd_color);
  ZOLTAN_FREE(&local_GNOs);
  ZOLTAN_FREE(&global_GNOs);

KDDKDDKDD(zz->Proc, "COLORTEST CHECK");

  if (coloring_problem == 'P' || coloring_problem == '2') {
      if (vtxdist[zz->Num_Proc] && !(partialD2 = (int *) ZOLTAN_CALLOC(vtxdist[zz->Num_Proc], sizeof(int))))
	  MEMORY_ERROR;
      if (vtxdist[zz->Num_Proc] && !(loc_partialD2 = (int *) ZOLTAN_CALLOC(vtxdist[zz->Num_Proc], sizeof(int))))
	  MEMORY_ERROR;

      if (coloring_problem == 'P') {
          for (i=0; i<num_obj; ++i) {
              int gno=requested_GNOs[i];
              loc_partialD2[gno] = 1;           
          }

          MPI_Allreduce(loc_partialD2, partialD2, vtxdist[zz->Num_Proc], MPI_INT, MPI_LOR, zz->Communicator);
      } else {
          for (i=0; i<vtxdist[zz->Num_Proc]; ++i)
              partialD2[i] = 1;
      }      
  }

  
  /* Check if there is an error in coloring */
  if (coloring_problem == '1') {
      for (i=0; i<nvtx; i++) {
          int gno = i + (int)vtxdist[zz->Proc];
          if (color[gno] <= 0) { /* object i is not colored properly */
              ierr = ZOLTAN_FATAL;
              printf("Error in coloring! u:%d, cu:%d\n", gno, color[gno]);               
              break;
          }
          for (j = xadj[i]; j < xadj[i+1]; ++j) {
              int v = (int)adjncy[j];
              if (color[gno] == color[v]) { /* neighbors have the same color */
                  ierr = ZOLTAN_FATAL;
                  printf("Error in coloring! u:%d, v:%d, cu:%d, cv:%d\n", gno, v, color[gno], color[v]); 
                  break; 
              }
          }
          if (ierr == ZOLTAN_FATAL)
              break;
      }
  } else if (coloring_problem == '2' || coloring_problem == 'P') {
      for (i=0; i<nvtx; i++) {
          int gno = i + (int)vtxdist[zz->Proc];
          if (partialD2[gno] && color[gno] <= 0) { /* object i is not colored properly */
              ierr = ZOLTAN_FATAL;
              printf("Error in coloring! u:%d, cu:%d\n", gno, color[gno]); 
              break;
          }
          for (j = xadj[i]; j < xadj[i+1]; ++j) {
              int v = (int)adjncy[j], k;
              if (partialD2[v] && color[v] <= 0) {
                  ierr = ZOLTAN_FATAL;
                  printf("Error in coloring! d1-neigh: u:%d, v:%d, cu:%d, cv:%d  pu:%d pv:%d\n", gno, v, color[gno], color[v], partialD2[gno], partialD2[v]);
                  break;
              }
              if (partialD2[gno] && partialD2[v] && color[gno] == color[v]) { /* d-1 neighbors have the same color */
                  ierr = ZOLTAN_FATAL;
                  printf("Error in coloring! d1-neigh: u:%d, v:%d, cu:%d, cv:%d  pu:%d pv:%d\n", gno, v, color[gno], color[v], partialD2[gno], partialD2[v]); 
                  break;
              }
              for (k = j+1; k < xadj[i+1]; ++k) {
                  int w = (int)adjncy[k];
                  if (partialD2[v] && partialD2[w] && color[v] == color[w]) { /* d-2 neighbors have the same color */
                      ierr = ZOLTAN_FATAL;
                      printf("Error in coloring! d2-neigh: v:%d, w:%d, cv:%d, cw:%d   pv:%d pw:%d\n", v, w, color[v], color[w], partialD2[v], partialD2[w]); 
                      break;

                  }

              }
          }
          if (ierr == ZOLTAN_FATAL)
              break;
      }      
  } else 
      ZOLTAN_COLOR_ERROR(ZOLTAN_WARN, "Zoltan_Color_Test is only implemented for distance-1 and distance-2 coloring. Unknown coloring, skipping verification.");      
  
 End:
KDDKDDKDD(zz->Proc, "COLORTEST DONE");
  if (ierr==ZOLTAN_FATAL)
      ierr = 2;
  MPI_Allreduce(&ierr, &ferr, 1, MPI_INT, MPI_MAX, zz->Communicator);
  if (ferr == 2)
      ierr = ZOLTAN_FATAL;
  else
      ierr = ZOLTAN_OK;

  Zoltan_ZG_Free (&graph);
  ZOLTAN_FREE(&adjproc);
  ZOLTAN_FREE(&color);
  ZOLTAN_FREE(&requested_GNOs);
  ZOLTAN_FREE(&partialD2);
  ZOLTAN_FREE(&loc_partialD2);

  return ierr;
}
コード例 #18
0
/* path growing matching, hypergraph version */
static int matching_pgm (ZZ *zz, HGraph *hg, int *match, int *limit)
{
int i, j, k, side = 0, edge, vertex, *Match[2] = {NULL, NULL};
int limits[2], neighbor, next_vertex, pins;
double w[2]={0.0,0.0}, weight, max_weight, *sims = NULL;
char  *yo = "matching_pgm";

  limits[0] = limits[1] = *limit;
  Match[0] = match;
  if (hg->nVtx) {
    if (!(Match[1] = (int*)   ZOLTAN_MALLOC (hg->nVtx * sizeof(int)))
     || !(sims     = (double*) ZOLTAN_CALLOC (hg->nVtx,  sizeof(double))) ) {
      Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
      ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Memory error.");
      return ZOLTAN_MEMERR;
    }
  }

  for (i = 0; i < hg->nVtx; i++)
    Match[1][i] = i;

  for (i = 0; i < hg->nVtx  &&  limits[side] > 0; i++) {
    if (Match[0][i] == i && Match[1][i] == i) {
      vertex = i;
      while (vertex > 0 && limits[side] > 0) {
        max_weight = 0.0;
        next_vertex = -1;

        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          pins = hg->hindex[edge+1] - hg->hindex[edge];
          weight = 2.0 / ((pins-1)*pins);
          if (hg->ewgt)
            weight *= hg->ewgt[edge];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (neighbor != vertex && Match[0][neighbor] == neighbor && 
                Match[1][neighbor]==neighbor)
               sims[neighbor] += weight;
          }
        }
        for (j = hg->vindex[vertex]; j < hg->vindex[vertex+1]; j++) {
          edge = hg->vedge[j];
          for (k = hg->hindex[edge]; k < hg->hindex[edge+1]; k++) {
            neighbor = hg->hvertex[k];
            if (sims[neighbor] > 0.0) {
              if (sims[neighbor] > max_weight) {
                max_weight = sims[neighbor];
                next_vertex = neighbor;
              }
              sims[neighbor] = 0.0;
            }
          }
        }

        if (next_vertex >= 0) {
          Match[side][vertex] = next_vertex;
          Match[side][next_vertex] = vertex;
          limits[side]--;
          w[side] += max_weight;
          side = 1-side;
        }
        vertex = next_vertex;
      }
    }
  }

  if (w[0] < w[1]) {
    for (i = 0; i < hg->nVtx; i++)
      match[i] = Match[1][i];
    *limit = limits[1];
  }
  else
    *limit = limits[0];

  Zoltan_Multifree (__FILE__, __LINE__, 2, &Match[1], &sims);
  return ZOLTAN_OK;
}
コード例 #19
0
ファイル: phg.c プロジェクト: askhl/octopus-dfrt2
static int findAndSaveLeftAloneVertices(ZZ *zz, HGraph *hg, int p, 
                                 Partition parts,
                                 PHGPartParams *hgp) 
{
    char *yo="findAndSaveLeftAloneVertices";
    int *lneigh[2]={NULL, NULL}, *neigh[2]={NULL, NULL}, i, j, ierr=ZOLTAN_OK;
    int *lpins=NULL, *pins=NULL;
    PHGComm *hgc=hg->comm;

    if (hg->nEdge && (!(lpins = (int*) ZOLTAN_CALLOC(p * hg->nEdge, sizeof(int))) ||
                      !(pins  = (int*) ZOLTAN_MALLOC(p * hg->nEdge * sizeof(int)))))
        MEMORY_ERROR;

    for (i = 0; i < hg->nEdge; ++i)
        for (j = hg->hindex[i]; j < hg->hindex[i+1]; ++j)
            ++lpins[i*p+parts[hg->hvertex[j]]];
    if (hg->nEdge)
        MPI_Allreduce(lpins, pins, p*hg->nEdge, MPI_INT, MPI_SUM, 
                      hgc->row_comm);
    
    if (hg->nVtx && !(lneigh[0]  = (int*) ZOLTAN_MALLOC(2 * hg->nVtx * sizeof(int))))
        MEMORY_ERROR;
    if (!hgc->myProc_y) 
        if (hg->nVtx && !(neigh[0]  = (int*) ZOLTAN_MALLOC(2 * hg->nVtx * sizeof(int))))
            MEMORY_ERROR;

    if (hg->nVtx) {
        lneigh[1] = &(lneigh[0][hg->nVtx]);
        if (!hgc->myProc_y)
            neigh[1] = &(neigh[0][hg->nVtx]);
    }

    for (i = 0; i < hg->nVtx; ++i) {
        int pno = parts[i];
        lneigh[0][i] = lneigh[1][i] = 0;
        for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
            int edge = hg->vedge[j], k;
            lneigh[0][i] += (pins[edge*p+pno]-1); /* exclude the vertex's itself */
            for (k=0; k<p; ++k)
                if (k!=pno)
                    lneigh[1][i] += pins[edge*p+k];            
        }
    }
    
    if (hg->nVtx) 
        MPI_Reduce(lneigh[0], neigh[0], 2*hg->nVtx, MPI_INT, MPI_SUM, 0, hgc->col_comm);

    if (!hgc->myProc_y) {
        int alone=0, galone=0;        
        for (i=0; i<hg->nVtx; ++i)
            if (!neigh[0] && neigh[1]) {
                ++alone;
                if (alone<10)
                    uprintf(hgc, "vertex %d is alone in part %d but it has %d neighbours (!overcounted!) on other %d parts\n", i, parts[i], neigh[1]);
            }
        MPI_Reduce(&alone, &galone, 1, MPI_INT, MPI_SUM, 0, hgc->row_comm);
        if (!hgc->myProc)
            uprintf(hgc, "There are %d left-alone vertices\n", galone);        
    }
End:
    Zoltan_Multifree(__FILE__,__LINE__, 4, &lpins, &pins, &lneigh[0], &neigh[0]);
    return ierr;
}
コード例 #20
0
ファイル: DD_Update.c プロジェクト: agrippa/Trilinos
int Zoltan_DD_Update (
 Zoltan_DD_Directory *dd,  /* directory state information              */
 ZOLTAN_ID_PTR gid,        /* Incoming list of GIDs to update          */
 ZOLTAN_ID_PTR lid,        /* Incoming corresponding LIDs (optional)   */
 char *user,               /* Incoming list of user data (optional)    */
 int *partition,           /* Optional, grouping of GIDs to partitions */
 int count)                /* Number of GIDs in update list            */
{
   int             *procs = NULL;   /* list of processors to contact   */
   DD_Update_Msg   *ptr   = NULL;
   ZOLTAN_COMM_OBJ *plan  = NULL;   /* for efficient MPI communication */
   char            *sbuff = NULL;   /* send buffer                     */
   char            *sbufftmp = NULL;/* pointer into send buffer        */
   char            *rbuff = NULL;   /* receive buffer                  */
   char            *rbufftmp = NULL;/* pointer into receive buffer     */
   int              nrec = 0;       /* number of receives to expect    */
   int              i;
   int              err;
   int              errcount = 0;   /* count of GIDs not found, added  */
   char             str[100];       /* build error message string      */
   char            *yo = "Zoltan_DD_Update";


   /* input sanity checking */
   if (dd == NULL || count < 0 || (gid == NULL && count > 0))  {
      ZOLTAN_PRINT_ERROR ((dd == NULL ? ZOLTAN_DD_NO_PROC : dd->my_proc), yo,
       "Invalid input argument");
      return ZOLTAN_FATAL;
   }
   if (dd->debug_level > 4)
      ZOLTAN_TRACE_IN(dd->my_proc, yo, NULL);

   /* part of initializing the error checking process             */
   /* for each linked list head, walk its list resetting errcheck */
   if (dd->debug_level)
      for (i = 0; i < dd->table_length; i++) {
         DD_NodeIdx nodeidx;
         for (nodeidx = dd->table[i]; nodeidx != -1;
              nodeidx = dd->nodelist[nodeidx].next)
            dd->nodelist[nodeidx].errcheck = ZOLTAN_DD_NO_PROC;
      }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After reset errcheck");

   /* allocate memory for list of processors to contact */
   if (count) {
      procs = (int*) ZOLTAN_MALLOC (sizeof(int) * count);
      if (procs == NULL)  {
         ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Unable to malloc proc list");
         err = ZOLTAN_MEMERR;
         goto fini;
      }
   }

   /* allocate memory for DD_Update_Msg send buffer */
   if (count)  {
      sbuff = (char*) ZOLTAN_CALLOC (count, dd->update_msg_size);
      if (sbuff == NULL)  {
         ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Unable to malloc send buffer");
         err = ZOLTAN_MEMERR;
         goto fini;
      }
   }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After mallocs");

   /* for each GID given, fill in contact list and then message structure */
   sbufftmp = sbuff;
   for (i = 0; i < count; i++)  {
      procs[i] = dd->hash(gid + i*dd->gid_length, dd->gid_length, dd->nproc,
                          dd->hashdata, dd->hashfn);
      ptr      = (DD_Update_Msg*) sbufftmp;
      sbufftmp += dd->update_msg_size;

      ptr->lid_flag       = (lid)  ? 1 : 0;
      ptr->user_flag      = (user) ? 1 : 0;
      ptr->partition_flag = (partition) ? 1 : 0;
      ptr->partition      = (partition) ? *(partition + i) :  -1;
      ptr->owner          = dd->my_proc;

      ZOLTAN_SET_ID (dd->gid_length, ptr->gid, gid + i * dd->gid_length);
      if (lid) {
         ZOLTAN_SET_ID(dd->lid_length, ptr->gid + dd->gid_length,
                       lid + i * dd->lid_length);
      }
      else {
         memset(ptr->gid + dd->gid_length, 0, dd->lid_length);
      }
      if (user) {
         memcpy(ptr->gid + (dd->gid_length + dd->lid_length),
                user + (size_t)i * (size_t)(dd->user_data_length),
                dd->user_data_length);
      }
      else {
         memset(ptr->gid + (dd->gid_length + dd->lid_length), 0,
                dd->user_data_length);
      }
   }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After fill contact list");

   /* now create efficient communication plan */
   err = Zoltan_Comm_Create (&plan, count, procs, dd->comm,
    ZOLTAN_DD_UPDATE_MSG_TAG, &nrec);
   if (err != ZOLTAN_OK)  {
      ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Comm_Create error");
      goto fini;
   }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Comm_Create");

   /* If dd has no nodes allocated (e.g., first call to DD_Update; 
    * create the nodelist and freelist 
    */
   if (nrec && dd->nodelistlen == 0) {
      DD_Memory_Alloc_Nodelist(dd, (DD_NodeIdx) nrec, 0.); 
                               /* TODO Add overalloc parameter */
   }

   /* allocate receive buffer for nrec DD_Update_Msg structures */
   if (nrec)  {
      rbuff = (char*)ZOLTAN_MALLOC((size_t)nrec*(size_t)(dd->update_msg_size));
      if (rbuff == NULL)  {
         ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Receive buffer malloc failed");
         err = ZOLTAN_MEMERR;
         goto fini;
      }
   }

   /* send my update messages & receive updates directed to me */
   err = Zoltan_Comm_Do (plan, ZOLTAN_DD_UPDATE_MSG_TAG+1, sbuff,
    dd->update_msg_size, rbuff);
   if (err != ZOLTAN_OK)  {
      ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Comm_Do error");
      goto fini;
   }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Comm_Do");

   /* for each message rec'd, update local directory information */
   errcount = 0;
   rbufftmp = rbuff;
   for (i = 0; i < nrec; i++)  {
      ptr = (DD_Update_Msg *) rbufftmp;
      rbufftmp += dd->update_msg_size;

      err = DD_Update_Local (dd, ptr->gid,
       (ptr->lid_flag)  ? (ptr->gid + dd->gid_length) : NULL,
       (char*)((ptr->user_flag)?(ptr->gid+(dd->gid_length+dd->lid_length)):NULL),
       (ptr->partition_flag) ? (ptr->partition) : -1,  /* illegal partition */
       ptr->owner);

      if (err != ZOLTAN_OK)
         ++errcount;
   }
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Local update");

   err = ZOLTAN_OK;
   if (dd->debug_level)  /* overwrite error return if extra checking is on */
      err = (errcount) ? ZOLTAN_WARN : ZOLTAN_OK;

fini:
   ZOLTAN_FREE (&procs);
   ZOLTAN_FREE (&sbuff);
   ZOLTAN_FREE (&rbuff);
   Zoltan_Comm_Destroy (&plan);

   if (dd->debug_level)  {
      sprintf (str, "Processed %d GIDs (%d local), %d GID errors", count,
       nrec, errcount);
      ZOLTAN_PRINT_INFO (dd->my_proc, yo, str);
   }

   if (dd->debug_level > 4)
      ZOLTAN_TRACE_OUT(dd->my_proc, yo, NULL);
   return err;
}
コード例 #21
0
ファイル: DD_Find.c プロジェクト: abhishek4747/pv-zoltan
int Zoltan_DD_Find (
 Zoltan_DD_Directory *dd, /* contains directory state information        */
 ZOLTAN_ID_PTR gid,       /* Incoming list of GIDs to get owners proc    */
 ZOLTAN_ID_PTR lid,       /* Outgoing corresponding list of LIDs         */
 char *data,              /* Outgoing optional corresponding user data   */
 int *partition,          /* Outgoing optional partition information     */
 int  count,              /* Count of GIDs in above list (in)            */
 int *owner)              /* Outgoing optional list of data owners       */
{
   ZOLTAN_COMM_OBJ *plan  = NULL;     /* efficient MPI communication     */
   char            *rbuff = NULL;     /* receive buffer                  */
   char            *rbufftmp = NULL;  /* pointer into receive buffer     */
   char            *sbuff = NULL;     /* send buffer                     */
   char            *sbufftmp = NULL;  /* pointer into send buffer        */
   int             *procs = NULL;     /* list of processors to contact   */
   DD_Find_Msg     *ptr   = NULL;
   int              i;
   int              nrec;             /* number of messages to receive   */
   int              err = ZOLTAN_OK;  /* return error condition          */
   int              errcount;         /* count of GIDs not found         */
   char            *yo = "Zoltan_DD_Find";


   /* input sanity check */
   if (dd == NULL || count < 0 || (gid == NULL && count > 0))  {
      ZOLTAN_PRINT_ERROR (dd ? dd->my_proc : ZOLTAN_DD_NO_PROC, yo,
       "Invalid input argument");
      return ZOLTAN_FATAL;
   }
   if (dd->debug_level > 4)
      ZOLTAN_TRACE_IN(dd->my_proc, yo, NULL);

   /* allocate memory for processors to contact for directory info */
   if (count)  {
      procs = (int*) ZOLTAN_MALLOC (sizeof(int) * count);
      if (procs == NULL) {
         ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Unable to malloc proc list");
         if (dd->debug_level > 4)
           ZOLTAN_TRACE_OUT(dd->my_proc, yo, NULL);
         return ZOLTAN_MEMERR;
      }
   }

   /* allocate memory for DD_Find_Msg send buffer */
   if (count)  {
      sbuff = (char*) ZOLTAN_CALLOC (count, dd->find_msg_size);
      if (sbuff == NULL)  {
         ZOLTAN_FREE (&procs);
         ZOLTAN_PRINT_ERROR (dd->my_proc, yo, "Unable to malloc send buffer");
         if (dd->debug_level > 4)
            ZOLTAN_TRACE_OUT(dd->my_proc, yo, NULL);
         return ZOLTAN_MEMERR;
      }
   }

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After mallocs");

   /* for each GID, fill DD_Find_Msg buffer and contact list */
   sbufftmp = sbuff;
   for (i = 0; i < count; i++)  {
      procs[i] = dd->hash (gid + i*dd->gid_length, dd->gid_length, dd->nproc,
                           dd->hashdata, dd->hashfn);
      ptr      = (DD_Find_Msg*) sbufftmp;
      sbufftmp += dd->find_msg_size;

      ptr->index = i;
      ptr->proc  = procs[i];
      ZOLTAN_SET_ID (dd->gid_length, ptr->id, gid + i*dd->gid_length);
   }
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After fill");

   /* create efficient communication plan */
   err = Zoltan_Comm_Create (&plan, count, procs, dd->comm,
    ZOLTAN_DD_FIND_MSG_TAG, &nrec);
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Comm_Create");
   if (err != ZOLTAN_OK)
      goto fini;

   /* allocate receive buffer */
   if (nrec)  {
      rbuff = (char*) ZOLTAN_MALLOC ((size_t)nrec*(size_t)(dd->find_msg_size));
      if (rbuff == NULL)  {
         err = ZOLTAN_MEMERR;
         goto fini;
      }
   }

   /* send out find messages across entire system */
   err = Zoltan_Comm_Do (plan, ZOLTAN_DD_FIND_MSG_TAG+1, sbuff,
    dd->find_msg_size, rbuff);
   if (err != ZOLTAN_OK)
      goto fini;

   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Comm_Do");

   /* get find messages directed to me, fill in return information */
   errcount = 0;
   rbufftmp = rbuff;
   for (i = 0; i < nrec; i++)  {
      ptr = (DD_Find_Msg*) rbufftmp;
      rbufftmp += dd->find_msg_size;
      err = DD_Find_Local (dd, ptr->id, ptr->id,
                           (char *)(ptr->id + dd->max_id_length),
                           &ptr->partition, &ptr->proc);
      if (err == ZOLTAN_WARN)
          ++errcount;
   }
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After fill in return info");

   /* send return information back to requester */
   err = Zoltan_Comm_Do_Reverse(plan, ZOLTAN_DD_FIND_MSG_TAG+2, rbuff,
    dd->find_msg_size, NULL, sbuff);
   if (err != ZOLTAN_OK)
      goto fini;
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After Comm_Reverse");

   /* fill in user supplied lists with returned information */
   sbufftmp = sbuff;
   for (i = 0; i < count; i++) {
      ptr = (DD_Find_Msg*) sbufftmp;
      sbufftmp += dd->find_msg_size;

      if (owner)
         owner[ptr->index] = ptr->proc;
      if (partition)
         partition[ptr->index] = ptr->partition ;
      if (lid)
         ZOLTAN_SET_ID(dd->lid_length,lid+ptr->index*dd->lid_length,ptr->id);
      if (data)
         memcpy(data + (size_t)(ptr->index) * (size_t)(dd->user_data_length),
                ptr->id + dd->max_id_length, dd->user_data_length);
   }
   if (dd->debug_level > 6)
      ZOLTAN_PRINT_INFO(dd->my_proc, yo, "After fill return lists");
/*    err = ZOLTAN_OK;     */

   MPI_Allreduce(&errcount, &err, 1, MPI_INT, MPI_SUM, dd->comm);
   err = (err) ? ZOLTAN_WARN : ZOLTAN_OK;

   /* if at least one GID was not found, potentially notify caller of error */
   if (dd->debug_level > 0)  {
      char str[100];      /* diagnostic message string */
      sprintf (str, "Processed %d GIDs, GIDs not found: %d", count, errcount);
      ZOLTAN_PRINT_INFO (dd->my_proc, yo, str);
   }

fini:
   ZOLTAN_FREE (&sbuff);
   ZOLTAN_FREE (&rbuff);
   ZOLTAN_FREE (&procs) ;
   Zoltan_Comm_Destroy (&plan);

   if (dd->debug_level > 4)
      ZOLTAN_TRACE_OUT(dd->my_proc, yo, NULL);
   return err;
}
コード例 #22
0
ファイル: phg_Vcycle.c プロジェクト: xunzhang/ESMF_Regridding
double Zoltan_PHG_Compute_NetCut(
  PHGComm *hgc,
  HGraph *hg,
  Partition part,
  int p
)
{
/* Calculates the cutsize of a partition by summing the weight of all edges
 * which span more than one part. Time O(|H|). 
 * Results are returned on all processors of hgc->Communicator. 
 */
  int i, j, *netpart = NULL, *allparts = NULL;    
  double cut = 0.0, totalcut=0.0;
  char *yo = "Zoltan_PHG_Compute_NetCut";

  if (hg->nEdge && !(netpart = (int*) ZOLTAN_CALLOC (hg->nEdge, sizeof(int)))) {
    ZOLTAN_PRINT_ERROR (hgc->myProc, yo, "Memory error.");
    return ZOLTAN_MEMERR;
  }

  if (!hgc->myProc_x)
    if (hg->nEdge && 
       !(allparts = (int*) ZOLTAN_CALLOC(hgc->nProc_x*hg->nEdge,sizeof(int)))) {
      ZOLTAN_PRINT_ERROR (hgc->myProc, yo, "Memory error.");
      return ZOLTAN_MEMERR;
    }

  for (i = 0; i < hg->nEdge; ++i) 
    if (hg->hindex[i] >= hg->hindex[i+1])
       netpart[i] = -1;
    else  {
       j = hg->hindex[i];
       netpart[i] = part[hg->hvertex[j]];
       for (++j; j < hg->hindex[i+1]  &&  part[hg->hvertex[j]] == netpart[i]; ++j);
         if (j != hg->hindex[i+1])
           netpart[i] = -2;
    }

  if (hg->nEdge)
    MPI_Gather(netpart, hg->nEdge, MPI_INT, allparts, hg->nEdge, MPI_INT, 0, 
               hgc->row_comm);
  ZOLTAN_FREE (&netpart);

  if (!hgc->myProc_x) { 
    for (i = 0; i < hg->nEdge; ++i) {
      int p=-1;
      for (j = 0; j < hgc->nProc_x; ++j)
        if (allparts[j*hg->nEdge+i] == -2)
          break;
        else if (allparts[j*hg->nEdge+i] >= 0) {
          if (p == -1)
            p = allparts[j*hg->nEdge+i];
          else if (p != allparts[j*hg->nEdge+i])
            break;
        }            
      if (j < hgc->nProc_x)
        cut += (hg->ewgt ? hg->ewgt[i] : 1.0);
    }
        
    ZOLTAN_FREE (&allparts);
    MPI_Reduce (&cut, &totalcut, 1, MPI_DOUBLE, MPI_SUM, 0, hgc->col_comm);
  }

  MPI_Bcast (&totalcut, 1, MPI_DOUBLE, 0, hgc->Communicator);
  return totalcut;    
}
コード例 #23
0
ファイル: phg_refinement.c プロジェクト: agrippa/Trilinos
static int serial_fm2 (ZZ *zz,
    HGraph *hg,
    int p,
    float *part_sizes,
    Partition part,
    PHGPartParams *hgp,
    float bal_tol)
{
int    i, j, vertex, edge, *pins[2], *locked = 0, *locked_list = 0, round = 0;
double total_weight, part_weight[2], max_weight[2];
double cutsize_beforepass, best_cutsize, *gain = 0;
HEAP   heap[2];
int    steplimit;
char   *yo="serial_fm2";
int    part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
#ifdef HANDLE_ISOLATED_VERTICES    
 int    isocnt=0;
#endif
#ifdef _DEBUG
 double tw0, imbal, cutsize;
#endif

double error, best_error;
int    best_imbalance, imbalance;

  if (p != 2) {
     ZOLTAN_PRINT_ERROR(zz->Proc, yo, "p!=2 not allowed for local_fm2.");
     return ZOLTAN_FATAL;
     }

  if (hg->nEdge == 0)
     return ZOLTAN_OK;

  /* Calculate the weights in each partition and total, then maxima */
  part_weight[0] = 0.0;
  part_weight[1] = 0.0;
  if (hg->vwgt)  {
     for (i = 0; i < hg->nVtx; i++)
        part_weight[part[i]] += hg->vwgt[i*hg->VtxWeightDim];
     total_weight = part_weight[0] + part_weight[1];
  }
  else  {
     total_weight = (double)(hg->nVtx);
     for (i = 0; i < hg->nVtx; i++)
        part_weight[part[i]] += 1.0;
  }
  max_weight[0] = total_weight * bal_tol * part_sizes[0];
  max_weight[1] = total_weight * bal_tol * part_sizes[part_dim];

#ifdef _DEBUG
  tw0 = total_weight * part_sizes[0];
#endif
  
  if (!(pins[0]     = (int*)   ZOLTAN_CALLOC(2*hg->nEdge, sizeof(int)))
   || !(locked      = (int*)   ZOLTAN_CALLOC(hg->nVtx,    sizeof(int)))
   || !(locked_list = (int*)   ZOLTAN_CALLOC(hg->nVtx,    sizeof(int)))
   || !(gain        = (double*)ZOLTAN_CALLOC(hg->nVtx,    sizeof(double))) ) {
     Zoltan_Multifree(__FILE__,__LINE__, 4, &pins[0], &locked, &locked_list,
          &gain);
     ZOLTAN_PRINT_ERROR(zz->Proc, yo, "Insufficient memory.");
     return ZOLTAN_MEMERR;
  }
  pins[1] = &(pins[0][hg->nEdge]);

  /* Initial calculation of the pins distribution and gain values */
  for (i = 0; i < hg->nEdge; i++)
     for (j = hg->hindex[i]; j < hg->hindex[i+1]; j++)
        (pins[part[hg->hvertex[j]]][i])++;
  for (i = 0; i < hg->nVtx; i++)
     for (j = hg->vindex[i]; j < hg->vindex[i+1]; j++) {
        edge = hg->vedge[j];
        if (pins[part[i]][edge] == 1)
           gain[i] += (hg->ewgt ? hg->ewgt[edge] : 1.0);
        else if (pins[1-part[i]][edge] == 0)
           gain[i] -= (hg->ewgt ? hg->ewgt[edge] : 1.0);
     }

  /* Initialize the heaps and fill them with the gain values */
  Zoltan_Heap_Init(zz, &heap[0], hg->nVtx);
  Zoltan_Heap_Init(zz, &heap[1], hg->nVtx);  
  for (i = 0; i < hg->nVtx; i++)
      if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
#ifdef HANDLE_ISOLATED_VERTICES          
          if (hg->vindex[i+1]==hg->vindex[i]) { /* isolated vertex */
              part_weight[part[i]] -= (hg->vwgt ? hg->vwgt[i*hg->VtxWeightDim] 
                                                : 1.0);
              part[i] = -(part[i]+1); /* remove those vertices from that part*/
              ++isocnt;
          } else
#endif
              Zoltan_Heap_Input(&heap[part[i]], i, gain[i]);
      }
#ifdef _DEBUG
      else {
          int pp = (hg->fixed_part[i] < hg->bisec_split) ? 0 : 1;
          if (part[i]!=pp) 
              errexit("%s: beginning of pass for hg->info=%d vertex %d is fixed at %d bisec_split is %d but its part is %d\n", uMe(hg->comm), hg->info, i, hg->fixed_part[i], hg->bisec_split, part[i]);
          
              
      }
#endif
  Zoltan_Heap_Make(&heap[0]);
  Zoltan_Heap_Make(&heap[1]);

  /* Initialize given partition as best partition */
  best_cutsize = cutsize_beforepass = Zoltan_PHG_Compute_NetCut(hg->comm, hg, part);
  best_error = MAX (part_weight[0]-max_weight[0], part_weight[1]-max_weight[1]);
  best_imbalance = (part_weight[0]>max_weight[0])||(part_weight[1]>max_weight[1]);
  do {
    int step = 0, no_better_steps = 0, number_locked = 0, best_locked = 0;
    int sour, dest;
    double cur_cutsize=best_cutsize;

    round++;
    cutsize_beforepass = best_cutsize;
    if (hgp->output_level > PHG_DEBUG_LIST)
      printf("ROUND %d:\nSTEP VERTEX  PARTS MAX_WGT CHANGE CUTSIZE\n",round);

    steplimit = (hgp->fm_max_neg_move < 0) ? hg->nVtx : hgp->fm_max_neg_move;
    /* steplimit = hg->nVtx/4;  Robsys previous choice */

    while (step < hg->nVtx && no_better_steps < steplimit) {
        step++;
        no_better_steps++;

        if (Zoltan_Heap_Empty(&heap[0]))
           sour = 1;
        else if (Zoltan_Heap_Empty(&heap[1]))
           sour = 0;
        else if (part_weight[0] > max_weight[0])
           sour = 0;
        else if (part_weight[1] > max_weight[1])
           sour = 1;
        else if (Zoltan_Heap_Max_Value(&heap[0])
              >  Zoltan_Heap_Max_Value(&heap[1]))
           sour = 0;
        else
           sour = 1;
        dest = 1-sour;
        vertex = Zoltan_Heap_Extract_Max(&heap[sour]);
        if (vertex<0)
            break;

        locked[vertex] = part[vertex] + 1;
        locked_list[number_locked++] = vertex;
        cur_cutsize -= gain[vertex];        
        
        Zoltan_HG_move_vertex (hg, vertex, sour, dest, part, pins, gain, heap);

#ifdef _DEBUG
        imbal = (tw0==0.0) ? 0.0 : (part_weight[0]-tw0)/tw0;
        uprintf(hg->comm, "%4d: SEQ moving %4d from %d to %d cut=%6.0lf bal=%.3lf\n", step, vertex, sour, dest, cur_cutsize, imbal);
        /* Just for debugging */
        cutsize = Zoltan_PHG_Compute_NetCut(hg->comm, hg, part);
        if (cur_cutsize!=cutsize) {
            errexit("%s: SEQ after move cutsize=%.2lf Verify: total=%.2lf\n", uMe(hg->comm), cur_cutsize,
                    cutsize);
        }
#endif
        
        part_weight[sour] -= (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        part_weight[dest] += (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);

        error = MAX (part_weight[0]-max_weight[0],part_weight[1]-max_weight[1]);
        imbalance = (part_weight[0]>max_weight[0])||(part_weight[1]>max_weight[1]);

        if ( ( best_imbalance && (error < best_error))
          || (!imbalance && (cur_cutsize < best_cutsize)))  {
            best_error   = error;
            best_imbalance = imbalance;
            best_locked  = number_locked;
            best_cutsize = cur_cutsize;
            no_better_steps = 0;
        }
        if (hgp->output_level > PHG_DEBUG_LIST+1)
           printf ("%4d %6d %2d->%2d %7.2f %f %f\n", step, vertex, sour, dest,
            error, cur_cutsize - cutsize_beforepass, cur_cutsize);
    }

#ifdef _DEBUG
    uprintf(hg->comm, "SEQ Best CUT=%6.0lf at move %d\n", best_cutsize, best_locked);
#endif
    
    /* rollback */
     while (number_locked != best_locked) {
        vertex = locked_list[--number_locked];
        sour = part[vertex];
        dest = locked[vertex] - 1;

        Zoltan_HG_move_vertex (hg, vertex, sour, dest, part, pins, gain, heap);

        part_weight[sour] -= (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        part_weight[dest] += (hg->vwgt ? hg->vwgt[vertex*hg->VtxWeightDim] 
                                       : 1.0);
        Zoltan_Heap_Input(&heap[dest], vertex, gain[vertex]);
        locked[vertex] = 0;
     }

     /* only update data structures if we're going to do another pass */
     if ((best_cutsize < cutsize_beforepass) &&  (round < hgp->fm_loop_limit)) {         
         while (number_locked) {
             vertex = locked_list[--number_locked];
             locked[vertex] = 0;
             Zoltan_Heap_Input(&heap[part[vertex]], vertex, gain[vertex]);
         }
         
         Zoltan_Heap_Make(&(heap[0]));
         Zoltan_Heap_Make(&(heap[1]));
     }
  } while ((best_cutsize < cutsize_beforepass) &&  (round < hgp->fm_loop_limit));

#ifdef HANDLE_ISOLATED_VERTICES
  if (isocnt) {
#ifdef _DEBUG      
      double isoimbalbefore, isoimbal;
#endif
      double targetw0;
      
      targetw0 = total_weight * part_sizes[0];
#ifdef _DEBUG      
      isoimbalbefore = (targetw0==0) ? 0.0 : (part_weight[0] - targetw0)/ targetw0;
#endif
      for (i=0; i < hg->nVtx; ++i)
          if (!hgp->UseFixedVtx || hg->fixed_part[i]<0) {
              if (hg->vindex[i+1]==hg->vindex[i])  { /* go over isolated vertices */
                  int npno = (part_weight[0] <  targetw0) ? 0 : 1;
                  part_weight[npno] += (hg->vwgt ? hg->vwgt[i*hg->VtxWeightDim] 
                                                 : 1.0);                
                  part[i] = npno;
              }
          }
#ifdef _DEBUG      
      isoimbal = (targetw0==0) ? 0.0 : (part_weight[0] - targetw0)/ targetw0;
      uprintf(hg->comm, "SEQ %d isolated vertices, balance before: %.3lf  after: %.3lf\n", isocnt, isoimbalbefore, isoimbal);
#endif
  }
#endif  
  
  /* gain_check (hg, gain, part, pins); */
  Zoltan_Multifree(__FILE__,__LINE__, 4, &pins[0], &locked, &locked_list, &gain);
  Zoltan_Heap_Free(&heap[0]);
  Zoltan_Heap_Free(&heap[1]);

  return ZOLTAN_OK;
}
コード例 #24
0
ファイル: phg_two_ways.c プロジェクト: haripandey/trilinos
int
Zoltan_PHG_2ways_hyperedge_partition (
  ZZ *zz,                            /* Input : Zoltan data structure */
  HGraph *hg,
  Partition parts,
  Zoltan_PHG_Tree *tree,
  struct Zoltan_DD_Struct * gnoToGID,
  struct Zoltan_DD_Struct **dd,
  int *numParts,
  int **sizeParts
)
{
  int ierr = ZOLTAN_OK;
  char *yo = "Zoltan_PHG_2ways_hyperedge_partition";
  int nEdge, hEdge;
  int *interval;
  int *partnumber = NULL;
  int tree_size;
  int *rowpart =NULL;
  int *rowGNO = NULL;
  ZOLTAN_ID_PTR rowGID=NULL;
  int index;
  int offset;

  ZOLTAN_TRACE_ENTER(zz, yo);

  nEdge = hg->nEdge;
  fprintf (stderr, "HG (%d %d x %d) : %d %d\n", hg->comm->myProc, hg->comm->myProc_x, hg->comm->myProc_y,  hg->nVtx, nEdge);

  interval = (int*)ZOLTAN_MALLOC(nEdge*2*sizeof(int));
  if ((nEdge > 0 ) && (interval == NULL)) MEMORY_ERROR;

  tree_size = get_tree_size(tree) + 1;
  for (index = 0 ; index < nEdge ; ++index){
    SET_MIN_NODE(interval, index, tree_size);
    SET_MAX_NODE(interval, index, -1);
  }

  /* Update interval with the local knowledge */
  /* XXX: I loop on the hyperedges, as I think it's more cache friendly
   * and it allows me to discoupled the computation if a non blocking MPI_Reduce is
   * available
   */
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge){
    int part;
    int max = -1;                     /* Trick : we use the initialized values */
    int min = tree_size + 1;

    for (index = hg->hindex[hEdge] ; index < hg->hindex[hEdge+1] ; ++ index) {
      part = parts[hg->hvertex[index]];

      max = MAX(max, part);
      min = MIN(min, part);
    }
    SET_MIN_NODE(interval, hEdge, min);
    SET_MAX_NODE(interval, hEdge, max);
  }

  /* Update results to view the complete hyperedges */
  Zoltan_AllReduceInPlace (interval, 2*nEdge, MPI_INT, MPI_MAX, hg->comm->row_comm);

  /* Now I have to compute the partition of hyperedges according to the "interval"
   * and the tree */

  /* First, compute the partition number corresponding to the nodes in the tree */
  partnumber = compute_part_number(tree);
  if (partnumber == NULL) {
    ierr = ZOLTAN_FATAL;
    goto End;
  }

  (*numParts) = get_tree_size(tree);

  rowpart = (int*) ZOLTAN_MALLOC(nEdge*sizeof(int));
  if ((nEdge > 0) && (rowpart == NULL)) MEMORY_ERROR;

  rowGNO = (int*) ZOLTAN_MALLOC(nEdge*sizeof(int));
  if ((nEdge > 0) && (rowGNO == NULL)) MEMORY_ERROR;

  (*sizeParts) = (int*)ZOLTAN_CALLOC((*numParts), sizeof(int));
  if (*numParts && (*sizeParts) == NULL) MEMORY_ERROR;

  offset = hg->dist_y[hg->comm->myProc_y];
  /* Then we search we is the hyperedge in the tree */
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge) {
    int node;
    node = find_interval_in_tree(tree, interval+2*hEdge);
    rowpart[hEdge] = partnumber[node];
    (*sizeParts)[rowpart[hEdge]] ++;
    rowGNO[hEdge] = EDGE_LNO_TO_GNO(hg, hEdge);
#if 0
    fprintf (stderr, "%d : %d (%d : %d - %d)\n", rowGNO[hEdge], rowpart[hEdge], node, -interval[2*hEdge], interval[2*hEdge+1]);
#endif
  }

  partnumber += 1;
  ZOLTAN_FREE(&partnumber);
  ZOLTAN_FREE(&interval);

  /* Compute number of elements per parts */
  /* TODO: support processor which are not part of the distribution */

  /* Update results to view the complete hyperedges */
  Zoltan_AllReduceInPlace ((*sizeParts), (*numParts), MPI_INT, MPI_SUM, hg->comm->col_comm);


  /* Export results to data directory */
  /* First, get the GIDs of our edges */
  rowGID = ZOLTAN_MALLOC_GID_ARRAY(zz, nEdge);
  if (nEdge && rowGID == NULL) MEMORY_ERROR;
  ierr = Zoltan_DD_Find (gnoToGID , (ZOLTAN_ID_PTR)rowGNO, rowGID, NULL, NULL,
			 nEdge, NULL);
  ZOLTAN_FREE(&rowGNO);

  ierr = Zoltan_DD_Create (dd, zz->Communicator, zz->Num_GID, 1, 0, nEdge, 0);
  CHECK_IERR;

  /* Make our new numbering public */
  Zoltan_DD_Update (*dd, (ZOLTAN_ID_PTR)rowGID, (ZOLTAN_ID_PTR) rowpart, NULL,  NULL, nEdge);

#ifdef CEDRIC_PRINT
  for (hEdge = 0 ; hEdge < nEdge ; ++hEdge) {
    fprintf (stderr, "%d : %d\n", rowGID[hEdge], rowpart[hEdge]);
  }
#endif


 End:
  ZOLTAN_FREE(&rowGID);
  ZOLTAN_FREE(&rowGNO);
  ZOLTAN_FREE(&rowpart);

  if (partnumber != NULL)
    partnumber += 1;
  ZOLTAN_FREE(&partnumber);
  ZOLTAN_FREE(&interval);

  ZOLTAN_TRACE_EXIT(zz, yo);

  return (ierr);
}
コード例 #25
0
static int seq_part (
  ZZ *zz, 
  HGraph *hg,         /* the hypergraph, containing vertex weights */
  int *order,         /* the ordering of the vertices */
  int p,              /* desired number of partitions */
  float *part_sizes,  /* target partition sizes */
  Partition part,     /* Output: partition numbers */
  PHGPartParams *hgp  /* misc hypergraph parameters */
)
{
  int i, j, pnumber;
  int vwgtdim = hg->VtxWeightDim;
  int part_dim = (hg->VtxWeightDim ? hg->VtxWeightDim : 1);
  double weight_sum = 0.0, part_sum, old_sum, cutoff;
  double psize_sum = 0.0;
  double *fixed_wgts = NULL;
  char *yo = "seq_part";

  ZOLTAN_TRACE_ENTER(zz, yo);

  if (part_sizes==NULL){
    /* part_sizes should always exist, even with uniform partitions */
    return ZOLTAN_FATAL;
  }

  if (p<1){
    /* should never happen */
    return ZOLTAN_FATAL;
  }

  if (hgp->UsePrefPart) {
    fixed_wgts = (double *) ZOLTAN_CALLOC(p, sizeof(double));
  }

  /* Sum up all the vertex weights. */
  for (i=0; i<hg->nVtx; i++){
    weight_sum += hg->vwgt[i*vwgtdim];
    if (hgp->UsePrefPart)
      if (hg->pref_part[i] >= 0){
/*
        uprintf(hg->comm, "bisec_split=%d, i=%d, pref_part=%d\n", hg->bisec_split, i, hg->pref_part[i]);
*/
        /* Set partition number for fixed vtx. */
        if (hg->bisec_split < 0){
          /* direct k-way, use part numbers directly */
          part[i] = hg->pref_part[i];
        }
        else{
          /* recursive bisection, map to 0-1 part numbers */
          part[i] = (hg->pref_part[i] < hg->bisec_split ? 0 : 1);
        }
        /* Add up weights of pref_part vertices for each partition */
        fixed_wgts[part[i]] += hg->vwgt[i*vwgtdim];
      }
  }
 

  /* Sum up all the target partition weights. */
  /* Only use first vweight for now. */
  for (i=0; i<p; i++)
    psize_sum += part_sizes[i*part_dim];

  pnumber = 0; /* Assign next vertex to partition no. pnumber */
  part_sum = (fixed_wgts ? fixed_wgts[0] : 0.); /* Weight of fixed vertices */

  /* Set cutoff for current partition. (Include fixed vtx.) */
  cutoff = weight_sum*part_sizes[0]/psize_sum;

  /* Loop through all vertices in specified order, and assign
     partition numbers.  */                                        
  for (i=0; i<hg->nVtx; i++) {
    /* If order==NULL, then use linear order. */
    j = order ? order[i] : i;
    /* for non-fixed vertices */
    if ((!hgp->UsePrefPart) || (hg->pref_part[j] == -1)){
      part[j] = pnumber;
      old_sum = part_sum;
      part_sum += hg->vwgt[j*vwgtdim];
      /* Check if we passed the cutoff and should start a new partition */
      if ((pnumber+1) < p && part_sum > cutoff) {
        pnumber++; /* Increase current part number */
        /* Decide if current vertex should be moved to the next partition */
        if ((part_sum-cutoff) > (cutoff-old_sum)) { 
          part[j]++;
          part_sum = old_sum;
        }
        weight_sum -= part_sum;
        /* Initialize part_sum for next partition no. */
        part_sum = (fixed_wgts ? fixed_wgts[pnumber] : 0.);
        if (part[j] == pnumber)
          part_sum += hg->vwgt[j*vwgtdim];
        /* Update cutoff. */
        psize_sum -= part_sizes[(pnumber-1)*part_dim];
        cutoff = weight_sum*part_sizes[pnumber*part_dim]/psize_sum;
      }
    }
    if (hgp->output_level >= PHG_DEBUG_PRINT)
      printf("COARSE_PART i=%2d, part[%2d] = %2d, part_sum=%f, cutoff=%f\n", 
       i, j, part[j], part_sum, cutoff);
  }

  if (fixed_wgts) ZOLTAN_FREE(&fixed_wgts);

  ZOLTAN_TRACE_EXIT(zz, yo);
  return ZOLTAN_OK;
}