Exemplo n.º 1
0
double amax2D(double **M, int len){
	double max;
	int i, j;

	max = amax(M[0], len);
	for(i=1;i<=len;i++){
		if(amax(M[i], len) > max){
			max = amax(M[i], len);
		} else {
			continue;
		}
	}
	return(max);
}
Exemplo n.º 2
0
template<class T> bool next_combination(T first1, T last1, T first2, T last2)
{
    // maximal element in set of all available elements
    T max_all = amax(first1, last1);
    
    T it = last2, tmp;
    it--;

   bool b = false; 

    
    T before; int cnt = 0;
    for(;;before = it--)
    {
        if(it == first2 && *before == *get_next_bigger(first1, last1, it)) 
            return false;

        if(cnt++)
            if(*get_next_bigger(first1, last1, it) == *before)
                continue;
        
        if(*it < *max_all)
            break;
    }
    
    tmp = it;
    do{
        *it = *get_next_bigger(first1, last1, tmp);
    } while((tmp = it++) != last2);
    
    return true;
}
Exemplo n.º 3
0
int main(void)
{
	/*
	CPUINFO info;
	
	GetCPUInfo(&info);

	printf("v_name:\t\t%s\n", info.vendorName);
	printf("model:\t\t%s\n", info.modelName);
    printf("family:\t\t%d\n", info.Family);
    printf("model:\t\t%d\n", info.Model);
    printf("stepping:\t%d\n", info.Stepping);
	printf("feature:\t%08x\n", info.Feature);
    expand(info.Feature, info.Checks);
	printf("os_support:\t%08x\n", info.OS_Support);
    expand(info.OS_Support, info.Checks);
    printf("checks:\t\t%08x\n", info.Checks);
	*/

	printf("test %d",amax(5,2,7) );
	printf("\n\nPress ENTER to exit. ");

	getchar();
	return 1;

}
Exemplo n.º 4
0
TYPE nrminf( //
		const ArrayListV<TYPE>& ax  //[in]
		) {
	int n = ax.size();
	const TYPE* x = ax.getPointer();
	return amax(n, x, 1);
}
// ****************************************************************************
//  Method:  avtAxisRestrictionToolInterface::ResetNumberOfAxes
//
//  Purpose:
//    Sets the number of available axes.
//
//  Arguments:
//    n          the number of axes
//
//  Programmer:  Jeremy Meredith
//  Creation:    February  1, 2008
//
//  Modifications:
//    Jeremy Meredith, Fri Feb 15 13:21:20 EST 2008
//    Added axis names to the axis restriction attributes.
//
// ****************************************************************************
void
avtAxisRestrictionToolInterface::ResetNumberOfAxes(int n)
{
    AxisRestrictionAttributes *a = (AxisRestrictionAttributes *)atts;
    stringVector aname(n, "");
    doubleVector amin(n, -1e+37);
    doubleVector amax(n, +1e+37);
    a->SetNames(aname);
    a->SetMinima(amin);
    a->SetMaxima(amax);
}
Exemplo n.º 6
0
/*************************************************************************
* This function checks if the pairwise balance of the between the two 
* partitions will improve by moving the vertex v from pfrom to pto,
* subject to the target partition weights of tfrom, and tto respectively
**************************************************************************/
int IsHBalanceBetterFT(int ncon, int nparts, float *pfrom, float *pto, float *vwgt, float *ubvec)
{
  int i/*, j, k*/;
  float blb1=0.0, alb1=0.0, sblb=0.0, salb=0.0;
  float blb2=0.0, alb2=0.0;
  float temp;

  for (i=0; i<ncon; i++) {
    temp = amax(pfrom[i], pto[i])*nparts/ubvec[i];
    if (blb1 < temp) {
      blb2 = blb1;
      blb1 = temp;
    }
    else if (blb2 < temp)
      blb2 = temp;
    sblb += temp;

    temp = amax(pfrom[i]-vwgt[i], pto[i]+vwgt[i])*nparts/ubvec[i];
    if (alb1 < temp) {
      alb2 = alb1;
      alb1 = temp;
    }
    else if (alb2 < temp)
      alb2 = temp;
    salb += temp;
  }

  if (alb1 < blb1)
    return 1;
  if (blb1 < alb1)
    return 0;
  if (alb2 < blb2)
    return 1;
  if (blb2 < alb2)
    return 0;
  
  return salb < sblb;

}
Exemplo n.º 7
0
/*************************************************************************
* This function checks if the pairwise balance of the between the two
* partitions will improve by moving the vertex v from pfrom to pto,
* subject to the target partition weights of tfrom, and tto respectively
**************************************************************************/
int IsHBalanceBetterFT(int ncon, floattype *pfrom, floattype *pto, floattype *nvwgt, floattype *ubvec)
{
  int i;
  floattype blb1=0.0, alb1=0.0, sblb=0.0, salb=0.0;
  floattype blb2=0.0, alb2=0.0;
  floattype temp;

  for (i=0; i<ncon; i++) {
    temp = amax(pfrom[i], pto[i])/ubvec[i];
    if (blb1 < temp) {
      blb2 = blb1;
      blb1 = temp;
    }
    else if (blb2 < temp)
      blb2 = temp;
    sblb += temp;

    temp = amax(pfrom[i]-nvwgt[i], pto[i]+nvwgt[i])/ubvec[i];
    if (alb1 < temp) {
      alb2 = alb1;
      alb1 = temp;
    }
    else if (alb2 < temp)
      alb2 = temp;
    salb += temp;
  }

  if (alb1 < blb1)
    return 1;
  if (blb1 < alb1)
    return 0;
  if (alb2 < blb2)
    return 1;
  if (blb2 < alb2)
    return 0;

  return salb < sblb;

}
Exemplo n.º 8
0
/*************************************************************************
* This function is the entry point for KWMETIS
**************************************************************************/
void METIS_mCPartGraphKway(int *nvtxs, int *ncon, idxtype *xadj, idxtype *adjncy, 
                          idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, 
                          int *nparts, floattype *rubvec, int *options, int *edgecut, 
                          idxtype *part)
{
  int i, j;
  GraphType graph;
  CtrlType ctrl;

  if (*numflag == 1)
    Change2CNumbering(*nvtxs, xadj, adjncy);

  SetUpGraph(&graph, OP_KMETIS, *nvtxs, *ncon, xadj, adjncy, vwgt, adjwgt, *wgtflag);

  if (options[0] == 0) {  /* Use the default parameters */
    ctrl.CType  = McKMETIS_CTYPE;
    ctrl.IType  = McKMETIS_ITYPE;
    ctrl.RType  = McKMETIS_RTYPE;
    ctrl.dbglvl = McKMETIS_DBGLVL;
  }
  else {
    ctrl.CType  = options[OPTION_CTYPE];
    ctrl.IType  = options[OPTION_ITYPE];
    ctrl.RType  = options[OPTION_RTYPE];
    ctrl.dbglvl = options[OPTION_DBGLVL];
  }
  ctrl.optype = OP_KMETIS;
  ctrl.CoarsenTo = amax((*nvtxs)/(20*log2Int(*nparts)), 30*(*nparts));

  ctrl.nmaxvwgt = 1.5/(1.0*ctrl.CoarsenTo);

  InitRandom(-1);

  AllocateWorkSpace(&ctrl, &graph, *nparts);

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  ASSERT(CheckGraph(&graph));
  *edgecut = MCMlevelKWayPartitioning(&ctrl, &graph, *nparts, part, rubvec);

  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl));

  FreeWorkSpace(&ctrl, &graph);

  if (*numflag == 1)
    Change2FNumbering(*nvtxs, xadj, adjncy, part);
}
Exemplo n.º 9
0
/*************************************************************************
* This function is the entry point for KWMETIS
**************************************************************************/
void METIS_WPartGraphVKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, 
                          idxtype *vsize, int *wgtflag, int *numflag, int *nparts, 
                          float *tpwgts, int *options, int *volume, idxtype *part)
{
  int i, j;
  GraphType graph;
  CtrlType ctrl;

  if (*numflag == 1)
    Change2CNumbering(*nvtxs, xadj, adjncy);

  VolSetUpGraph(&graph, OP_KVMETIS, *nvtxs, 1, xadj, adjncy, vwgt, vsize, *wgtflag);

  if (options[0] == 0) {  /* Use the default parameters */
    ctrl.CType = KVMETIS_CTYPE;
    ctrl.IType = KVMETIS_ITYPE;
    ctrl.RType = KVMETIS_RTYPE;
    ctrl.dbglvl = KVMETIS_DBGLVL;
  }
  else {
    ctrl.CType = options[OPTION_CTYPE];
    ctrl.IType = options[OPTION_ITYPE];
    ctrl.RType = options[OPTION_RTYPE];
    ctrl.dbglvl = options[OPTION_DBGLVL];
  }
  ctrl.optype = OP_KVMETIS;
  ctrl.CoarsenTo = amax((*nvtxs)/(40*log2Int(*nparts)), 20*(*nparts));
  ctrl.maxvwgt = 1.5*((graph.vwgt ? idxsum(*nvtxs, graph.vwgt) : (*nvtxs))/ctrl.CoarsenTo);

  InitRandom(-1);

  AllocateWorkSpace(&ctrl, &graph, *nparts);

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  *volume = MlevelVolKWayPartitioning(&ctrl, &graph, *nparts, part, tpwgts, 1.03);

  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimers(&ctrl));

  FreeWorkSpace(&ctrl, &graph);

  if (*numflag == 1)
    Change2FNumbering(*nvtxs, xadj, adjncy, part);
}
Exemplo n.º 10
0
/***********************************************************************************
* This function creates the fused-element-graph and returns the partition
************************************************************************************/
void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol,
              realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt,
              int *wgtflag, int *numflag, int *nparts, int *options,
              idxtype *part, MPI_Comm *comm)
{
  int npes, mype, nvtxs;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  nvtxs = vtxdist[mype+1]-vtxdist[mype];

  /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */

  SetUpCtrl(&ctrl, *nparts, options, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts));

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);

  graph->where = part;

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  CreateFusedElementGraph(&ctrl, graph, &wspace, numflag);

  idxcopy(nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  if (((*wgtflag)&2) == 0)
    IMfree((void**)&graph->vwgt, LTERM);
  IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy,
         &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr,
         &graph->imap, &graph->sendind, &graph, LTERM);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);
}
Exemplo n.º 11
0
/***********************************************************************************
* This function is the entry point of the parallel ordering algorithm.
* This function assumes that the graph is already nice partitioned among the 
* processors and then proceeds to perform recursive bisection.
************************************************************************************/
void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag,
              int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm)
{
  int i, j;
  int ltvwgts[MAXNCON];
  int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  idxtype *morder;
  int minnvtxs;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);
  nparts = npes;

  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    return;
  }

  if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) {
    if (mype == 0)
      printf("Error: Too many processors for this many vertices.\n");
    return;
  }

  minnvtxs = vtxdist[1]-vtxdist[0];
  for (i=0; i<npes; i++)
    minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i];

  if (minnvtxs < (int)((float)npes*1.1)) {
    if (mype == 0)
      printf("Error: vertices are not distributed equally.\n");
    return;
  }
 

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);

  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts));
  ctrl.seed = mype;
  ctrl.sync = seed;
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts");
  ctrl.ubvec[0] = 1.03;

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial k-way partitioning 
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Moc_Global_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=======================================================
   * Move the graph according to the partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  MALLOC_CHECK(NULL);
  graph->ncon = 1;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  /*=======================================================
   * Now compute an ordering of the moved graph
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  ctrl.ipart = ISEP_NODE;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000));

  /* compute tvwgts */
  for (j=0; j<mgraph->ncon; j++)
    ltvwgts[j] = 0;

  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j];

  for (j=0; j<mgraph->ncon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]);


  morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder");
  MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace);

  MALLOC_CHECK(NULL);

  /* Invert the ordering back to the original graph */
  ProjectInfoBack(&ctrl, graph, order, morder, &wspace);

  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  free(ctrl.tpwgts);
  free(morder);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);

  MALLOC_CHECK(NULL);
}
Exemplo n.º 12
0
/*************************************************************************
* This function induces a DT that satisfied the given size requirements
**************************************************************************/
idxtype InduceDecissionTree(idxtype nvtxs, DKeyValueType **xyzcand, idxtype *sflag, 
          idxtype nparts, idxtype *part, idxtype maxnvtxs, idxtype minnvtxs, float minfrac, idxtype *r_nnodes, 
          idxtype *r_nlnodes, DTreeNodeType *dtree, idxtype *dtpart, idxtype *dtipart, 
          idxtype *r_nclean, idxtype *r_naclean, idxtype *r_ndirty, idxtype *r_maxdepth, idxtype *marker)
{
  idxtype i, nr, nl, j, k, mynodeID, dim, pid, bestdim, nleft, nright, 
      lmaxdepth, rmaxdepth, isclean, isleaf, cleanmarked, nsvtxs;
  idxtype *tpwgts, *pwgts[2], lnvtxs[3];
  double points[3], scores[3], sum2[2], newscore, bestscore, bestpoint;
  DKeyValueType *lxyzcand[3], *rxyzcand[3];

  *r_maxdepth = 1;

  mynodeID = (*r_nnodes)++;
  dtree[mynodeID].nvtxs  = nvtxs;
  dtree[mynodeID].nsvtxs = 0;
  dtree[mynodeID].leafid = -1;


  /* Determine overall pwgts */
  tpwgts = idxsmalloc(nparts, 0, "InduceDecissionTree: tpwgts");

  for (i=0; i<nvtxs; i++) {
    dtree[mynodeID].nsvtxs += (sflag[xyzcand[0][i].val] ? 1 : 0);
    tpwgts[part[xyzcand[0][i].val]]++;
  }

  pid = idxargmax(nparts, tpwgts);

  /*-----------------------------------------------------------------------
   * Check the exit conditions
   *-----------------------------------------------------------------------*/
  isclean = (tpwgts[pid] == nvtxs);
  isleaf  = 0;

  if (nvtxs <= maxnvtxs && isclean) { /* Determine if single class */
    //mprintf("LEAF1: %5D             Pure node!\n", nvtxs);
    *r_nclean += nvtxs;
    isleaf = 1;
  }
  else if (nvtxs < minnvtxs) { /* Determine if too small to continue */
    for (k=0, i=0; i<nparts; i++)
      k += (tpwgts[i] > 0 ? 1 : 0);
    //mprintf("LEAF3: %5D %5D       Skipping small node!\n", nvtxs, k);
    
    *r_ndirty += nvtxs*k;
    isleaf = 1;
  } else if (nvtxs < maxnvtxs && tpwgts[pid] >= (int)(minfrac*nvtxs)) { /* Determine if mostly one class */
    //mprintf("LEAF2: %5D %5D %4D  Almost pure node!\n", nvtxs, tpwgts[idxargmax(nparts, tpwgts)], idxargmax(nparts, tpwgts));
    *r_naclean += nvtxs;
    isleaf = 1;
  } else  { /* Check if all coordinates are the same */
    for (dim=0; dim<3; dim++) {
      for (i=1; i<nvtxs; i++) {
        if (fabs(xyzcand[dim][0].key - xyzcand[dim][i].key) > DEPSILON)
          break;
      }
      if (i != nvtxs)
        break;
    }

    if (dim == 3) { /* All coordinates matched! Treat it as a dirty node! */
      for (k=0, i=0; i<nparts; i++)
        k += (tpwgts[i] > 0 ? 1 : 0);
      mprintf("LEAF4: %5D %5D       Skipping same coord-nodes! (%D %D)\n", nvtxs, k, isclean, part[xyzcand[0][0].val]);
    
      *r_ndirty += nvtxs*k;
      isleaf = 1;
    }
  }

  if (isleaf) {
    for (i=0; i<nvtxs; i++) {
      dtpart[xyzcand[0][i].val]  = *r_nlnodes;
      dtipart[xyzcand[0][i].val] = pid;
    }

    dtree[mynodeID].leafid = (*r_nlnodes)++;
    dtree[mynodeID].partid = pid;
    dtree[mynodeID].left = dtree[mynodeID].right = -1;

    gk_free((void **)&tpwgts, LTERM);
    return mynodeID;
  }



  /*-----------------------------------------------------------------------
   * Find the best splitting point
   *-----------------------------------------------------------------------*/
  pwgts[0] = idxmalloc(nparts, "InduceDecissionTree: pwgts[0]");
  pwgts[1] = idxmalloc(nparts, "InduceDecissionTree: pwgts[1]");

  /* Go and scan each dimension */
  for (dim=0; dim<3; dim++) {
    /* Establish initial conditions for the scan */
    idxcopy(nparts, tpwgts, pwgts[1]);
    idxset(nparts, 0, pwgts[0]);

    sum2[0] = sum2[1] = 0.0;
    for (j=0; j<nparts; j++) 
      sum2[1] += (double)pwgts[1][j]*pwgts[1][j];

    scores[dim] = -1.0;
    nleft = 0;

    /* Scan until you find a well-separated vertex */
    for (i=0; i<nvtxs-1; i++) {
      pid = part[xyzcand[dim][i].val];
      sum2[0] += 1.0 + (double)2*pwgts[0][pid];
      sum2[1] += 1.0 - (double)2*pwgts[1][pid];
      pwgts[0][pid]++;
      pwgts[1][pid]--;
      nleft++;

      if (fabs(xyzcand[dim][i].key < xyzcand[dim][i+1].key) > DEPSILON) {
        scores[dim] = sqrt(sum2[0])+sqrt(sum2[1]);
        points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0;
        lnvtxs[dim] = nleft;
        break;
      }
    }

#ifdef PRINTSTAT
    if (i == nvtxs-1)
      mprintf("DTree: Initial Scan Along dim %D failed!\n", dim);
#endif


    /* Continue with the rest */
    for (i++; i<nvtxs-1; i++) {
      pid = part[xyzcand[dim][i].val];
      sum2[0] += 1.0 + (double)2*pwgts[0][pid];
      sum2[1] += 1.0 - (double)2*pwgts[1][pid];
      pwgts[0][pid]++;
      pwgts[1][pid]--;
      nleft++;

      newscore = sqrt(sum2[0])+sqrt(sum2[1]);
      if (isclean) {
        if (i >= nvtxs/2) {
          if (fabs(xyzcand[dim][i].key - xyzcand[dim][i+1].key) < DEPSILON) 
            continue;

          scores[dim] = xyzcand[dim][nvtxs-1].key - xyzcand[dim][0].key; /* Use the axis span as the score */
          points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0;
          lnvtxs[dim] = nleft;
          break;
        }
      }
      else {
        if (newscore > scores[dim]) {
          if (fabs(xyzcand[dim][i].key - xyzcand[dim][i+1].key) < DEPSILON) 
            continue;

          scores[dim] = newscore;
          points[dim] = (xyzcand[dim][i].key+xyzcand[dim][i+1].key)/2.0;
          lnvtxs[dim] = nleft;
        }
      }

      //mprintf("%5D %f %f %f\n", nleft, newscore, sum2[0], sum2[1]);
    }

#ifdef PRINTSTAT
    /* Print some Stats */
    if (scores[dim] >= 0) {
      mprintf("Dim: %3D, Score: %f, Point: %f [%5D %5D] [%f %f]\n", dim, scores[dim], 
              points[dim], lnvtxs[dim], nvtxs-lnvtxs[dim], xyzcand[dim][0].key, xyzcand[dim][nvtxs-1].key);
      idxcopy(nparts, tpwgts, pwgts[1]);
      idxset(nparts, 0, pwgts[0]);
      for (i=0; i<lnvtxs[dim]; i++) {
        pid = part[xyzcand[dim][i].val];
        pwgts[0][pid]++;
        pwgts[1][pid]--;
      }
      for (j=0; j<nparts; j++) 
        if (pwgts[0][j]+pwgts[1][j] > 0)
          mprintf("%5D => %5D %5D\n", j, pwgts[0][j], pwgts[1][j]);
    }
#endif
  }

  /* Determine the best overall score */
  bestdim   = 0;
  bestscore = scores[0];
  bestpoint = points[0];
  for (dim=1; dim<3; dim++) {
    if (scores[dim] > bestscore) {
      bestscore = scores[dim];
      bestpoint = points[dim];
      bestdim   = dim;
    }
  }

  if (bestscore <= 0.0)
    errexit("Major Failure... Non-seperable! %4d nodes. Needs to be fixed!\n", nvtxs);

  
  dtree[mynodeID].dim   = bestdim;
  dtree[mynodeID].value = bestpoint;

  //mprintf("BestDim: %D!\n", bestdim);

  
  /*-----------------------------------------------------------------------
   * Ok, now go and do the split 
   *-----------------------------------------------------------------------*/
  nleft  = lnvtxs[bestdim];
  nright = nvtxs - nleft;

  for (dim=0; dim<3; dim++) {
    lxyzcand[dim] = (DKeyValueType *)gk_malloc(sizeof(DKeyValueType)*nleft,  "InduceDecissionTree: lxyzcand[dim]");
    rxyzcand[dim] = (DKeyValueType *)gk_malloc(sizeof(DKeyValueType)*nright, "InduceDecissionTree: rxyzcand[dim]");
  }

  /* Mark the left vertices */
  for (i=0; i<nleft; i++) 
    marker[xyzcand[bestdim][i].val] = 1;

  for (dim=0; dim<3; dim++) {
    for (nl=nr=0, i=0; i<nvtxs; i++) {
      if (marker[xyzcand[dim][i].val]) 
        lxyzcand[dim][nl++] = xyzcand[dim][i];
      else
        rxyzcand[dim][nr++] = xyzcand[dim][i];
    }
  }
    
  /* Reset the marking */
  for (i=0; i<nleft; i++) 
    marker[xyzcand[bestdim][i].val] = 0;

  gk_free((void **)&tpwgts, &pwgts[0], &pwgts[1], LTERM);


  dtree[mynodeID].left  = InduceDecissionTree(nleft, lxyzcand, sflag, nparts, part, maxnvtxs, 
                                minnvtxs, minfrac, r_nnodes, r_nlnodes, dtree, dtpart, dtipart, r_nclean, 
                                r_naclean, r_ndirty, &lmaxdepth, marker);
  dtree[mynodeID].right = InduceDecissionTree(nright, rxyzcand, sflag, nparts, part, maxnvtxs, 
                                minnvtxs, minfrac, r_nnodes, r_nlnodes, dtree, dtpart, dtipart, r_nclean, 
                                r_naclean, r_ndirty, &rmaxdepth, marker);

  *r_maxdepth += amax(lmaxdepth, rmaxdepth);
 
  gk_free((void **)&lxyzcand[0], &lxyzcand[1], &lxyzcand[2], &rxyzcand[0], &rxyzcand[1], &rxyzcand[2], LTERM);
  
  return mynodeID;
}
Exemplo n.º 13
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocGeneral2WayBalance(CtrlType *ctrl, GraphType *graph, float *tpwgts, float lbfactor)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  int qsizes[MAXNCON][2];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
    qsizes[i][0] = qsizes[i][1] = 0;
  }

  for (i=0; i<nvtxs; i++) {
    qnum[i] = samax(ncon, nvwgt+i*ncon);
    qsizes[qnum[i]][where[i]]++;
  }

/*
  printf("Weight Distribution:    \t");
  for (i=0; i<ncon; i++) 
    printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); 
  printf("\n");
*/

  for (from=0; from<2; from++) {
    for (j=0; j<ncon; j++) {
      if (qsizes[j][from] == 0) {
        for (i=0; i<nvtxs; i++) {
          if (where[i] != from)
            continue;

          k = samax2(ncon, nvwgt+i*ncon);
          if (k == j && qsizes[qnum[i]][from] > qsizes[j][from] && nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) {
            qsizes[qnum[i]][from]--;
            qsizes[j][from]++;
            qnum[i] = j;
          }
        }
      }
    }
  }

/*
  printf("Weight Distribution (after):\t ");
  for (i=0; i<ncon; i++) 
    printf(" [%d %d]", qsizes[i][0], qsizes[i][1]); 
  printf("\n");
*/



  for (i=0; i<ncon; i++) 
    mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
  minbal = origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  newcut = mincut = graph->mincut;
  mincutorder = -1;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
  }

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Insert all nodes in the priority queues */
  nbnd = graph->nbnd;
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (minbal < lbfactor)
      break;

    SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    if (newbal < minbal || (newbal == minbal && 
        (newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
      mincut = newcut;
      minbal = newbal;
      mincutorder = nswaps;
      for (i=0; i<ncon; i++)
        mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
      for (l=0; l<ncon; l++) 
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf(", %.3f LB: %.3f\n", minbal, newbal);
    }


    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1) 
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)  
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }



  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
  }

  graph->mincut = mincut;
  graph->nbnd = nbnd;


  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}
Exemplo n.º 14
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor)
{
  int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  int qsizes[MAXNCON][2];
  KeyValueType *cand;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");


  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
    qsizes[i][0] = qsizes[i][1] = 0;
  }

  for (i=0; i<nvtxs; i++) {
    qnum[i] = samax(ncon, nvwgt+i*ncon);
    qsizes[qnum[i]][where[i]]++;
  }

  for (from=0; from<2; from++) {
    for (j=0; j<ncon; j++) {
      if (qsizes[j][from] == 0) {
        for (i=0; i<nvtxs; i++) {
          if (where[i] != from)
            continue;

          k = samax2(ncon, nvwgt+i*ncon);
          if (k == j &&
               qsizes[qnum[i]][from] > qsizes[j][from] &&
               nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) {
            qsizes[qnum[i]][from]--;
            qsizes[j][from]++;
            qnum[i] = j;
          }
        }
      }
    }
  }


  for (i=0; i<ncon; i++)
    mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
  minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  newcut = mincut = graph->mincut;
  mincutorder = -1;

  idxset(nvtxs, -1, moved);

  /* Insert all nodes in the priority queues */
  nbnd = graph->gnvtxs;
  for (i=0; i<nvtxs; i++) {
    cand[i].key = id[i]-ed[i];
    cand[i].val = i;
  }
  ikeysort(nvtxs, cand);

  for (ii=0; ii<nvtxs; ii++) {
    i = cand[ii].val;
    FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (minbal < lbfactor)
      break;

    Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    if (newbal < minbal || (newbal == minbal &&
        (newcut < mincut || (newcut == mincut &&
          Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
      mincut = newcut;
      minbal = newbal;
      mincutorder = nswaps;
      for (i=0; i<ncon; i++)
        mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(oldgain), (float)(ed[k]-id[k]));

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1)
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }


  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  graph->mincut = mincut;
  graph->gnvtxs = nbnd;


  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
  return;
}
Exemplo n.º 15
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses)
{
  int i, ii, j, k;
  int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, initcut, newcut, mincutorder;
  float rtpwgts[MAXNCON*2];
  KeyValueType *cand;
int mype;
MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  limit = amin(amax(0.01*nvtxs, 25), 150);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

  for (i=0; i<ncon; i++) {
    rtpwgts[i] = origbal*tpwgts[i];
    rtpwgts[ncon+i] = origbal*tpwgts[ncon+i];
  }

  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    for (i=0; i<ncon; i++) {
      FPQueueReset(&parts[i][0]);
      FPQueueReset(&parts[i][1]);
    }

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    for (i=0; i<ncon; i++)
      mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
    minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->gnvtxs;

    for (i=0; i<nbnd; i++) {
      cand[i].key = id[i]-ed[i];
      cand[i].val = i;
    }
    ikeysort(nbnd, cand);

    for (ii=0; ii<nbnd; ii++) {
      i = bndind[cand[ii].val];
      FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
      to = (from+1)%2;

      if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
        break;

      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

      newcut -= (ed[higain]-id[higain]);
      newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

      if ((newcut < mincut && newbal-origbal <= .00001) ||
          (newcut == mincut && (newbal < minbal ||
                                (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
        mincut = newcut;
        minbal = newbal;
        mincutorder = nswaps;
        for (i=0; i<ncon; i++)
          mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
        saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              FPQueueDelete(&parts[qnum[k]][where[k]], k);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k]));
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)
              FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k]));
          }
        }
      }
    }

    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind,  bndptr, higain);

      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

    graph->mincut = mincut;
    graph->gnvtxs = nbnd;

    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
  return;
}
Exemplo n.º 16
0
/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void Mc_SerialKWayAdaptRefine(GraphType *graph, int nparts, idxtype *home,
     float *orgubvec, int npasses)
{
  int i, ii, iii, j, k;
  int nvtxs, ncon, pass, nmoves, myndegrees;
  int from, me, myhome, to, oldcut, gain, tmp;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where;
  EdgeType *mydegrees;
  RInfoType *rinfo, *myrinfo;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, ubvec[MAXNCON];
  int gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home;
  int zero_gain, better_balance_ft, better_balance_tt;
  KeyValueType *cand;
int mype;
MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  rinfo = graph->rinfo;
  npwgts = graph->gnpwgts;
  
  /* Setup the weight intervals of the various subdomains */
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
  minwgt =  fmalloc(nparts*ncon, "minwgt");
  maxwgt = fmalloc(nparts*ncon, "maxwgt");

  ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec);
  for (i=0; i<ncon; i++)
    ubvec[i] = amax(ubvec[i], orgubvec[i]);

  for (i=0; i<nparts; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = ubvec[j]/(float)nparts;
      minwgt[i*ncon+j] = ubvec[j]*(float)nparts;
    }
  }

  for (pass=0; pass<npasses; pass++) {
    oldcut = graph->mincut;

    for (i=0; i<nvtxs; i++) {
      cand[i].key = rinfo[i].id-rinfo[i].ed;
      cand[i].val = i;
    }
    ikeysort(nvtxs, cand);

    nmoves = 0;
    for (iii=0; iii<nvtxs; iii++) {
      i = cand[iii].val;

      myrinfo = rinfo+i;

      if (myrinfo->ed >= myrinfo->id) {
        from = where[i];
        myhome = home[i];
        nvwgt = graph->nvwgt+i*ncon;

        if (myrinfo->id > 0 &&
        AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) 
          continue;

        mydegrees = myrinfo->degrees;
        myndegrees = myrinfo->ndegrees;

        for (k=0; k<myndegrees; k++) {
          to = mydegrees[k].edge;
          gain = mydegrees[k].ewgt - myrinfo->id; 
          if (gain >= 0 && 
             (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
             IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) {
            break;
          }
        }

        /* break out if you did not find a candidate */
        if (k == myndegrees)
          continue;

        for (j=k+1; j<myndegrees; j++) {
          to = mydegrees[j].edge;
          going_home = (myhome == to);
          gain_is_same = (mydegrees[j].ewgt == mydegrees[k].ewgt);
          gain_is_greater = (mydegrees[j].ewgt > mydegrees[k].ewgt);
          fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon);
          better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
                              npwgts+to*ncon,nvwgt,ubvec);
          better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon,
                              npwgts+to*ncon,nvwgt,ubvec);

          if (
               (gain_is_greater &&
                 (fit_in_to ||
                  better_balance_ft)
               )
            ||
               (gain_is_same &&
                 (
                   (fit_in_to &&
                    going_home)
                ||
                    better_balance_tt
                 )
               )
             ) {
            k = j;
          }
        }

        to = mydegrees[k].edge;
        going_home = (myhome == to);
        zero_gain = (mydegrees[k].ewgt == myrinfo->id);

        fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon,
                      maxwgt+from*ncon);
        better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
                            npwgts+to*ncon,nvwgt,ubvec);

        if (zero_gain &&
            !going_home &&
            !better_balance_ft &&
            fit_in_from)
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to' 
        *======================================================================*/
        graph->mincut -= mydegrees[k].ewgt-myrinfo->id;

        /* Update where, weight, and ID/ED information of the vertex you moved */
        saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
        saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
        where[i] = to;
        myrinfo->ed += myrinfo->id-mydegrees[k].ewgt;
        SWAP(myrinfo->id, mydegrees[k].ewgt, tmp);

        if (mydegrees[k].ewgt == 0) {
          myrinfo->ndegrees--;
          mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
          mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
        }
        else
          mydegrees[k].edge = from;

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = rinfo+ii;
          mydegrees = myrinfo->degrees;

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
          }
          else {
            if (me == to) {
              INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
            }
          }

          /* Remove contribution of the ed from 'from' */
          if (me != from) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (mydegrees[k].edge == from) {
                if (mydegrees[k].ewgt == adjwgt[j]) {
                  myrinfo->ndegrees--;
                  mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
                  mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
                }
                else
                  mydegrees[k].ewgt -= adjwgt[j];
                break;
              }
            }
          }

          /* Add contribution of the ed to 'to' */
          if (me != to) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (mydegrees[k].edge == to) {
                mydegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              mydegrees[myrinfo->ndegrees].edge = to;
              mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
            }
          }

        }
        nmoves++;
      }
    }

    if (graph->mincut == oldcut)
      break;
  }

  GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM);

  return;
}
Exemplo n.º 17
0
/*************************************************************************
* This function computes the assignment using the the objective the 
* minimization of the total volume of data that needs to move
**************************************************************************/
void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map,
     WorkSpaceType *wspace, int npasses, int ncon)
{
  int i, ii, j, k, nparts, mype;
  int pass, maxipwgt, nmapped, oldwgt, newwgt, done;
  idxtype *rowmap, *mylpwgts;
  KeyValueType *recv, send;
  int nsaved, gnsaved;

  mype   = ctrl->mype;
  nparts = ctrl->nparts;

  recv     = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv");
  mylpwgts = idxmalloc(nparts, "mylpwgts");

  done = nmapped = 0;
  idxset(nparts, -1, map);
  rowmap = idxset(nparts, -1, wspace->pv3);
  idxcopy(nparts, lpwgts, mylpwgts);
  for (pass=0; pass<npasses; pass++) {
    maxipwgt = idxamax(nparts, mylpwgts);

    if (mylpwgts[maxipwgt] > 0 && !done) {
      send.key = -mylpwgts[maxipwgt];
      send.val = mype*nparts+maxipwgt;
    }
    else {
      send.key = 0;
      send.val = -1;
    }

    /* each processor sends its selection */
    MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm); 

    ikeysort(nparts, recv);
    if (recv[0].key == 0)
      break;

    /* now make as many assignments as possible */
    for (ii=0; ii<nparts; ii++) {
      i = recv[ii].val;

      if (i == -1)
        continue;

      j = i % nparts;
      k = i / nparts;
      if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) {
        map[j] = k;
        rowmap[k] = j;
        nmapped++;
        mylpwgts[j] = 0;
        if (mype == k)
          done = 1;
      }

      if (nmapped == nparts)
        break;
    }

    if (nmapped == nparts)
      break;
  }

  /* Map unmapped partitions */
  if (nmapped < nparts) {
    for (i=j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (; i<nparts; i++) {
          if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;
            nmapped++;
            break;
          }
        }
      }
    }
  }

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts) {
    for (i=0; i<nparts; i++)
      map[i] = i; 
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %0\n")); 
  }
  else {
    /* check for a savings */
    oldwgt  = lpwgts[mype];
    newwgt  = lpwgts[rowmap[mype]];
    nsaved  = newwgt - oldwgt;
    gnsaved = GlobalSESum(ctrl, nsaved);

    /* undo everything if we don't see a savings */
    if (gnsaved <= 0) {
      for (i=0; i<nparts; i++)
        map[i] = i;
    }
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved))); 
  }

  GKfree((void **)&recv, (void **)&mylpwgts, LTERM);

}
Exemplo n.º 18
0
/*************************************************************************
* This function is the entry point of the initial partition algorithm
* that does recursive bissection.
* This algorithm assembles the graph to all the processors and preceeds
* by parallelizing the recursive bisection step.
**************************************************************************/
void Mc_InitPartition_RB(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    int i, j;
    int ncon, mype, npes, gnvtxs, ngroups;
    idxtype *xadj, *adjncy, *adjwgt, *vwgt;
    idxtype *part, *gwhere0, *gwhere1;
    idxtype *tmpwhere, *tmpvwgt, *tmpxadj, *tmpadjncy, *tmpadjwgt;
    GraphType *agraph;
    int lnparts, fpart, fpe, lnpes;
    int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
    float *mytpwgts, mytpwgts2[2], lbvec[MAXNCON], lbsum, min_lbsum, wsum;
    MPI_Comm ipcomm;
    struct {
        float sum;
        int rank;
    } lpesum, gpesum;

    ncon = graph->ncon;
    ngroups = amax(amin(RIP_SPLIT_FACTOR, ctrl->npes), 1);

    IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

    agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
    part = idxmalloc(agraph->nvtxs, "Mc_IP_RB: part");
    xadj = idxmalloc(agraph->nvtxs+1, "Mc_IP_RB: xadj");
    adjncy = idxmalloc(agraph->nedges, "Mc_IP_RB: adjncy");
    adjwgt = idxmalloc(agraph->nedges, "Mc_IP_RB: adjwgt");
    vwgt = idxmalloc(agraph->nvtxs*ncon, "Mc_IP_RB: vwgt");

    idxcopy(agraph->nvtxs*ncon, agraph->vwgt, vwgt);
    idxcopy(agraph->nvtxs+1, agraph->xadj, xadj);
    idxcopy(agraph->nedges, agraph->adjncy, adjncy);
    idxcopy(agraph->nedges, agraph->adjwgt, adjwgt);

    MPI_Comm_split(ctrl->gcomm, ctrl->mype % ngroups, 0, &ipcomm);
    MPI_Comm_rank(ipcomm, &mype);
    MPI_Comm_size(ipcomm, &npes);

    gnvtxs = agraph->nvtxs;

    gwhere0 = idxsmalloc(gnvtxs, 0, "Mc_IP_RB: gwhere0");
    gwhere1 = idxmalloc(gnvtxs, "Mc_IP_RB: gwhere1");

    /* ADD: this assumes that tpwgts for all constraints is the same */
    /* ADD: this is necessary because serial metis does not support the general case */
    mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
    for (i=0; i<ctrl->nparts; i++)
        for (j=0; j<ncon; j++)
            mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    for (i=0; i<ctrl->nparts; i++)
        mytpwgts[i] /= (float)ncon;

    /* Go into the recursive bisection */
    /* ADD: consider changing this to breadth-first type bisection */
    moptions[0] = 0;
    moptions[7] = ctrl->sync + (ctrl->mype % ngroups) + 1;

    lnparts = ctrl->nparts;
    fpart = fpe = 0;
    lnpes = npes;
    while (lnpes > 1 && lnparts > 1) {
        /* Determine the weights of the partitions */
        mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
        mytpwgts2[1] = 1.0-mytpwgts2[0];

        if (ncon == 1)
            METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                                  moptions, &edgecut, part);
        else {
            METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
                                        agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                        &twoparts, mytpwgts2, moptions, &edgecut, part);
        }

        wsum = ssum(lnparts/2, mytpwgts+fpart);
        sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
        sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

        /* I'm picking the left branch */
        if (mype < fpe+lnpes/2) {
            Mc_KeepPart(agraph, wspace, part, 0);
            lnpes = lnpes/2;
            lnparts = lnparts/2;
        }
        else {
            Mc_KeepPart(agraph, wspace, part, 1);
            fpart = fpart + lnparts/2;
            fpe = fpe + lnpes/2;
            lnpes = lnpes - lnpes/2;
            lnparts = lnparts - lnparts/2;
        }
    }

    /* In case npes is greater than or equal to nparts */
    if (lnparts == 1) {
        /* Only the first process will assign labels (for the reduction to work) */
        if (mype == fpe) {
            for (i=0; i<agraph->nvtxs; i++)
                gwhere0[agraph->label[i]] = fpart;
        }
    }
    /* In case npes is smaller than nparts */
    else {
        if (ncon == 1)
            METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy,
                                  agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                                  moptions, &edgecut, part);
        else
            METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj,
                                        agraph->adjncy, agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag,
                                        &lnparts, mytpwgts+fpart, moptions, &edgecut, part);

        for (i=0; i<agraph->nvtxs; i++)
            gwhere0[agraph->label[i]] = fpart + part[i];
    }

    MPI_Allreduce((void *)gwhere0, (void *)gwhere1, gnvtxs, IDX_DATATYPE, MPI_SUM, ipcomm);

    if (ngroups > 1) {
        tmpxadj = agraph->xadj;
        tmpadjncy = agraph->adjncy;
        tmpadjwgt = agraph->adjwgt;
        tmpvwgt = agraph->vwgt;
        tmpwhere = agraph->where;
        agraph->xadj = xadj;
        agraph->adjncy = adjncy;
        agraph->adjwgt = adjwgt;
        agraph->vwgt = vwgt;
        agraph->where = gwhere1;
        agraph->vwgt = vwgt;
        agraph->nvtxs = gnvtxs;
        Mc_ComputeSerialBalance(ctrl, agraph, gwhere1, lbvec);
        lbsum = ssum(ncon, lbvec);

        edgecut = ComputeSerialEdgeCut(agraph);
        MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ctrl->gcomm);
        MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ctrl->gcomm);

        lpesum.sum = lbsum;
        if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
            if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                lpesum.sum = (float) (edgecut);
            else
                lpesum.sum = (float) (max_cut);
        }

        MPI_Comm_rank(ctrl->gcomm, &(lpesum.rank));
        MPI_Allreduce((void *)&lpesum, (void *)&gpesum, 1, MPI_FLOAT_INT, MPI_MINLOC, ctrl->gcomm);
        MPI_Bcast((void *)gwhere1, gnvtxs, IDX_DATATYPE, gpesum.rank, ctrl->gcomm);

        agraph->xadj = tmpxadj;
        agraph->adjncy = tmpadjncy;
        agraph->adjwgt = tmpadjwgt;
        agraph->vwgt = tmpvwgt;
        agraph->where = tmpwhere;
    }

    idxcopy(graph->nvtxs, gwhere1+graph->vtxdist[ctrl->mype], graph->where);

    FreeGraph(agraph);
    MPI_Comm_free(&ipcomm);
    GKfree((void **)&gwhere0, (void **)&gwhere1, (void **)&mytpwgts, (void **)&part, (void **)&xadj, (void **)&adjncy, (void **)&adjwgt, (void **)&vwgt, LTERM);

    IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

}
Exemplo n.º 19
0
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
************************************************************************************/
void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, 
	      int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, 
	      idxtype *part, MPI_Comm *comm)
{
  int h, i;
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int tewgt, tvsize, nmoved, maxin, maxout;
  float gtewgt, gtvsize, avg, maximb;
  int ps_relation, seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /********************************/
  /* Try and take care bad inputs */
  /********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
              NULL, NULL, options, ioptions, part, comm);

  /* ADD: take care of disconnected graph */
  /* ADD: take care of highly unbalanced vtxdist */
  /*********************************/
  /* Take care the nparts = 1 case */
  /*********************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); 
    *edgecut = 0;
    return;
  }

  /**************************/
  /* Set up data structures */
  /**************************/
  if (inumflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
    ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED;
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
    ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED;
  }

  SetUpCtrl(&ctrl, inparts, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts));
  ctrl.ipc_factor = 1000.0;
  ctrl.redist_factor = 1.0;
  ctrl.redist_base = 1.0;
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = REFINE_PARTITION;
  ctrl.ps_relation = ps_relation;
  ctrl.tpwgts = itpwgts;

  graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize");

  graph->home = idxmalloc(graph->nvtxs, "home");
  if (ctrl.ps_relation == COUPLED)
    idxset(graph->nvtxs, mype, graph->home);
  else
    idxcopy(graph->nvtxs, part, graph->home);

  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs;
  gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl.edge_size_ratio = gtewgt/gtvsize;
  scopy(incon, iubvec, ctrl.ubvec);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /***********************/
  /* Partition and Remap */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Adaptive_Partition(&ctrl, graph, &wspace);
  ParallelReMapGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  idxcopy(graph->nvtxs, graph->where, part);
  if (edgecut != NULL)
    *edgecut = graph->mincut;

  /***********************/
  /* Take care of output */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout);
    rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout);
  }

  /*************************************/
  /* Free memory, renumber, and return */
  /*************************************/
  GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM);

  GKfree((void **)&itpwgts, LTERM);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

  return;
}
Exemplo n.º 20
0
DWORD WINAPI netGameLoop(void *param)
{
	int len, c, x, y;
	Psquare p;
	TnetGameSettings *b;
	char *m, *u, *a;
	hostent *h;
	char buf[256];

	if(!param){
		buf[0]=(BYTE)C_INIT1;
		buf[1]=NETGAME_VERSION;
		wr(buf, 2);
		c=rd1();
		if(c!=C_INIT2) goto le;
		c=rd1();
		if(c<1) goto le;
		netGameVersion=c;
		SetForegroundWindow(hWin);
		h= gethostbyaddr((char*)&netGameIP.sin_addr, 4, netGameIP.sin_family);
		a= inet_ntoa(netGameIP.sin_addr);
		if(msg1(MB_YESNO|MB_ICONQUESTION,
			lng(868, "Do you want to play with %s [%s] ?"),
			(h && h->h_name) ? h->h_name : "???", a ? a : "???")!=IDYES){
			wr1(C_INIT_DENY);
			goto le;
		}
		buf[0]=(BYTE)C_INFO;
		buf[1]=sizeof(TnetGameSettings);
		b= (TnetGameSettings*)(buf+2);
		b->width=(char)width;
		b->height=(char)height;
		b->begin= (player==1);
		b->rule5=(char)ruleFive;
		b->cont=(char)continuous;
		if(netGameVersion<2) b->rule5=b->cont=0;
		wr(buf, 2+sizeof(TnetGameSettings));
		if(rd1()!=C_INFO_OK) goto le;
		b->begin= !b->begin;
	}
	else{
		buf[0]=(BYTE)C_INIT2;
		buf[1]=NETGAME_VERSION;
		wr(buf, 2);
		c=rd1();
		if(c==C_BUSY) wrLog(lng(874, "The other player is already playing with someone else"));
		if(c!=C_INIT1) goto le;
		c=rd1();
		if(c<1) goto le;
		netGameVersion=c;
		wrLog(lng(871, "Connection established. Waiting for response..."));
		c=rd1();
		if(c==C_INIT_DENY) wrLog(lng(870, "The other player doesn't want to play with you !"));
		if(c!=C_INFO) goto le;
		len=rd1();
		b= (TnetGameSettings*)buf;
		memset(buf, 0, sizeof(TnetGameSettings));
		if(len<2 || rd(buf, len)<0) goto le;
		wr1(C_INFO_OK);
	}
	SendMessage(hWin, WM_COMMAND, 992, (LPARAM)b);
	undoRequest=0;

	for(;;){
		x=rd1();
		switch(x){
			case C_MSG: //message
				len=rd2();
				if(len<=0) goto le;
				u=new char[2*len];
				if(rd(u, 2*len)>=0){
					m=new char[len+1];
					WideCharToMultiByte(CP_ACP, 0, (WCHAR*)u, len, m, len, 0, 0);
					m[len]='\0';
					wrLog("--->  %s", m);
					delete[] m;
					SetWindowPos(logDlg, HWND_TOP, 0, 0, 0, 0, SWP_NOMOVE|SWP_NOSIZE|SWP_ASYNCWINDOWPOS|SWP_NOACTIVATE|SWP_SHOWWINDOW);
				}
				delete[] u;
				break;
			case C_UNDO_REQUEST:
				y=rd2();
				if(y<=0 || y>moves) goto le;
				if(undoRequest){
					//both players pressed undo simultaneously
					if(undoRequest<0) undoRequest=y;
					amax(undoRequest, y);
					postUndo();
				}
				else{
					c=msg1(MB_YESNO|MB_ICONQUESTION,
						lng(876, "The other player wants to UNDO the last move.\r\nDo you agree ?"));
					if(c==IDYES){
						undoRequest=y;
						wr1(C_UNDO_YES);
						postUndo();
					}
					else{
						wr1(C_UNDO_NO);
					}
				}
				break;
			case C_NEW_REQUEST:
				if(undoRequest){
					if(undoRequest<0){
						//both players pressed NewGame simultaneously
						postNewGame();
					}
					else{
						postUndo();
					}
				}
				else{
					c=msg1(MB_YESNO|MB_ICONQUESTION,
						lng(877, "The other player wants to start a NEW game.\r\nDo you agree ?"));
					if(c==IDYES){
						undoRequest=-1;
						wr1(C_NEW_YES);
						postNewGame();
					}
					else{
						wr1(C_NEW_NO);
					}
				}
				break;
			case C_UNDO_YES:
				if(undoRequest<=0) goto le;
				postUndo();
				break;
			case C_UNDO_NO:
				if(undoRequest>0){
					msglng(878, "Sorry, the other player does not allow to UNDO your move.");
					undoRequest=0;
				}
				break;
			case C_NEW_YES:
				if(undoRequest>=0) goto le;
				postNewGame();
				break;
			case C_NEW_NO:
				if(undoRequest<0){
					msglng(879, "Sorry, the other player does not allow to start a NEW game.");
					undoRequest=0;
				}
				break;
			default: //move or error
				if(x<0 || x>=width){
					show(logDlg);
					goto le;
				}
				while(finished && (getTickCount()-lastTick<5000 || saveLock)){
					Sleep(200);
				}
				if(finished) SendMessage(hWin, WM_COMMAND, 101, 0);
				y=rd1();
				if(y<0 || y>=height) goto le;
				p=Square(x, y);
				p->time=rd4();
				PostMessage(hWin, WM_COMMAND, 991, (LPARAM)p);
		}
	}
le:
	EnterCriticalSection(&netLock);
	netGameDone();
	LeaveCriticalSection(&netLock);
	return 0;
}
Exemplo n.º 21
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocGeneral2WayBalance2(CtrlType *ctrl, GraphType *graph, float *tpwgts, float *ubvec)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, origbal[MAXNCON], minbal[MAXNCON], newbal[MAXNCON];
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  float *maxwgt, *minwgt, tvec[MAXNCON];


  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Setup the weight intervals of the two subdomains */
  minwgt = fwspacemalloc(ctrl, 2*ncon);
  maxwgt = fwspacemalloc(ctrl, 2*ncon);

  for (i=0; i<2; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = tpwgts[i]*ubvec[j];
      minwgt[i*ncon+j] = tpwgts[i]*(1.0/ubvec[j]);
    }
  }


  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, origbal);
  for (i=0; i<ncon; i++) 
    minbal[i] = origbal[i];

  newcut = mincut = graph->mincut;
  mincutorder = -1;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: ", tpwgts[0], tpwgts[1], 
            graph->nvtxs, graph->nbnd, graph->mincut);
    for (i=0; i<ncon; i++)
      printf("%.3f ", origbal[i]);
    printf("[B]\n");
  }

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Insert all nodes in the priority queues */
  nbnd = graph->nbnd;
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
  }


  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (AreAllBelow(ncon, minbal, ubvec))
      break;

    SelectQueue3(ncon, npwgts, tpwgts, &from, &cnum, parts, maxwgt);
    to = (from+1)%2;

    if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, newbal);

    if (IsBetter2wayBalance(ncon, newbal, minbal, ubvec) || 
        (IsBetter2wayBalance(ncon, newbal, origbal, ubvec) && newcut < mincut)) {
      mincut = newcut;
      for (i=0; i<ncon; i++) 
        minbal[i] = newbal[i];
      mincutorder = nswaps;
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
      for (i=0; i<ncon; i++) 
        printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);

      Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
      printf(", LB: ");
      for (i=0; i<ncon; i++) 
        printf("%.3f ", tvec[i]);
      if (mincutorder == nswaps)
        printf(" *\n");
      else
        printf("\n");
    }


    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1]) 
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1) 
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)  
        BNDInsert(nbnd, bndind, bndptr, k);
    }
   
  }



  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (i=0; i<nswaps; i++)
    moved[swaps[i]] = -1;  /* reset moved array */
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
    for (i=0; i<ncon; i++)
      printf("(%.3f, %.3f) ", npwgts[i], npwgts[ncon+i]);
    printf("], LB: ");
    Compute2WayHLoadImbalanceVec(ncon, npwgts, tpwgts, tvec);
    for (i=0; i<ncon; i++) 
      printf("%.3f ", tvec[i]);
    printf("\n");
  }

  graph->mincut = mincut;
  graph->nbnd = nbnd;


  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  fwspacefree(ctrl, 2*ncon);
  fwspacefree(ctrl, 2*ncon);

}
Exemplo n.º 22
0
void FM_2WayNodeRefine_OneSidedP(CtrlType *ctrl, GraphType *graph, 
          idxtype *hmarker, float ubfactor, int npasses)
{
  int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind, nbad, qsize;
  idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
  idxtype *mptr, *mind, *swaps, *perm, *inqueue;
  PQueueType parts; 
  NRInfoType *rinfo;
  int higain, oldgain, mincut, initcut, mincutorder;	
  int pass, from, to, limit;
  int badmaxpwgt, mindiff, newdiff;

  ASSERT(graph->mincut == graph->pwgts[2]);

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;
  where  = graph->where;
  pwgts  = graph->pwgts;
  rinfo  = graph->nrinfo;

  PQueueInit(ctrl, &parts, nvtxs, ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt));
      
  perm    = idxwspacemalloc(ctrl, nvtxs);
  swaps   = idxwspacemalloc(ctrl, nvtxs);
  mptr    = idxwspacemalloc(ctrl, nvtxs+1);
  mind    = idxwspacemalloc(ctrl, nvtxs);
  inqueue = idxwspacemalloc(ctrl, nvtxs);

  idxset(nvtxs, -1, inqueue);

  badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1]));

  IFSET(ctrl->dbglvl, DBG_REFINE,
    printf("Partitions-N1: [%6d %6d] Nv-Nb[%6d %6d] MaxPwgt[%6d]. ISep: %6d\n", 
        pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, badmaxpwgt, graph->mincut));

  to = (pwgts[0] < pwgts[1] ? 1 : 0);
  for (pass=0; pass<npasses; pass++) {
    from = to; 
    to   = (from+1)%2;

    PQueueReset(&parts);

    mincutorder = -1;
    initcut = mincut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      ASSERT(where[i] == 2);
      if (hmarker[i] == -1 || hmarker[i] == to) {
        PQueueInsert(&parts, i, vwgt[i]-rinfo[i].edegrees[from]);
        inqueue[i] = pass;
      }
    }
    qsize = parts.nnodes;

    ASSERT(CheckNodeBnd(graph, nbnd));
    ASSERT(CheckNodePartitionParams(graph));

    limit = nbnd;

    /******************************************************
    * Get into the FM loop
    *******************************************************/
    mptr[0] = nmind = nbad = 0;
    mindiff = abs(pwgts[0]-pwgts[1]);
    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      if ((higain = PQueueGetMax(&parts)) == -1) 
        break;

      inqueue[higain] = -1;

      ASSERT(bndptr[higain] != -1);

      if (pwgts[to]+vwgt[higain] > badmaxpwgt) { /* Skip this vertex */
        if (nbad++ > limit) 
          break; 
        else {
          nswaps--;
          continue;  
        }
      }

      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[from]);

      newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[from]-rinfo[higain].edegrees[from]));
      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
        mincut      = pwgts[2];
        mincutorder = nswaps;
        mindiff     = newdiff;
        nbad        = 0;
      }
      else {
        if (nbad++ > limit) {
          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[from]);
          break; /* No further improvement, break out */
        }
      }

      BNDDelete(nbnd, bndind, bndptr, higain);
      pwgts[to] += vwgt[higain];
      where[higain] = to;
      swaps[nswaps] = higain;  


      /**********************************************************
      * Update the degrees of the affected nodes
      ***********************************************************/
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
          rinfo[k].edegrees[to] += vwgt[higain];
        }
        else if (where[k] == from) { /* This vertex is pulled into the separator */
          ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k]));
          BNDInsert(nbnd, bndind, bndptr, k);

          mind[nmind++] = k;  /* Keep track for rollback */
          where[k]      = 2;
          pwgts[from]  -= vwgt[k];

          edegrees = rinfo[k].edegrees;
          edegrees[0] = edegrees[1] = 0;
          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
            kk = adjncy[jj];
            if (where[kk] != 2) 
              edegrees[where[kk]] += vwgt[kk];
            else {
              oldgain = vwgt[kk]-rinfo[kk].edegrees[from];
              rinfo[kk].edegrees[from] -= vwgt[k];

              /* Update the gain of this node if it was skipped */
              if (inqueue[kk] == pass)
                PQueueUpdateUp(&parts, kk, oldgain, oldgain+vwgt[k]); 
            }
          }

          /* Insert the new vertex into the priority queue. Safe due to one-sided moves */
          if (hmarker[k] == -1 || hmarker[k] == to) {
            PQueueInsert(&parts, k, vwgt[k]-edegrees[from]);
            inqueue[k] = pass;
          }
        }
      }
      mptr[nswaps+1] = nmind;


      IFSET(ctrl->dbglvl, DBG_MOVEINFO,
            printf("Moved %6d to %3d, Gain: %5d [%5d] \t[%5d %5d %5d] [%3d %2d]\n", 
                       higain, to, (vwgt[higain]-rinfo[higain].edegrees[from]), vwgt[higain], pwgts[0], pwgts[1], pwgts[2], nswaps, limit));

    }


    /****************************************************************
    * Roll back computation 
    *****************************************************************/
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      ASSERT(CheckNodePartitionParams(graph));
      ASSERT(where[higain] == to);

      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
      where[higain] = 2;
      BNDInsert(nbnd, bndind, bndptr, higain);

      edegrees = rinfo[higain].edegrees;
      edegrees[0] = edegrees[1] = 0;
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2) 
          rinfo[k].edegrees[to] -= vwgt[higain];
        else
          edegrees[where[k]] += vwgt[k];
      }

      /* Push nodes out of the separator */
      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
        k = mind[j];
        ASSERT(where[k] == 2);
        where[k] = from;
        INC_DEC(pwgts[from], pwgts[2], vwgt[k]);
        BNDDelete(nbnd, bndind, bndptr, k);
        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
          kk = adjncy[jj];
          if (where[kk] == 2) 
            rinfo[kk].edegrees[from] += vwgt[k];
        }
      }
    }

    ASSERT(mincut == pwgts[2]);

    IFSET(ctrl->dbglvl, DBG_REFINE,
      printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d, QSIZE: %6d\n", 
          mincut, mincutorder, pwgts[0], pwgts[1], nbnd, qsize));

    graph->mincut = mincut;
    graph->nbnd   = nbnd;

    if (pass%2 == 1 && (mincutorder == -1 || mincut >= initcut))
      break;
  }

  PQueueFree(ctrl, &parts);

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs+1);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}
Exemplo n.º 23
0
/*************************************************************************
* This function is the entry point of the initial balancing algorithm.
* This algorithm assembles the graph to all the processors and preceeds
* with the balancing step.
**************************************************************************/
void Balance_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    int i, j, mype, npes, nvtxs, nedges, ncon;
    idxtype *vtxdist, *xadj, *adjncy, *adjwgt, *vwgt, *vsize;
    idxtype *part, *lwhere, *home;
    GraphType *agraph, cgraph;
    CtrlType myctrl;
    int lnparts, fpart, fpe, lnpes, ngroups, srnpes, srmype;
    int twoparts=2, numflag = 0, wgtflag = 3, moptions[10], edgecut, max_cut;
    int sr_pe, gd_pe, sr, gd, who_wins, *rcounts, *rdispls;
    float my_cut, my_totalv, my_cost = -1.0, my_balance = -1.0, wsum;
    float rating, max_rating, your_cost = -1.0, your_balance = -1.0;
    float lbvec[MAXNCON], lbsum, min_lbsum, *mytpwgts, mytpwgts2[2], buffer[2];
    MPI_Status status;
    MPI_Comm ipcomm, srcomm;
    struct {
        float cost;
        int rank;
    } lpecost, gpecost;

    IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->InitPartTmr));

    vtxdist = graph->vtxdist;
    agraph = Mc_AssembleAdaptiveGraph(ctrl, graph, wspace);
    nvtxs = cgraph.nvtxs = agraph->nvtxs;
    nedges = cgraph.nedges = agraph->nedges;
    ncon = cgraph.ncon = agraph->ncon;

    xadj = cgraph.xadj = idxmalloc(nvtxs*(5+ncon)+1+nedges*2, "U_IP: xadj");
    vwgt = cgraph.vwgt = xadj + nvtxs+1;
    vsize = cgraph.vsize = xadj + nvtxs*(1+ncon)+1;
    cgraph.where = agraph->where = part = xadj + nvtxs*(2+ncon)+1;
    lwhere = xadj + nvtxs*(3+ncon)+1;
    home = xadj + nvtxs*(4+ncon)+1;
    adjncy = cgraph.adjncy = xadj + nvtxs*(5+ncon)+1;
    adjwgt = cgraph.adjwgt = xadj + nvtxs*(5+ncon)+1 + nedges;

    /* ADD: this assumes that tpwgts for all constraints is the same */
    /* ADD: this is necessary because serial metis does not support the general case */
    mytpwgts = fsmalloc(ctrl->nparts, 0.0, "mytpwgts");
    for (i=0; i<ctrl->nparts; i++)
        for (j=0; j<ncon; j++)
            mytpwgts[i] += ctrl->tpwgts[i*ncon+j];
    for (i=0; i<ctrl->nparts; i++)
        mytpwgts[i] /= (float)ncon;

    idxcopy(nvtxs+1, agraph->xadj, xadj);
    idxcopy(nvtxs*ncon, agraph->vwgt, vwgt);
    idxcopy(nvtxs, agraph->vsize, vsize);
    idxcopy(nedges, agraph->adjncy, adjncy);
    idxcopy(nedges, agraph->adjwgt, adjwgt);

    /****************************************/
    /****************************************/
    if (ctrl->ps_relation == DISCOUPLED) {
        rcounts = imalloc(ctrl->npes, "rcounts");
        rdispls = imalloc(ctrl->npes+1, "rdispls");

        for (i=0; i<ctrl->npes; i++) {
            rdispls[i] = rcounts[i] = vtxdist[i+1]-vtxdist[i];
        }
        MAKECSR(i, ctrl->npes, rdispls);

        MPI_Allgatherv((void *)graph->home, graph->nvtxs, IDX_DATATYPE,
                       (void *)part, rcounts, rdispls, IDX_DATATYPE, ctrl->comm);

        for (i=0; i<agraph->nvtxs; i++)
            home[i] = part[i];

        GKfree((void **)&rcounts, (void **)&rdispls, LTERM);
    }
    else {
        for (i=0; i<ctrl->npes; i++)
            for (j=vtxdist[i]; j<vtxdist[i+1]; j++)
                part[j] = home[j] = i;
    }

    /* Ensure that the initial partitioning is legal */
    for (i=0; i<agraph->nvtxs; i++) {
        if (part[i] >= ctrl->nparts)
            part[i] = home[i] = part[i] % ctrl->nparts;
        if (part[i] < 0)
            part[i] = home[i] = (-1*part[i]) % ctrl->nparts;
    }
    /****************************************/
    /****************************************/

    IFSET(ctrl->dbglvl, DBG_REFINEINFO, Mc_ComputeSerialBalance(ctrl, agraph, agraph->where, lbvec));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "input cut: %d, balance: ", ComputeSerialEdgeCut(agraph)));
    for (i=0; i<agraph->ncon; i++)
        IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "%.3f ", lbvec[i]));
    IFSET(ctrl->dbglvl, DBG_REFINEINFO, rprintf(ctrl, "\n"));

    /****************************************/
    /* Split the processors into two groups */
    /****************************************/
    sr = (ctrl->mype % 2 == 0) ? 1 : 0;
    gd = (ctrl->mype % 2 == 1) ? 1 : 0;

    if (graph->ncon > MAX_NCON_FOR_DIFFUSION || ctrl->npes == 1) {
        sr = 1;
        gd = 0;
    }

    sr_pe = 0;
    gd_pe = 1;

    MPI_Comm_split(ctrl->gcomm, sr, 0, &ipcomm);
    MPI_Comm_rank(ipcomm, &mype);
    MPI_Comm_size(ipcomm, &npes);

    myctrl.dbglvl = 0;
    myctrl.mype = mype;
    myctrl.npes = npes;
    myctrl.comm = ipcomm;
    myctrl.sync = ctrl->sync;
    myctrl.seed = ctrl->seed;
    myctrl.nparts = ctrl->nparts;
    myctrl.ipc_factor = ctrl->ipc_factor;
    myctrl.redist_factor = ctrl->redist_base;
    myctrl.partType = ADAPTIVE_PARTITION;
    myctrl.ps_relation = DISCOUPLED;
    myctrl.tpwgts = ctrl->tpwgts;
    icopy(ncon, ctrl->tvwgts, myctrl.tvwgts);
    icopy(ncon, ctrl->ubvec, myctrl.ubvec);

    if (sr == 1) {
        /*******************************************/
        /* Half of the processors do scratch-remap */
        /*******************************************/
        ngroups = amax(amin(RIP_SPLIT_FACTOR, npes), 1);
        MPI_Comm_split(ipcomm, mype % ngroups, 0, &srcomm);
        MPI_Comm_rank(srcomm, &srmype);
        MPI_Comm_size(srcomm, &srnpes);

        moptions[0] = 0;
        moptions[7] = ctrl->sync + (mype % ngroups) + 1;

        idxset(nvtxs, 0, lwhere);
        lnparts = ctrl->nparts;
        fpart = fpe = 0;
        lnpes = srnpes;
        while (lnpes > 1 && lnparts > 1) {
            ASSERT(ctrl, agraph->nvtxs > 1);
            /* Determine the weights of the partitions */
            mytpwgts2[0] = ssum(lnparts/2, mytpwgts+fpart);
            mytpwgts2[1] = 1.0-mytpwgts2[0];


            if (agraph->ncon == 1) {
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2, moptions, &edgecut,
                                      part);
            }
            else {
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &twoparts, mytpwgts2,
                                            moptions, &edgecut, part);
            }

            wsum = ssum(lnparts/2, mytpwgts+fpart);
            sscale(lnparts/2, 1.0/wsum, mytpwgts+fpart);
            sscale(lnparts-lnparts/2, 1.0/(1.0-wsum), mytpwgts+fpart+lnparts/2);

            /* I'm picking the left branch */
            if (srmype < fpe+lnpes/2) {
                Mc_KeepPart(agraph, wspace, part, 0);
                lnpes = lnpes/2;
                lnparts = lnparts/2;
            }
            else {
                Mc_KeepPart(agraph, wspace, part, 1);
                fpart = fpart + lnparts/2;
                fpe = fpe + lnpes/2;
                lnpes = lnpes - lnpes/2;
                lnparts = lnparts - lnparts/2;
            }
        }

        /* In case srnpes is greater than or equal to nparts */
        if (lnparts == 1) {
            /* Only the first process will assign labels (for the reduction to work) */
            if (srmype == fpe) {
                for (i=0; i<agraph->nvtxs; i++)
                    lwhere[agraph->label[i]] = fpart;
            }
        }
        /* In case srnpes is smaller than nparts */
        else {
            if (ncon == 1)
                METIS_WPartGraphKway2(&agraph->nvtxs, agraph->xadj, agraph->adjncy, agraph->vwgt,
                                      agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart, moptions,
                                      &edgecut, part);
            else
                METIS_mCPartGraphRecursive2(&agraph->nvtxs, &ncon, agraph->xadj, agraph->adjncy,
                                            agraph->vwgt, agraph->adjwgt, &wgtflag, &numflag, &lnparts, mytpwgts+fpart,
                                            moptions, &edgecut, part);

            for (i=0; i<agraph->nvtxs; i++)
                lwhere[agraph->label[i]] = fpart + part[i];
        }

        MPI_Allreduce((void *)lwhere, (void *)part, nvtxs, IDX_DATATYPE, MPI_SUM, srcomm);

        edgecut = ComputeSerialEdgeCut(&cgraph);
        Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
        lbsum = ssum(ncon, lbvec);
        MPI_Allreduce((void *)&edgecut, (void *)&max_cut, 1, MPI_INT, MPI_MAX, ipcomm);
        MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);
        lpecost.rank = ctrl->mype;
        lpecost.cost = lbsum;
        if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
            if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                lpecost.cost = (float)edgecut;
            else
                lpecost.cost = (float)max_cut + lbsum;
        }
        MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

        if (ctrl->mype == gpecost.rank && ctrl->mype != sr_pe) {
            MPI_Send((void *)part, nvtxs, IDX_DATATYPE, sr_pe, 1, ctrl->comm);
        }

        if (ctrl->mype != gpecost.rank && ctrl->mype == sr_pe) {
            MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);
        }

        if (ctrl->mype == sr_pe) {
            idxcopy(nvtxs, part, lwhere);
            SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
        }

        MPI_Comm_free(&srcomm);
    }
    /**************************************/
    /* The other half do global diffusion */
    /**************************************/
    else {
        /******************************************************************/
        /* The next stmt is required to balance out the sr MPI_Comm_split */
        /******************************************************************/
        MPI_Comm_split(ipcomm, MPI_UNDEFINED, 0, &srcomm);

        if (ncon == 1) {
            rating = WavefrontDiffusion(&myctrl, agraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            lbsum = ssum(ncon, lbvec);

            /* Determine which PE computed the best partitioning */
            MPI_Allreduce((void *)&rating, (void *)&max_rating, 1, MPI_FLOAT, MPI_MAX, ipcomm);
            MPI_Allreduce((void *)&lbsum, (void *)&min_lbsum, 1, MPI_FLOAT, MPI_MIN, ipcomm);

            lpecost.rank = ctrl->mype;
            lpecost.cost = lbsum;
            if (min_lbsum < UNBALANCE_FRACTION * (float)(ncon)) {
                if (lbsum < UNBALANCE_FRACTION * (float)(ncon))
                    lpecost.cost = rating;
                else
                    lpecost.cost = max_rating + lbsum;
            }

            MPI_Allreduce((void *)&lpecost, (void *)&gpecost, 1, MPI_FLOAT_INT, MPI_MINLOC, ipcomm);

            /* Now send this to the coordinating processor */
            if (ctrl->mype == gpecost.rank && ctrl->mype != gd_pe)
                MPI_Send((void *)part, nvtxs, IDX_DATATYPE, gd_pe, 1, ctrl->comm);

            if (ctrl->mype != gpecost.rank && ctrl->mype == gd_pe)
                MPI_Recv((void *)part, nvtxs, IDX_DATATYPE, gpecost.rank, 1, ctrl->comm, &status);

            if (ctrl->mype == gd_pe) {
                idxcopy(nvtxs, part, lwhere);
                SerialRemap(&cgraph, ctrl->nparts, home, lwhere, part, ctrl->tpwgts);
            }
        }
        else {
            Mc_Diffusion(&myctrl, agraph, graph->vtxdist, agraph->where, home, wspace, N_MOC_GD_PASSES);
        }
    }

    if (graph->ncon <= MAX_NCON_FOR_DIFFUSION) {
        if (ctrl->mype == sr_pe  || ctrl->mype == gd_pe) {
            /********************************************************************/
            /* The coordinators from each group decide on the best partitioning */
            /********************************************************************/
            my_cut = (float) ComputeSerialEdgeCut(&cgraph);
            my_totalv = (float) Mc_ComputeSerialTotalV(&cgraph, home);
            Mc_ComputeSerialBalance(ctrl, &cgraph, part, lbvec);
            my_balance = ssum(cgraph.ncon, lbvec);
            my_balance /= (float) cgraph.ncon;
            my_cost = ctrl->ipc_factor * my_cut + REDIST_WGT * ctrl->redist_base * my_totalv;

            IFSET(ctrl->dbglvl, DBG_REFINEINFO, printf("%s initial cut: %.1f, totalv: %.1f, balance: %.3f\n",
                    (ctrl->mype == sr_pe ? "scratch-remap" : "diffusion"), my_cut, my_totalv, my_balance));

            if (ctrl->mype == gd_pe) {
                buffer[0] = my_cost;
                buffer[1] = my_balance;
                MPI_Send((void *)buffer, 2, MPI_FLOAT, sr_pe, 1, ctrl->comm);
            }
            else {
                MPI_Recv((void *)buffer, 2, MPI_FLOAT, gd_pe, 1, ctrl->comm, &status);
                your_cost = buffer[0];
                your_balance = buffer[1];
            }
        }

        if (ctrl->mype == sr_pe) {
            who_wins = gd_pe;
            if ((my_balance < 1.1 && your_balance > 1.1) ||
                    (my_balance < 1.1 && your_balance < 1.1 && my_cost < your_cost) ||
                    (my_balance > 1.1 && your_balance > 1.1 && my_balance < your_balance)) {
                who_wins = sr_pe;
            }
        }

        MPI_Bcast((void *)&who_wins, 1, MPI_INT, sr_pe, ctrl->comm);
    }
    else {
        who_wins = sr_pe;
    }

    MPI_Bcast((void *)part, nvtxs, IDX_DATATYPE, who_wins, ctrl->comm);
    idxcopy(graph->nvtxs, part+vtxdist[ctrl->mype], graph->where);

    MPI_Comm_free(&ipcomm);
    GKfree((void **)&xadj, (void **)&mytpwgts, LTERM);

    /* For whatever reason, FreeGraph crashes here...so explicitly free the memory.
      FreeGraph(agraph);
    */
    GKfree((void **)&agraph->xadj, (void **)&agraph->adjncy, (void **)&agraph->vwgt, (void **)&agraph->nvwgt, LTERM);
    GKfree((void **)&agraph->vsize, (void **)&agraph->adjwgt, (void **)&agraph->label, LTERM);
    GKfree((void **)&agraph, LTERM);

    IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->InitPartTmr));

}
Exemplo n.º 24
0
/***********************************************************************************
* This function is the entry point of the parallel kmetis algorithm that uses
* coordinates to compute an initial graph distribution.
************************************************************************************/
void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, 
	      float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, 
	      int *options, int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int h, i, j;
  int nvtxs = -1, npes, mype;
  int uwgtflag, cut, gcut, maxnvtxs;
  int ltvwgts[MAXNCON];
  int moptions[10];
  CtrlType ctrl;
  idxtype *uvwgt;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  float avg, maximb, balance, *mytpwgts;
  int seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /********************************/
  /* Try and take care bad inputs */
  /********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];

  CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
	      NULL, NULL, options, ioptions, part, comm);


  /*********************************/
  /* Take care the nparts = 1 case */
  /*********************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    *edgecut = 0;
    return;
  }

  /******************************/
  /* Take care of npes = 1 case */
  /******************************/
  if (npes == 1 && inparts > 1) {
    moptions[0] = 0;
    nvtxs = vtxdist[1];

    if (incon == 1) {
      METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, 
            &inparts, itpwgts, moptions, edgecut, part);
    }
    else {
      /* ADD: this is because METIS does not support tpwgts for all constraints */
      mytpwgts = fmalloc(inparts, "mytpwgts");
      for (i=0; i<inparts; i++)
        mytpwgts[i] = itpwgts[i*incon];

      moptions[7] = -1;
      METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, 
            &inumflag, &inparts, mytpwgts, moptions, edgecut, part);

      free(mytpwgts);
    }

    return;
  }


  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
  }
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts));
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = itpwgts;
  scopy(incon, iubvec, ctrl.ubvec);

  uwgtflag = iwgtflag|2;
  uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt");
  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag);
  free(graph->nvwgt); graph->nvwgt = NULL;

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=================================================================
   * Compute the initial npes-way partitioning geometric partitioning
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=================================================================
   * Move the graph according to the partitioning
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  free(uvwgt);
  graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt");
  graph->ncon = incon;
  j = ctrl.nparts;
  ctrl.nparts = ctrl.npes;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  ctrl.nparts = j;

  /**********************************************************/
  /* Do the same functionality as Moc_SetUpGraph for mgraph */
  /**********************************************************/
  /* compute tvwgts */
  for (j=0; j<incon; j++)
    ltvwgts[j] = 0;

  for (i=0; i<graph->nvtxs; i++)
    for (j=0; j<incon; j++)
      ltvwgts[j] += mgraph->vwgt[i*incon+j];

  for (j=0; j<incon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  /* check for zero wgt constraints */
  for (i=0; i<incon; i++) {
    /* ADD: take care of the case in which tvwgts is zero */
    if (ctrl.tvwgts[i] == 0) {
      if (ctrl.mype == 0) printf("ERROR: sum weight for constraint %d is zero\n", i);
      MPI_Finalize();
      exit(-1);
    }
  }

  /* compute nvwgt */
  mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<incon; j++)
      mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]);


  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  if (ctrl.dbglvl&DBG_INFO) {
    cut = 0;
    for (i=0; i<graph->nvtxs; i++)
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++)
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
    gcut = GlobalSESum(&ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs);
    balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes));
    rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n",
      gcut, balance, maxnvtxs, graph->gnvtxs, npes);

  }

  /*=================================================================
   * Set up the newly moved graph
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  ctrl.nparts = inparts;
  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  /*=======================================================
   * Now compute the partition of the moved graph
   =======================================================*/
  if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) {
    IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes]));
    PartitionSmallGraph(&ctrl, mgraph, &wspace);
  }
  else {
    Moc_Global_Partition(&ctrl, mgraph, &wspace);
  }
  ParallelReMapGraph(&ctrl, mgraph, &wspace);

  /* Invert the ordering back to the original graph */
  ctrl.nparts = npes;
  ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace);

  *edgecut = mgraph->mincut;

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  /*******************/
  /* Print out stats */
  /*******************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "  avg: %.3f\n", avg/(float)incon);
  }

  GKfree((void **)&itpwgts, LTERM);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

}
Exemplo n.º 25
0
void FM_2WayNodeRefine_TwoSidedP(CtrlType *ctrl, GraphType *graph, 
          idxtype *hmarker, float ubfactor, int npasses)
{
  int i, ii, j, k, jj, kk, nvtxs, nbnd, nswaps, nmind;
  idxtype *xadj, *vwgt, *adjncy, *where, *pwgts, *edegrees, *bndind, *bndptr;
  idxtype *mptr, *mind, *moved, *swaps, *perm;
  PQueueType parts[2]; 
  NRInfoType *rinfo;
  int higain, oldgain, mincut, initcut, mincutorder;	
  int pass, to, other, limit;
  int badmaxpwgt, mindiff, newdiff;
  int u[2], g[2];

  nvtxs  = graph->nvtxs;
  xadj   = graph->xadj;
  adjncy = graph->adjncy;
  vwgt   = graph->vwgt;

  bndind = graph->bndind;
  bndptr = graph->bndptr;
  where  = graph->where;
  pwgts  = graph->pwgts;
  rinfo  = graph->nrinfo;


  i = ComputeMaxNodeGain(nvtxs, xadj, adjncy, vwgt);
  PQueueInit(ctrl, &parts[0], nvtxs, i);
  PQueueInit(ctrl, &parts[1], nvtxs, i);

  moved    = idxwspacemalloc(ctrl, nvtxs);
  swaps    = idxwspacemalloc(ctrl, nvtxs);
  mptr     = idxwspacemalloc(ctrl, nvtxs+1);
  mind     = idxwspacemalloc(ctrl, nvtxs);
  perm     = idxwspacemalloc(ctrl, nvtxs);

  IFSET(ctrl->dbglvl, DBG_REFINE,
    printf("Partitions: [%6d %6d] Nv-Nb[%6d %6d]. ISep: %6d\n", pwgts[0], pwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));

  badmaxpwgt = (int)(ubfactor*amax(pwgts[0], pwgts[1]));

  for (pass=0; pass<npasses; pass++) {
    idxset(nvtxs, -1, moved);
    PQueueReset(&parts[0]);
    PQueueReset(&parts[1]);

    mincutorder = -1;
    initcut = mincut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      ASSERT(where[i] == 2);
      if (hmarker[i] == -1) {
        PQueueInsert(&parts[0], i, vwgt[i]-rinfo[i].edegrees[1]);
        PQueueInsert(&parts[1], i, vwgt[i]-rinfo[i].edegrees[0]);
        moved[i] = -5;
      }
      else if (hmarker[i] != 2) {
        PQueueInsert(&parts[hmarker[i]], i, vwgt[i]-rinfo[i].edegrees[(hmarker[i]+1)%2]);
        moved[i] = -(10+hmarker[i]);
      }
    }

    ASSERT(CheckNodeBnd(graph, nbnd));
    ASSERT(CheckNodePartitionParams(graph));

    limit = nbnd;

    /******************************************************
    * Get into the FM loop
    *******************************************************/
    mptr[0] = nmind = 0;
    mindiff = abs(pwgts[0]-pwgts[1]);
    to = (pwgts[0] < pwgts[1] ? 0 : 1);
    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      u[0] = PQueueSeeMax(&parts[0]);  
      u[1] = PQueueSeeMax(&parts[1]);
      if (u[0] != -1 && u[1] != -1) {
        g[0] = vwgt[u[0]]-rinfo[u[0]].edegrees[1];
        g[1] = vwgt[u[1]]-rinfo[u[1]].edegrees[0];

        to = (g[0] > g[1] ? 0 : (g[0] < g[1] ? 1 : pass%2)); 

        if (pwgts[to]+vwgt[u[to]] > badmaxpwgt) 
          to = (to+1)%2;
      }
      else if (u[0] == -1 && u[1] == -1) {
        break;
      }
      else if (u[0] != -1 && pwgts[0]+vwgt[u[0]] <= badmaxpwgt) {
        to = 0;
      }
      else if (u[1] != -1 && pwgts[1]+vwgt[u[1]] <= badmaxpwgt) {
        to = 1;
      }
      else
        break;

      other = (to+1)%2;

      higain = PQueueGetMax(&parts[to]);

      /* Delete its matching entry in the other queue */
      if (moved[higain] == -5) 
        PQueueDelete(&parts[other], higain, vwgt[higain]-rinfo[higain].edegrees[to]);

      ASSERT(bndptr[higain] != -1);

      pwgts[2] -= (vwgt[higain]-rinfo[higain].edegrees[other]);

      newdiff = abs(pwgts[to]+vwgt[higain] - (pwgts[other]-rinfo[higain].edegrees[other]));
      if (pwgts[2] < mincut || (pwgts[2] == mincut && newdiff < mindiff)) {
        mincut      = pwgts[2];
        mincutorder = nswaps;
        mindiff     = newdiff;
      }
      else {
        if (nswaps - mincutorder > limit) {
          pwgts[2] += (vwgt[higain]-rinfo[higain].edegrees[other]);
          break; /* No further improvement, break out */
        }
      }

      BNDDelete(nbnd, bndind, bndptr, higain);
      pwgts[to] += vwgt[higain];
      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;  


      /**********************************************************
      * Update the degrees of the affected nodes
      ***********************************************************/
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2) { /* For the in-separator vertices modify their edegree[to] */
          oldgain = vwgt[k]-rinfo[k].edegrees[to];
          rinfo[k].edegrees[to] += vwgt[higain];
          if (moved[k] == -5 || moved[k] == -(10+other)) 
            PQueueUpdate(&parts[other], k, oldgain, oldgain-vwgt[higain]);
        }
        else if (where[k] == other) { /* This vertex is pulled into the separator */
          ASSERTP(bndptr[k] == -1, ("%d %d %d\n", k, bndptr[k], where[k]));
          BNDInsert(nbnd, bndind, bndptr, k);

          mind[nmind++] = k;  /* Keep track for rollback */
          where[k] = 2;
          pwgts[other] -= vwgt[k];

          edegrees = rinfo[k].edegrees;
          edegrees[0] = edegrees[1] = 0;
          for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
            kk = adjncy[jj];
            if (where[kk] != 2) 
              edegrees[where[kk]] += vwgt[kk];
            else {
              oldgain = vwgt[kk]-rinfo[kk].edegrees[other];
              rinfo[kk].edegrees[other] -= vwgt[k];
              if (moved[kk] == -5 || moved[kk] == -(10+to))
                PQueueUpdate(&parts[to], kk, oldgain, oldgain+vwgt[k]);
            }
          }

          /* Insert the new vertex into the priority queue (if it has not been moved). */
          if (moved[k] == -1 && (hmarker[k] == -1 || hmarker[k] == to)) {
            PQueueInsert(&parts[to], k, vwgt[k]-edegrees[other]);
            moved[k] = -(10+to);
          }
#ifdef FULLMOVES  /* this does not work as well as the above partial one */
          if (moved[k] == -1) {
            if (hmarker[k] == -1) {
              PQueueInsert(&parts[0], k, vwgt[k]-edegrees[1]);
              PQueueInsert(&parts[1], k, vwgt[k]-edegrees[0]);
              moved[k] = -5;
            }
            else if (hmarker[k] != 2) {
              PQueueInsert(&parts[hmarker[k]], k, vwgt[k]-edegrees[(hmarker[k]+1)%2]);
              moved[k] = -(10+hmarker[k]);
            }
          }
#endif
        }
      }
      mptr[nswaps+1] = nmind;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO,
            printf("Moved %6d to %3d, Gain: %5d [%5d] [%4d %4d] \t[%5d %5d %5d]\n", higain, to, g[to], g[other], vwgt[u[to]], vwgt[u[other]], pwgts[0], pwgts[1], pwgts[2]));

    }


    /****************************************************************
    * Roll back computation 
    *****************************************************************/
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      ASSERT(CheckNodePartitionParams(graph));

      to = where[higain];
      other = (to+1)%2;
      INC_DEC(pwgts[2], pwgts[to], vwgt[higain]);
      where[higain] = 2;
      BNDInsert(nbnd, bndind, bndptr, higain);

      edegrees = rinfo[higain].edegrees;
      edegrees[0] = edegrees[1] = 0;
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        if (where[k] == 2) 
          rinfo[k].edegrees[to] -= vwgt[higain];
        else
          edegrees[where[k]] += vwgt[k];
      }

      /* Push nodes out of the separator */
      for (j=mptr[nswaps]; j<mptr[nswaps+1]; j++) {
        k = mind[j];
        ASSERT(where[k] == 2);
        where[k] = other;
        INC_DEC(pwgts[other], pwgts[2], vwgt[k]);
        BNDDelete(nbnd, bndind, bndptr, k);
        for (jj=xadj[k]; jj<xadj[k+1]; jj++) {
          kk = adjncy[jj];
          if (where[kk] == 2) 
            rinfo[kk].edegrees[other] += vwgt[k];
        }
      }
    }

    ASSERT(mincut == pwgts[2]);

    IFSET(ctrl->dbglvl, DBG_REFINE,
      printf("\tMinimum sep: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    if (mincutorder == -1 || mincut >= initcut)
      break;
  }

  PQueueFree(ctrl, &parts[0]);
  PQueueFree(ctrl, &parts[1]);

  idxwspacefree(ctrl, nvtxs+1);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
}
Exemplo n.º 26
0
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
                 int *numflag, int *ncommonnodes, idxtype **xadj, 
		 idxtype **adjncy, MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;


  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i) 
	    continue;
	    
          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
	        }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
	    }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
	}
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, 
         (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, 
	 (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
Exemplo n.º 27
0
/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void MCRandom_KWayEdgeRefineHorizontal(CtrlType *ctrl, GraphType *graph, int nparts, 
       float *orgubvec, int npasses)
{
  int i, ii, iii, j, /*jj,*/ k, /*l,*/ pass, nvtxs, ncon, nmoves, nbnd, myndegrees, same; 
  int from, me, to, oldcut, gain;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where, *perm, *bndptr, *bndind;
  EDegreeType *myedegrees;
  RInfoType *myrinfo;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, maxlb, minlb, ubvec[MAXNCON], tvec[MAXNCON];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  bndptr = graph->bndptr;
  bndind = graph->bndind;

  where = graph->where;
  npwgts = graph->npwgts;
  
  /* Setup the weight intervals of the various subdomains */
  minwgt =  fwspacemalloc(ctrl, nparts*ncon);
  maxwgt = fwspacemalloc(ctrl, nparts*ncon);

  /* See if the orgubvec consists of identical constraints */
  maxlb = minlb = orgubvec[0];
  for (i=1; i<ncon; i++) {
    minlb = (orgubvec[i] < minlb ? orgubvec[i] : minlb);
    maxlb = (orgubvec[i] > maxlb ? orgubvec[i] : maxlb);
  }
  same = (fabs(maxlb-minlb) < .01 ? 1 : 0);


  /* Let's not get very optimistic. Let Balancing do the work */
  ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec);
  for (i=0; i<ncon; i++)
    ubvec[i] = amax(ubvec[i], orgubvec[i]);

  if (!same) {
    for (i=0; i<nparts; i++) {
      for (j=0; j<ncon; j++) {
        maxwgt[i*ncon+j] = ubvec[j]/nparts;
        minwgt[i*ncon+j] = 1.0/(ubvec[j]*nparts);
      }
    }
  }
  else {
    maxlb = ubvec[0];
    for (i=1; i<ncon; i++) 
      maxlb = (ubvec[i] > maxlb ? ubvec[i] : maxlb);

    for (i=0; i<nparts; i++) {
      for (j=0; j<ncon; j++) {
        maxwgt[i*ncon+j] = maxlb/nparts;
        minwgt[i*ncon+j] = 1.0/(maxlb*nparts);
      }
    }
  }


  perm = idxwspacemalloc(ctrl, nvtxs);

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Partitions: [%5.4f %5.4f], Nv-Nb[%6d %6d]. Cut: %6d, LB: ",
            npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
            graph->nvtxs, graph->nbnd, graph->mincut);
    ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
    for (i=0; i<ncon; i++)
      printf("%.3f ", tvec[i]);
    printf("\n");
  }

  for (pass=0; pass<npasses; pass++) {
    ASSERT(ComputeCut(graph, where) == graph->mincut);

    oldcut = graph->mincut;
    nbnd = graph->nbnd;

    RandomPermute(nbnd, perm, 1);
    for (nmoves=iii=0; iii<graph->nbnd; iii++) {
      ii = perm[iii];
      if (ii >= nbnd)
        continue;
      i = bndind[ii];

      myrinfo = graph->rinfo+i;

      if (myrinfo->ed >= myrinfo->id) { /* Total ED is too high */
        from = where[i];
        nvwgt = graph->nvwgt+i*ncon;

        if (myrinfo->id > 0 && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) 
          continue;   /* This cannot be moved! */

        myedegrees = myrinfo->edegrees;
        myndegrees = myrinfo->ndegrees;

        for (k=0; k<myndegrees; k++) {
          to = myedegrees[k].pid;
          gain = myedegrees[k].ed - myrinfo->id; 
          if (gain >= 0 && 
              (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            break;
        }
        if (k == myndegrees)
          continue;  /* break out if you did not find a candidate */

        for (j=k+1; j<myndegrees; j++) {
          to = myedegrees[j].pid;
          if ((myedegrees[j].ed > myedegrees[k].ed &&
               (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || 
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) ||
              (myedegrees[j].ed == myedegrees[k].ed && 
               IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            k = j;
        }

        to = myedegrees[k].pid;

        if (myedegrees[k].ed-myrinfo->id == 0 
            && !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)
            && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon)) 
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to' 
        *======================================================================*/
        graph->mincut -= myedegrees[k].ed-myrinfo->id;

        IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

        /* Update where, weight, and ID/ED information of the vertex you moved */
        saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
        saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
        where[i] = to;
        myrinfo->ed += myrinfo->id-myedegrees[k].ed;
        SWAP(myrinfo->id, myedegrees[k].ed, j);
        if (myedegrees[k].ed == 0) 
          myedegrees[k] = myedegrees[--myrinfo->ndegrees];
        else
          myedegrees[k].pid = from;

        if (myrinfo->ed-myrinfo->id < 0)
          BNDDelete(nbnd, bndind, bndptr, i);

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = graph->rinfo+ii;
          if (myrinfo->edegrees == NULL) {
            myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
            ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
          }
          myedegrees = myrinfo->edegrees;

          ASSERT(CheckRInfo(myrinfo));

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
              BNDInsert(nbnd, bndind, bndptr, ii);
          }
          else if (me == to) {
            INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id < 0 && bndptr[ii] != -1)
              BNDDelete(nbnd, bndind, bndptr, ii);
          }

          /* Remove contribution from the .ed of 'from' */
          if (me != from) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == from) {
                if (myedegrees[k].ed == adjwgt[j])
                  myedegrees[k] = myedegrees[--myrinfo->ndegrees];
                else
                  myedegrees[k].ed -= adjwgt[j];
                break;
              }
            }
          }

          /* Add contribution to the .ed of 'to' */
          if (me != to) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (myedegrees[k].pid == to) {
                myedegrees[k].ed += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              myedegrees[myrinfo->ndegrees].pid = to;
              myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
            }
          }

          ASSERT(myrinfo->ndegrees <= xadj[ii+1]-xadj[ii]);
          ASSERT(CheckRInfo(myrinfo));

        }
        nmoves++;
      }
    }

    graph->nbnd = nbnd;

    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\t [%5.4f %5.4f], Nb: %6d, Nmoves: %5d, Cut: %6d, LB: ",
              npwgts[samin(ncon*nparts, npwgts)], npwgts[samax(ncon*nparts, npwgts)], 
              nbnd, nmoves, graph->mincut);
      ComputeHKWayLoadImbalance(ncon, nparts, npwgts, tvec);
      for (i=0; i<ncon; i++)
        printf("%.3f ", tvec[i]);
      printf("\n");
    }

    if (graph->mincut == oldcut)
      break;
  }

  fwspacefree(ctrl, ncon*nparts);
  fwspacefree(ctrl, ncon*nparts);
  idxwspacefree(ctrl, nvtxs);
}
Exemplo n.º 28
0
Arquivo: mfm.c Projeto: cran/BigQuic
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void MocFM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, float *tpwgts, int npasses)
{
  int i, ii, j, k, l, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, me, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *perm, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  PQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, initcut, newcut, mincutorder;
  float rtpwgts[2];

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  npwgts = graph->npwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);
  qnum = idxwspacemalloc(ctrl, nvtxs);

  limit = amin(amax(0.01*nvtxs, 25), 150);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    PQueueInit(ctrl, &parts[i][0], nvtxs, PLUS_GAINSPAN+1);
    PQueueInit(ctrl, &parts[i][1], nvtxs, PLUS_GAINSPAN+1);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  origbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

  rtpwgts[0] = origbal*tpwgts[0];
  rtpwgts[1] = origbal*tpwgts[1];

/*
  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
  }
*/

  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    for (i=0; i<ncon; i++) { 
      PQueueReset(&parts[i][0]);
      PQueueReset(&parts[i][1]);
    }

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    for (i=0; i<ncon; i++)
      mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
    minbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    ASSERT(ComputeCut(graph, where) == graph->mincut);
    ASSERT(CheckBnd(graph));

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->nbnd;
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = bndind[perm[ii]];
      ASSERT(ed[i] > 0 || id[i] == 0);
      ASSERT(bndptr[i] != -1);
      PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
      to = (from+1)%2;

      if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
        break;
      ASSERT(bndptr[higain] != -1);

      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

      newcut -= (ed[higain]-id[higain]);
      newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

      if ((newcut < mincut && newbal-origbal <= .00001) || 
          (newcut == mincut && (newbal < minbal || 
                                (newbal == minbal && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
        mincut = newcut;
        minbal = newbal;
        mincutorder = nswaps;
        for (i=0; i<ncon; i++)
          mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
        saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

/*
      if (ctrl->dbglvl&DBG_MOVEINFO) {
        printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
        for (l=0; l<ncon; l++) 
          printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
        printf(", %.3f LB: %.3f\n", minbal, newbal);
      }
*/


      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
        BNDDelete(nbnd, bndind,  bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              PQueueDelete(&parts[qnum[k]][where[k]], k, oldgain);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              PQueueUpdate(&parts[qnum[k]][where[k]], k, oldgain, ed[k]-id[k]);
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1) 
              PQueueInsert(&parts[qnum[k]][where[k]], k, ed[k]-id[k]);
          }
        }
      }

    }


    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind,  bndptr, higain);

      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

/*
    if (ctrl->dbglvl&DBG_REFINE) {
      printf("\tMincut: %6d at %5d, NBND: %6d, NPwgts: [", mincut, mincutorder, nbnd);
      for (l=0; l<ncon; l++)
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf("], LB: %.3f\n", Compute2WayHLoadImbalance(ncon, npwgts, tpwgts));
    }
*/

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  for (i=0; i<ncon; i++) {
    PQueueFree(ctrl, &parts[i][0]);
    PQueueFree(ctrl, &parts[i][1]);
  }

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}
Exemplo n.º 29
0
/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
{
  int h, i, ii, iii, j, k, c;
  int pass, nvtxs, nedges, ncon;
  int nmoves, nmoved, nswaps, nzgswaps;
/*  int gnswaps, gnzgswaps; */
  int me, firstvtx, lastvtx, yourlastvtx;
  int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  int nlupd, nsupd, nnbrs, nchanged;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  idxtype *where, *tmp_where, *moved;
  floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
  idxtype *update, *supdate, *rupdate, *pe_updates;
  idxtype *changed, *perm, *pperm, *htable;
  idxtype *peind, *recvptr, *sendptr;
  KeyValueType *swchanges, *rwchanges;
  RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
  EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
  floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
  int *nupds_pe;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  /*************************/
  /* set up common aliases */
  /*************************/
  nvtxs = graph->nvtxs;
  nedges = graph->nedges;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  where   = graph->where;
  rinfo   = graph->rinfo;
  lnpwgts = graph->lnpwgts;
  gnpwgts = graph->gnpwgts;
  ubvec   = ctrl->ubvec;
  tpwgts  = ctrl->tpwgts;

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  changed = idxmalloc(nvtxs, "KWR: changed");
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  /************************************/
  /* set up important data structures */
  /************************************/
  perm = idxmalloc(nvtxs, "KWR: perm");
  pperm = idxmalloc(nparts, "KWR: pperm");

  update = idxmalloc(nvtxs, "KWR: update");
  supdate = wspace->indices;
  rupdate = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWR: nupds_pe");
  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
  badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");

  for (i=0; i<nparts; i++) {
    for (h=0; h<ncon; h++) {
      badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
    }
  }

  movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
  ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
  pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
  overfill = fmalloc(nparts*ncon, "KWR: overfill");
  moved = idxmalloc(nvtxs, "KWR: moved");
  tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
  tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
  tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");

  idxcopy(nvtxs+graph->nrecv, where, tmp_where);
  for (i=0; i<nvtxs; i++) {
    tmp_rinfo[i].id = rinfo[i].id;
    tmp_rinfo[i].ed = rinfo[i].ed;
    tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
    tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];

    for (j=0; j<rinfo[i].ndegrees; j++) {
      tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
      tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
    }
  }

  nswaps = nzgswaps = 0;
  /*********************************************************/
  /* perform a small number of passes through the vertices */
  /*********************************************************/
  for (pass=0; pass<npasses; pass++) {
    if (mype == 0)
      RandomPermute(nparts, pperm, 1);
    MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
    FastRandomPermute(nvtxs, perm, 1);
    oldcut = graph->mincut;

    /* check to see if the partitioning is imbalanced */
    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    ubavg = savg(ncon, ubvec);
    lbavg = savg(ncon, lbvec);
    imbalanced = (lbavg > ubavg) ? 1 : 0;

    for (c=0; c<2; c++) {
      scopy(ncon*nparts, gnpwgts, ognpwgts);
      sset(ncon*nparts, 0.0, movewgts);
      nmoved = 0;

      /**********************************************/
      /* PASS ONE -- record stats for desired moves */
      /**********************************************/
      for (iii=0; iii<nvtxs; iii++) {
        i = perm[iii];
        from = tmp_where[i];
        nvwgt = graph->nvwgt+i*ncon;

        for (h=0; h<ncon; h++)
          if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
            break;

        if (h < ncon) {
          continue;
        }

        /* check for a potential improvement */
        if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
          my_edegrees = tmp_rinfo[i].degrees;

          for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
            to = my_edegrees[k].edge;
            if (ProperSide(c, pperm[from], pperm[to])) {
              for (h=0; h<ncon; h++)
                if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                  break;

              if (h == ncon)
                break;
            }
          }
          oldto = to;

          /* check if a subdomain was found that fits */
          if (k < tmp_rinfo[i].ndegrees) {
            for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
              to = my_edegrees[j].edge;
              if (ProperSide(c, pperm[from], pperm[to])) {
                for (h=0; h<ncon; h++)
                  if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                    break;

                if (h == ncon) {
                  if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
                   (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
                   IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
                    k = j;
                    oldto = my_edegrees[k].edge;
                  }
                }
              }
            }
            to = oldto;

            if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
            (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
            (imbalanced ||  graph->level > 3  || iii % 8 == 0) &&
            IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){

              /****************************************/
              /* Update tmp arrays of the moved vertex */
              /****************************************/
              tmp_where[i] = to;
              moved[nmoved++] = i;
              for (h=0; h<ncon; h++) {
                lnpwgts[to*ncon+h] += nvwgt[h];
                lnpwgts[from*ncon+h] -= nvwgt[h];
                gnpwgts[to*ncon+h] += nvwgt[h];
                gnpwgts[from*ncon+h] -= nvwgt[h];
                movewgts[to*ncon+h] += nvwgt[h];
                movewgts[from*ncon+h] -= nvwgt[h];
              }

              tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
              SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
              if (my_edegrees[k].ewgt == 0) {
                tmp_rinfo[i].ndegrees--;
                my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
                my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
              }
              else {
                my_edegrees[k].edge = from;
              }

              /* Update the degrees of adjacent vertices */
              for (j=xadj[i]; j<xadj[i+1]; j++) {
                /* no need to bother about vertices on different pe's */
                if (ladjncy[j] >= nvtxs)
                  continue;

                me = ladjncy[j];
                mydomain = tmp_where[me];

                myrinfo = tmp_rinfo+me;
                your_edegrees = myrinfo->degrees;

                if (mydomain == from) {
                  INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
                }
                else {
                  if (mydomain == to) {
                    INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                  }
                }

                /* Remove contribution from the .ed of 'from' */
                if (mydomain != from) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == from) {
                      if (your_edegrees[k].ewgt == adjwgt[j]) {
                        myrinfo->ndegrees--;
                        your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                        your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                      }
                      else {
                        your_edegrees[k].ewgt -= adjwgt[j];
                      }
                      break;
                    }
                  }
                }

                /* Add contribution to the .ed of 'to' */
                if (mydomain != to) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == to) {
                      your_edegrees[k].ewgt += adjwgt[j];
                      break;
                    }
                  }
                  if (k == myrinfo->ndegrees) {
                    your_edegrees[myrinfo->ndegrees].edge = to;
                    your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                  }
                }
              }
            }
          }
        }
      }

      /******************************************/
      /* Let processors know the subdomain wgts */
      /* if all proposed moves commit.          */
      /******************************************/
      MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);

      /**************************/
      /* compute overfill array */
      /**************************/
      overweight = 0;
      for (j=0; j<nparts; j++) {
        for (h=0; h<ncon; h++) {
          if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
            overfill[j*ncon+h] =
            (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) /
            (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]);
          }
          else {
            overfill[j*ncon+h] = 0.0;
          }

          overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
          overfill[j*ncon+h] *= movewgts[j*ncon+h];

          if (overfill[j*ncon+h] > 0.0)
            overweight = 1;

          ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] ||
          pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h],
          (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h],
          badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));
        }
      }

      /****************************************************/
      /* select moves to undo according to overfill array */
      /****************************************************/
      if (overweight == 1) {
        for (iii=0; iii<nmoved; iii++) {
          i = moved[iii];
          oldto = tmp_where[i];
          nvwgt = graph->nvwgt+i*ncon;
          my_edegrees = tmp_rinfo[i].degrees;

          for (k=0; k<tmp_rinfo[i].ndegrees; k++)
            if (my_edegrees[k].edge == where[i])
              break;

          for (h=0; h<ncon; h++)
            if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)
              break;

          /**********************************/
          /* nullify this move if necessary */
          /**********************************/
          if (k != tmp_rinfo[i].ndegrees && h != ncon) {
            moved[iii] = -1;
            from = oldto;
            to = where[i];

            for (h=0; h<ncon; h++) {
              overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);
            }

            tmp_where[i] = to;
            tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
            SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
            if (my_edegrees[k].ewgt == 0) {
              tmp_rinfo[i].ndegrees--;
              my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
              my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
            }
            else {
              my_edegrees[k].edge = from;
            }

            for (h=0; h<ncon; h++) {
              lnpwgts[to*ncon+h] += nvwgt[h];
              lnpwgts[from*ncon+h] -= nvwgt[h];
            }

            /* Update the degrees of adjacent vertices */
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              /* no need to bother about vertices on different pe's */
              if (ladjncy[j] >= nvtxs)
                continue;

              me = ladjncy[j];
              mydomain = tmp_where[me];

              myrinfo = tmp_rinfo+me;
              your_edegrees = myrinfo->degrees;

              if (mydomain == from) {
                INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
              }
              else {
                if (mydomain == to) {
                  INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                }
              }

              /* Remove contribution from the .ed of 'from' */
              if (mydomain != from) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == from) {
                    if (your_edegrees[k].ewgt == adjwgt[j]) {
                      myrinfo->ndegrees--;
                      your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                      your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                    }
                    else {
                      your_edegrees[k].ewgt -= adjwgt[j];
                    }
                    break;
                  }
                }
              }

              /* Add contribution to the .ed of 'to' */
              if (mydomain != to) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == to) {
                    your_edegrees[k].ewgt += adjwgt[j];
                    break;
                  }
                }
                if (k == myrinfo->ndegrees) {
                  your_edegrees[myrinfo->ndegrees].edge = to;
                  your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                }
              }
            }
          }
        }
      }

      /*************************************************/
      /* PASS TWO -- commit the remainder of the moves */
      /*************************************************/
      nlupd = nsupd = nmoves = nchanged = 0;
      for (iii=0; iii<nmoved; iii++) {
        i = moved[iii];
        if (i == -1)
          continue;

        where[i] = tmp_where[i];

        /* Make sure to update the vertex information */
        if (htable[i] == 0) {
          /* make sure you do the update */
          htable[i] = 1;
          update[nlupd++] = i;
        }

        /* Put the vertices adjacent to i into the update array */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (htable[k] == 0) {
            htable[k] = 1;
            if (k<nvtxs)
              update[nlupd++] = k;
            else
              supdate[nsupd++] = k;
          }
        }
        nmoves++;
        nswaps++;

        /* check number of zero-gain moves */
        for (k=0; k<rinfo[i].ndegrees; k++)
          if (rinfo[i].degrees[k].edge == to)
            break;
        if (rinfo[i].id == rinfo[i].degrees[k].ewgt)
          nzgswaps++;

        if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
          changed[nchanged++] = i;
      }

      /* Tell interested pe's the new where[] info for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
      swchanges, rwchanges, wspace->pv4); 


      IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
      rprintf(ctrl, "\t[%d %d], [%.4f],  [%d %d %d]\n",
      pass, c, badmaxpwgt[0],
      GlobalSESum(ctrl, nmoves),
      GlobalSESum(ctrl, nsupd),
      GlobalSESum(ctrl, nlupd)));

      /*-------------------------------------------------------------
      / Time to communicate with processors to send the vertices
      / whose degrees need to be update.
      /-------------------------------------------------------------*/
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
                  peind[i], 1, ctrl->comm, ctrl->rreq+i);
      }

      /* Issue the sends next. This needs some preporcessing */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      }
      iidxsort(nsupd, supdate);

      for (j=i=0; i<nnbrs; i++) {
        yourlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); 
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
        j = k;
      }

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++) 
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);


      /*-------------------------------------------------------------
      / Place the recieved to-be updated vertices into update[] 
      /-------------------------------------------------------------*/
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;
          }
        }
      }


      /*-------------------------------------------------------------
      / Update the rinfo of the vertices in the update[] array
      /-------------------------------------------------------------*/
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        htable[i] = 0;

        mydomain = where[i];
        myrinfo = rinfo+i;
        tmp_myrinfo = tmp_rinfo+i;
        my_edegrees = myrinfo->degrees;
        your_edegrees = tmp_myrinfo->degrees;

        graph->lmincut -= myrinfo->ed;
        myrinfo->ndegrees = 0;
        myrinfo->id = 0;
        myrinfo->ed = 0;

        for (j=xadj[i]; j<xadj[i+1]; j++) {
          yourdomain = where[ladjncy[j]];
          if (mydomain != yourdomain) {
            myrinfo->ed += adjwgt[j];

            for (k=0; k<myrinfo->ndegrees; k++) {
              if (my_edegrees[k].edge == yourdomain) {
                my_edegrees[k].ewgt += adjwgt[j];
                your_edegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              my_edegrees[k].edge = yourdomain;
              my_edegrees[k].ewgt = adjwgt[j];
              your_edegrees[k].edge = yourdomain;
              your_edegrees[k].ewgt = adjwgt[j];
              myrinfo->ndegrees++;
            }
            ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
            ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);

          }
          else {
            myrinfo->id += adjwgt[j];
          }
        }
        graph->lmincut += myrinfo->ed;

        tmp_myrinfo->id = myrinfo->id;
        tmp_myrinfo->ed = myrinfo->ed;
        tmp_myrinfo->ndegrees = myrinfo->ndegrees;
      }

      /* finally, sum-up the partition weights */
      MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);
    }
    graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

    if (graph->mincut == oldcut)
      break;
  }

/*
  gnswaps = GlobalSESum(ctrl, nswaps);
  gnzgswaps = GlobalSESum(ctrl, nzgswaps);
  if (mype == 0)
    printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps);
*/

  GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
  GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
  GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
  GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
}
Exemplo n.º 30
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void FM_2WayEdgeRefine(CtrlType *ctrl, GraphType *graph, int *tpwgts, int npasses)
{
  int i, ii, j, k, kwgt, nvtxs, nbnd, nswaps, from, to, pass, me, limit, tmp;
  idxtype *xadj, *vwgt, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind, *pwgts;
  idxtype *moved, *swaps, *perm;
  PQueueType parts[2];
  int higain, oldgain, mincut, mindiff, origdiff, initcut, newcut, mincutorder, avgvwgt;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  vwgt = graph->vwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->id;
  ed = graph->ed;
  pwgts = graph->pwgts;
  bndptr = graph->bndptr;
  bndind = graph->bndind;

  moved = idxwspacemalloc(ctrl, nvtxs);
  swaps = idxwspacemalloc(ctrl, nvtxs);
  perm = idxwspacemalloc(ctrl, nvtxs);

  limit = (int) amin(amax(0.01*nvtxs, 15), 100);
  avgvwgt = amin((pwgts[0]+pwgts[1])/20, 2*(pwgts[0]+pwgts[1])/nvtxs);

  tmp = graph->adjwgtsum[idxamax(nvtxs, graph->adjwgtsum)];
  PQueueInit(ctrl, &parts[0], nvtxs, tmp);
  PQueueInit(ctrl, &parts[1], nvtxs, tmp);

  IFSET(ctrl->dbglvl, DBG_REFINE, 
     printf("Partitions: [%6d %6d] T[%6d %6d], Nv-Nb[%6d %6d]. ICut: %6d\n",
             pwgts[0], pwgts[1], tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut));

  origdiff = abs(tpwgts[0]-pwgts[0]);
  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    PQueueReset(&parts[0]);
    PQueueReset(&parts[1]);

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    mindiff = abs(tpwgts[0]-pwgts[0]);

    ASSERT(ComputeCut(graph, where) == graph->mincut);
    ASSERT(CheckBnd(graph));

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->nbnd;
    RandomPermute(nbnd, perm, 1);
    for (ii=0; ii<nbnd; ii++) {
      i = perm[ii];
      ASSERT(ed[bndind[i]] > 0 || id[bndind[i]] == 0);
      ASSERT(bndptr[bndind[i]] != -1);
      PQueueInsert(&parts[where[bndind[i]]], bndind[i], ed[bndind[i]]-id[bndind[i]]);
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      from = (tpwgts[0]-pwgts[0] < tpwgts[1]-pwgts[1] ? 0 : 1);
      to = (from+1)%2;

      if ((higain = PQueueGetMax(&parts[from])) == -1)
        break;
      ASSERT(bndptr[higain] != -1);

      newcut -= (ed[higain]-id[higain]);
      INC_DEC(pwgts[to], pwgts[from], vwgt[higain]);

      if ((newcut < mincut && abs(tpwgts[0]-pwgts[0]) <= origdiff+avgvwgt) || 
          (newcut == mincut && abs(tpwgts[0]-pwgts[0]) < mindiff)) {
        mincut = newcut;
        mindiff = abs(tpwgts[0]-pwgts[0]);
        mincutorder = nswaps;
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        INC_DEC(pwgts[from], pwgts[to], vwgt[higain]);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

      IFSET(ctrl->dbglvl, DBG_MOVEINFO, 
        printf("Moved %6d from %d. [%3d %3d] %5d [%4d %4d]\n", higain, from, ed[higain]-id[higain], vwgt[higain], newcut, pwgts[0], pwgts[1]));

      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1]) 
        BNDDelete(nbnd, bndind,  bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              PQueueDelete(&parts[where[k]], k, oldgain);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              PQueueUpdate(&parts[where[k]], k, oldgain, ed[k]-id[k]);
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1) 
              PQueueInsert(&parts[where[k]], k, ed[k]-id[k]);
          }
        }
      }

    }


    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind,  bndptr, higain);

      INC_DEC(pwgts[to], pwgts[(to+1)%2], vwgt[higain]);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

    IFSET(ctrl->dbglvl, DBG_REFINE, 
      printf("\tMinimum cut: %6d at %5d, PWGTS: [%6d %6d], NBND: %6d\n", mincut, mincutorder, pwgts[0], pwgts[1], nbnd));

    graph->mincut = mincut;
    graph->nbnd = nbnd;

    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  PQueueFree(ctrl, &parts[0]);
  PQueueFree(ctrl, &parts[1]);

  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);
  idxwspacefree(ctrl, nvtxs);

}