예제 #1
0
파일: ometis.c 프로젝트: Nasrollah/phasta
/*************************************************************************
* This function takes a graph and produces a bisection of it
**************************************************************************/
void MlevelNestedDissectionCC(CtrlType *ctrl, GraphType *graph, idxtype *order, float ubfactor, int lastvtx)
{
  int i, j, nvtxs, nbnd, tvwgt, tpwgts2[2], nsgraphs, ncmps, rnvtxs;
  idxtype *label, *bndind;
  idxtype *cptr, *cind;
  GraphType *sgraphs;

  nvtxs = graph->nvtxs;

  /* Determine the weights of the partitions */
  tvwgt = idxsum(nvtxs, graph->vwgt);
  tpwgts2[0] = tvwgt/2;
  tpwgts2[1] = tvwgt-tpwgts2[0];

  MlevelNodeBisectionMultiple(ctrl, graph, tpwgts2, ubfactor);
  IFSET(ctrl->dbglvl, DBG_SEPINFO, printf("Nvtxs: %6d, [%6d %6d %6d]\n", graph->nvtxs, graph->pwgts[0], graph->pwgts[1], graph->pwgts[2]));

  /* Order the nodes in the separator */
  nbnd = graph->nbnd;
  bndind = graph->bndind;
  label = graph->label;
  for (i=0; i<nbnd; i++) 
    order[label[bndind[i]]] = --lastvtx;

  cptr = idxmalloc(nvtxs, "MlevelNestedDissectionCC: cptr");
  cind = idxmalloc(nvtxs, "MlevelNestedDissectionCC: cind");
  ncmps = FindComponents(ctrl, graph, cptr, cind);

/*
  if (ncmps > 2)
    printf("[%5d] has %3d components\n", nvtxs, ncmps);
*/

  sgraphs = (GraphType *)GKmalloc(ncmps*sizeof(GraphType), "MlevelNestedDissectionCC: sgraphs");

  nsgraphs = SplitGraphOrderCC(ctrl, graph, sgraphs, ncmps, cptr, cind);

  GKfree(&cptr, &cind, LTERM);

  /* Free the memory of the top level graph */
  GKfree(&graph->gdata, &graph->rdata, &graph->label, LTERM);

  /* Go and process the subgraphs */
  for (rnvtxs=i=0; i<nsgraphs; i++) {
    if (sgraphs[i].adjwgt == NULL) {
      MMDOrder(ctrl, sgraphs+i, order, lastvtx-rnvtxs);
      GKfree(&sgraphs[i].gdata, &sgraphs[i].label, LTERM);
    }
    else {
      MlevelNestedDissectionCC(ctrl, sgraphs+i, order, ubfactor, lastvtx-rnvtxs);
    }
    rnvtxs += sgraphs[i].nvtxs;
  }

  free(sgraphs);
}
예제 #2
0
/*************************************************************************
* This function creates a CoarseGraphType data structure and initializes
* the various fields
**************************************************************************/
GraphType *CreateGraph(void)
{
  GraphType *graph;

  graph = (GraphType *)GKmalloc(sizeof(GraphType), "CreateCoarseGraph: graph");

  InitGraph(graph);

  return graph;
}
예제 #3
0
void AllocateWSpace(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
    wspace->nlarge  = 2*graph->nedges;
    wspace->nparts  = ctrl->nparts;
    wspace->npes    = ctrl->npes;

    wspace->maxcore = 8*graph->nedges+1;
    wspace->core    = idxmalloc(wspace->maxcore, "AllocateWSpace: wspace->core");

    wspace->pairs   = (KeyValueType *)wspace->core;
    wspace->indices = (idxtype *)(wspace->pairs + wspace->nlarge);
    wspace->degrees = (EdgeType *)(wspace->indices + wspace->nlarge);


    wspace->pv1 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv1");
    wspace->pv2 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv2");
    wspace->pv3 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv3");
    wspace->pv4 = idxmalloc(ctrl->nparts+ctrl->npes+1, "AllocateWSpace: wspace->pv4");

    wspace->pepairs1 = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(ctrl->nparts+ctrl->npes+1), "AllocateWSpace: wspace->pepairs?");
    wspace->pepairs2 = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(ctrl->nparts+ctrl->npes+1), "AllocateWSpace: wspace->pepairs?");

}
예제 #4
0
Matrix* allocMatrix(int nvtxs, int nedges, int allocSum, int
						allocMax, int allocAttractor)
{
	Matrix* a;
	a=(Matrix*)malloc(sizeof(Matrix));
	a->nvtxs=nvtxs;
	a->nnz=nedges;
	a->xadj = (idxtype*) malloc( sizeof(idxtype)*(nvtxs+1) ); 
//	a->xadj = (idxtype*) GKmalloc( sizeof(idxtype)*(nvtxs+1) , 
//					"allocMatrix:xadj" );
	a->adjncy=(idxtype*) GKmalloc(sizeof(idxtype)*nedges , 
						"allocMatrix:adjncy" );
	a->adjwgt=(wgttype*)GKmalloc( sizeof(wgttype)*nedges , 
						"allocMatrix:adjwgt" );
	if ( allocSum )
		a->adjwgtsum=(wgttype*)malloc(sizeof(wgttype)*nvtxs);
	else
		a->adjwgtsum=NULL;
		
 	if ( allocSum )
		a->maxwgt=(wgttype*)malloc(sizeof(wgttype)*nvtxs);
	else
		a->maxwgt=NULL;

	if ( allocAttractor )
	{
		a->attractors=(idxtype*)malloc(sizeof(idxtype)*nvtxs);
//		printf("Allocating attractors\n");
	}
	else
		a->attractors=NULL;

	a->rmap=NULL;
	a->currentSize = nedges;
	return a;
}
예제 #5
0
/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void Mc_SerialKWayAdaptRefine(GraphType *graph, int nparts, idxtype *home,
     float *orgubvec, int npasses)
{
  int i, ii, iii, j, k;
  int nvtxs, ncon, pass, nmoves, myndegrees;
  int from, me, myhome, to, oldcut, gain, tmp;
  idxtype *xadj, *adjncy, *adjwgt;
  idxtype *where;
  EdgeType *mydegrees;
  RInfoType *rinfo, *myrinfo;
  float *npwgts, *nvwgt, *minwgt, *maxwgt, ubvec[MAXNCON];
  int gain_is_greater, gain_is_same, fit_in_to, fit_in_from, going_home;
  int zero_gain, better_balance_ft, better_balance_tt;
  KeyValueType *cand;
int mype;
MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  rinfo = graph->rinfo;
  npwgts = graph->gnpwgts;
  
  /* Setup the weight intervals of the various subdomains */
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");
  minwgt =  fmalloc(nparts*ncon, "minwgt");
  maxwgt = fmalloc(nparts*ncon, "maxwgt");

  ComputeHKWayLoadImbalance(ncon, nparts, npwgts, ubvec);
  for (i=0; i<ncon; i++)
    ubvec[i] = amax(ubvec[i], orgubvec[i]);

  for (i=0; i<nparts; i++) {
    for (j=0; j<ncon; j++) {
      maxwgt[i*ncon+j] = ubvec[j]/(float)nparts;
      minwgt[i*ncon+j] = ubvec[j]*(float)nparts;
    }
  }

  for (pass=0; pass<npasses; pass++) {
    oldcut = graph->mincut;

    for (i=0; i<nvtxs; i++) {
      cand[i].key = rinfo[i].id-rinfo[i].ed;
      cand[i].val = i;
    }
    ikeysort(nvtxs, cand);

    nmoves = 0;
    for (iii=0; iii<nvtxs; iii++) {
      i = cand[iii].val;

      myrinfo = rinfo+i;

      if (myrinfo->ed >= myrinfo->id) {
        from = where[i];
        myhome = home[i];
        nvwgt = graph->nvwgt+i*ncon;

        if (myrinfo->id > 0 &&
        AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, -1.0, nvwgt, minwgt+from*ncon)) 
          continue;

        mydegrees = myrinfo->degrees;
        myndegrees = myrinfo->ndegrees;

        for (k=0; k<myndegrees; k++) {
          to = mydegrees[k].edge;
          gain = mydegrees[k].ewgt - myrinfo->id; 
          if (gain >= 0 && 
             (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
             IsHBalanceBetterFT(ncon,npwgts+from*ncon,npwgts+to*ncon,nvwgt,ubvec))) {
            break;
          }
        }

        /* break out if you did not find a candidate */
        if (k == myndegrees)
          continue;

        for (j=k+1; j<myndegrees; j++) {
          to = mydegrees[j].edge;
          going_home = (myhome == to);
          gain_is_same = (mydegrees[j].ewgt == mydegrees[k].ewgt);
          gain_is_greater = (mydegrees[j].ewgt > mydegrees[k].ewgt);
          fit_in_to = AreAllHVwgtsBelow(ncon,1.0,npwgts+to*ncon,1.0,nvwgt,maxwgt+to*ncon);
          better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
                              npwgts+to*ncon,nvwgt,ubvec);
          better_balance_tt = IsHBalanceBetterTT(ncon,npwgts+mydegrees[k].edge*ncon,
                              npwgts+to*ncon,nvwgt,ubvec);

          if (
               (gain_is_greater &&
                 (fit_in_to ||
                  better_balance_ft)
               )
            ||
               (gain_is_same &&
                 (
                   (fit_in_to &&
                    going_home)
                ||
                    better_balance_tt
                 )
               )
             ) {
            k = j;
          }
        }

        to = mydegrees[k].edge;
        going_home = (myhome == to);
        zero_gain = (mydegrees[k].ewgt == myrinfo->id);

        fit_in_from = AreAllHVwgtsBelow(ncon,1.0,npwgts+from*ncon,0.0,npwgts+from*ncon,
                      maxwgt+from*ncon);
        better_balance_ft = IsHBalanceBetterFT(ncon,npwgts+from*ncon,
                            npwgts+to*ncon,nvwgt,ubvec);

        if (zero_gain &&
            !going_home &&
            !better_balance_ft &&
            fit_in_from)
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to' 
        *======================================================================*/
        graph->mincut -= mydegrees[k].ewgt-myrinfo->id;

        /* Update where, weight, and ID/ED information of the vertex you moved */
        saxpy2(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
        saxpy2(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
        where[i] = to;
        myrinfo->ed += myrinfo->id-mydegrees[k].ewgt;
        SWAP(myrinfo->id, mydegrees[k].ewgt, tmp);

        if (mydegrees[k].ewgt == 0) {
          myrinfo->ndegrees--;
          mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
          mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
        }
        else
          mydegrees[k].edge = from;

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = rinfo+ii;
          mydegrees = myrinfo->degrees;

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
          }
          else {
            if (me == to) {
              INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
            }
          }

          /* Remove contribution of the ed from 'from' */
          if (me != from) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (mydegrees[k].edge == from) {
                if (mydegrees[k].ewgt == adjwgt[j]) {
                  myrinfo->ndegrees--;
                  mydegrees[k].edge = mydegrees[myrinfo->ndegrees].edge;
                  mydegrees[k].ewgt = mydegrees[myrinfo->ndegrees].ewgt;
                }
                else
                  mydegrees[k].ewgt -= adjwgt[j];
                break;
              }
            }
          }

          /* Add contribution of the ed to 'to' */
          if (me != to) {
            for (k=0; k<myrinfo->ndegrees; k++) {
              if (mydegrees[k].edge == to) {
                mydegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              mydegrees[myrinfo->ndegrees].edge = to;
              mydegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
            }
          }

        }
        nmoves++;
      }
    }

    if (graph->mincut == oldcut)
      break;
  }

  GKfree((void **)&minwgt, (void **)&maxwgt, (void **)&cand, LTERM);

  return;
}
예제 #6
0
/*************************************************************************
* This function balances two partitions by moving the highest gain
* (including negative gain) vertices to the other domain.
* It is used only when tha unbalance is due to non contigous
* subdomains. That is, the are no boundary vertices.
* It moves vertices from the domain that is overweight to the one that
* is underweight.
**************************************************************************/
void Mc_Serial_Init2WayBalance(GraphType *graph, float *tpwgts)
{
  int i, ii, j, k;
  int kwgt, nvtxs, nbnd, ncon, nswaps, from, to, cnum, tmp;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *qnum;
  float *nvwgt, *npwgts;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut;
  KeyValueType *cand;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  nvwgt = graph->nvwgt;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  /* This is called for initial partitioning so we know from where to pick nodes */
  from = 1;
  to = (from+1)%2;

  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
  }

  /* Compute the queues in which each vertex will be assigned to */
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  for (i=0; i<nvtxs; i++) {
    cand[i].key = id[i]-ed[i];
    cand[i].val = i;
  }
  ikeysort(nvtxs, cand);

  /* Insert the nodes of the proper partition in the appropriate priority queue */
  for (ii=0; ii<nvtxs; ii++) {
    i = cand[ii].val;
    if (where[i] == from) {
      if (ed[i] > 0)
        FPQueueInsert(&parts[qnum[i]][0], i, (float)(ed[i]-id[i]));
      else
        FPQueueInsert(&parts[qnum[i]][1], i, (float)(ed[i]-id[i]));
    }
  }

  mincut = graph->mincut;
  nbnd = graph->gnvtxs;
  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (Serial_AreAnyVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, nvwgt, tpwgts+from*ncon))
      break;

    if ((cnum = Serial_SelectQueueOneWay(ncon, npwgts, tpwgts, from, parts)) == -1)
      break;


    if ((higain = FPQueueGetMax(&parts[cnum][0])) == -1)
      higain = FPQueueGetMax(&parts[cnum][1]);

    mincut -= (ed[higain]-id[higain]);
    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

    where[higain] = to;

    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (where[k] == from) {
        if (ed[k] > 0 && bndptr[k] == -1) {  /* It moves in boundary */
          FPQueueDelete(&parts[qnum[k]][1], k);
          FPQueueInsert(&parts[qnum[k]][0], k, (float)(ed[k]-id[k]));
        }
        else { /* It must be in the boundary already */
          FPQueueUpdate(&parts[qnum[k]][0], k, (float)(oldgain), (float)(ed[k]-id[k]));
        }
      }

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1)
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  graph->mincut = mincut;
  graph->gnvtxs = nbnd;

  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, LTERM);
}
예제 #7
0
/*************************************************************************
* This function computes the assignment using the the objective the 
* minimization of the total volume of data that needs to move
**************************************************************************/
void ParallelTotalVReMap(CtrlType *ctrl, idxtype *lpwgts, idxtype *map,
     WorkSpaceType *wspace, int npasses, int ncon)
{
  int i, ii, j, k, nparts, mype;
  int pass, maxipwgt, nmapped, oldwgt, newwgt, done;
  idxtype *rowmap, *mylpwgts;
  KeyValueType *recv, send;
  int nsaved, gnsaved;

  mype   = ctrl->mype;
  nparts = ctrl->nparts;

  recv     = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nparts, "remap: recv");
  mylpwgts = idxmalloc(nparts, "mylpwgts");

  done = nmapped = 0;
  idxset(nparts, -1, map);
  rowmap = idxset(nparts, -1, wspace->pv3);
  idxcopy(nparts, lpwgts, mylpwgts);
  for (pass=0; pass<npasses; pass++) {
    maxipwgt = idxamax(nparts, mylpwgts);

    if (mylpwgts[maxipwgt] > 0 && !done) {
      send.key = -mylpwgts[maxipwgt];
      send.val = mype*nparts+maxipwgt;
    }
    else {
      send.key = 0;
      send.val = -1;
    }

    /* each processor sends its selection */
    MPI_Allgather((void *)&send, 2, IDX_DATATYPE, (void *)recv, 2, IDX_DATATYPE, ctrl->comm); 

    ikeysort(nparts, recv);
    if (recv[0].key == 0)
      break;

    /* now make as many assignments as possible */
    for (ii=0; ii<nparts; ii++) {
      i = recv[ii].val;

      if (i == -1)
        continue;

      j = i % nparts;
      k = i / nparts;
      if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, j, k)) {
        map[j] = k;
        rowmap[k] = j;
        nmapped++;
        mylpwgts[j] = 0;
        if (mype == k)
          done = 1;
      }

      if (nmapped == nparts)
        break;
    }

    if (nmapped == nparts)
      break;
  }

  /* Map unmapped partitions */
  if (nmapped < nparts) {
    for (i=j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (; i<nparts; i++) {
          if (rowmap[i] == -1 && SimilarTpwgts(ctrl->tpwgts, ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;
            nmapped++;
            break;
          }
        }
      }
    }
  }

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts) {
    for (i=0; i<nparts; i++)
      map[i] = i; 
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %0\n")); 
  }
  else {
    /* check for a savings */
    oldwgt  = lpwgts[mype];
    newwgt  = lpwgts[rowmap[mype]];
    nsaved  = newwgt - oldwgt;
    gnsaved = GlobalSESum(ctrl, nsaved);

    /* undo everything if we don't see a savings */
    if (gnsaved <= 0) {
      for (i=0; i<nparts; i++)
        map[i] = i;
    }
    IFSET(ctrl->dbglvl, DBG_REMAP, rprintf(ctrl, "Savings from parallel remapping: %d\n", amax(0,gnsaved))); 
  }

  GKfree((void **)&recv, (void **)&mylpwgts, LTERM);

}
예제 #8
0
파일: xyzpart.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function implements a simple coordinate based partitioning
**************************************************************************/
void Coordinate_Partition(CtrlType *ctrl, GraphType *graph, int ndims, float *xyz, 
                          int setup, WorkSpaceType *wspace)
{
  int i, j, k, nvtxs, firstvtx, icoord, coords[3];
  idxtype *vtxdist;
  float max[3], min[3], gmin[3], gmax[3], shift[3], scale[3];
  KeyValueType *cand;

  if (setup)
    SetUp(ctrl, graph, wspace);
  else
    graph->nrecv = 0;

  nvtxs = graph->nvtxs;
  vtxdist = graph->vtxdist;

  firstvtx = vtxdist[ctrl->mype];

  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "Coordinate_Partition: cand");

  /* Compute parameters for coordinate transformation */
  for (k=0; k<ndims; k++) {
    min[k] = +10000000;
    max[k] = -10000000;
  }
  for (i=0; i<nvtxs; i++) {
    for (k=0; k<ndims; k++) {
      if (xyz[i*ndims+k] < min[k])
        min[k] = xyz[i*ndims+k];
      if (xyz[i*ndims+k] > max[k])
        max[k] = xyz[i*ndims+k];
    }
  }

  /* Compute global min and max */
  MPI_Allreduce((void *)min, (void *)gmin, ndims, MPI_FLOAT, MPI_MIN, ctrl->comm);
  MPI_Allreduce((void *)max, (void *)gmax, ndims, MPI_FLOAT, MPI_MAX, ctrl->comm);

  /* myprintf(ctrl, "Coordinate Range: %e %e, Global %e %e\n", min[0], max[0], gmin[0], gmax[0]); */

  for (k=0; k<ndims; k++) {
    /* rprintf(ctrl, "Dim#%d: %e %e, span: %e\n", k, gmin[k], gmax[k], gmax[k]-gmin[k]); */
    shift[k] = -gmin[k];
    if (gmax[k] != gmin[k])
      scale[k] = 1.0/(gmax[k]-gmin[k]);
    else
      scale[k] = 1.0;
  }

  switch (ctrl->xyztype) {
    case XYZ_XCOORD:
      for (i=0; i<nvtxs; i++) {
        cand[i].key = 1000000*((xyz[i*ndims]+shift[0])*scale[0]);
        ASSERT(ctrl, cand[i].key>=0 && cand[i].key<=1000000);
        cand[i].val = firstvtx+i;
      }
      break;
    case XYZ_SPFILL:
      for (i=0; i<nvtxs; i++) {
        for (k=0; k<ndims; k++)
          coords[k] = 1024*((xyz[i*ndims+k]+shift[k])*scale[k]);
        for (icoord=0, j=9; j>=0; j--) {
          for (k=0; k<ndims; k++)
            icoord = (icoord<<1) + (coords[k]&(1<<j) ? 1 : 0);
        }
        cand[i].key = icoord;
        cand[i].val = firstvtx+i;
      }
      break;
    default:
      errexit("Unknown XYZ_Type type!\n");
  }


  /* Partition using sorting */
  PartSort(ctrl, graph, cand, wspace);

  free(cand);

}
예제 #9
0
파일: kwayfm.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* This function performs k-way refinement
**************************************************************************/
void Moc_KWayFM(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace, int npasses)
{
  int h, i, ii, iii, j, k, c;
  int pass, nvtxs, nedges, ncon;
  int nmoves, nmoved, nswaps, nzgswaps;
/*  int gnswaps, gnzgswaps; */
  int me, firstvtx, lastvtx, yourlastvtx;
  int from, to = -1, oldto, oldcut, mydomain, yourdomain, imbalanced, overweight;
  int npes = ctrl->npes, mype = ctrl->mype, nparts = ctrl->nparts;
  int nlupd, nsupd, nnbrs, nchanged;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  idxtype *where, *tmp_where, *moved;
  floattype *lnpwgts, *gnpwgts, *ognpwgts, *pgnpwgts, *movewgts, *overfill;
  idxtype *update, *supdate, *rupdate, *pe_updates;
  idxtype *changed, *perm, *pperm, *htable;
  idxtype *peind, *recvptr, *sendptr;
  KeyValueType *swchanges, *rwchanges;
  RInfoType *rinfo, *myrinfo, *tmp_myrinfo, *tmp_rinfo;
  EdgeType *tmp_edegrees, *my_edegrees, *your_edegrees;
  floattype lbvec[MAXNCON], *nvwgt, *badmaxpwgt, *ubvec, *tpwgts, lbavg, ubavg;
  int *nupds_pe;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayTmr));

  /*************************/
  /* set up common aliases */
  /*************************/
  nvtxs = graph->nvtxs;
  nedges = graph->nedges;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  where   = graph->where;
  rinfo   = graph->rinfo;
  lnpwgts = graph->lnpwgts;
  gnpwgts = graph->gnpwgts;
  ubvec   = ctrl->ubvec;
  tpwgts  = ctrl->tpwgts;

  nnbrs = graph->nnbrs;
  peind = graph->peind;
  recvptr = graph->recvptr;
  sendptr = graph->sendptr;

  changed = idxmalloc(nvtxs, "KWR: changed");
  rwchanges = wspace->pairs;
  swchanges = rwchanges + recvptr[nnbrs];

  /************************************/
  /* set up important data structures */
  /************************************/
  perm = idxmalloc(nvtxs, "KWR: perm");
  pperm = idxmalloc(nparts, "KWR: pperm");

  update = idxmalloc(nvtxs, "KWR: update");
  supdate = wspace->indices;
  rupdate = supdate + recvptr[nnbrs];
  nupds_pe = imalloc(npes, "KWR: nupds_pe");
  htable = idxsmalloc(nvtxs+graph->nrecv, 0, "KWR: lhtable");
  badmaxpwgt = fmalloc(nparts*ncon, "badmaxpwgt");

  for (i=0; i<nparts; i++) {
    for (h=0; h<ncon; h++) {
      badmaxpwgt[i*ncon+h] = ubvec[h]*tpwgts[i*ncon+h];
    }
  }

  movewgts = fmalloc(nparts*ncon, "KWR: movewgts");
  ognpwgts = fmalloc(nparts*ncon, "KWR: ognpwgts");
  pgnpwgts = fmalloc(nparts*ncon, "KWR: pgnpwgts");
  overfill = fmalloc(nparts*ncon, "KWR: overfill");
  moved = idxmalloc(nvtxs, "KWR: moved");
  tmp_where = idxmalloc(nvtxs+graph->nrecv, "KWR: tmp_where");
  tmp_rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "KWR: tmp_rinfo");
  tmp_edegrees = (EdgeType *)GKmalloc(sizeof(EdgeType)*nedges, "KWR: tmp_edegrees");

  idxcopy(nvtxs+graph->nrecv, where, tmp_where);
  for (i=0; i<nvtxs; i++) {
    tmp_rinfo[i].id = rinfo[i].id;
    tmp_rinfo[i].ed = rinfo[i].ed;
    tmp_rinfo[i].ndegrees = rinfo[i].ndegrees;
    tmp_rinfo[i].degrees = tmp_edegrees+xadj[i];

    for (j=0; j<rinfo[i].ndegrees; j++) {
      tmp_rinfo[i].degrees[j].edge = rinfo[i].degrees[j].edge;
      tmp_rinfo[i].degrees[j].ewgt = rinfo[i].degrees[j].ewgt;
    }
  }

  nswaps = nzgswaps = 0;
  /*********************************************************/
  /* perform a small number of passes through the vertices */
  /*********************************************************/
  for (pass=0; pass<npasses; pass++) {
    if (mype == 0)
      RandomPermute(nparts, pperm, 1);
    MPI_Bcast((void *)pperm, nparts, IDX_DATATYPE, 0, ctrl->comm);
    FastRandomPermute(nvtxs, perm, 1);
    oldcut = graph->mincut;

    /* check to see if the partitioning is imbalanced */
    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    ubavg = savg(ncon, ubvec);
    lbavg = savg(ncon, lbvec);
    imbalanced = (lbavg > ubavg) ? 1 : 0;

    for (c=0; c<2; c++) {
      scopy(ncon*nparts, gnpwgts, ognpwgts);
      sset(ncon*nparts, 0.0, movewgts);
      nmoved = 0;

      /**********************************************/
      /* PASS ONE -- record stats for desired moves */
      /**********************************************/
      for (iii=0; iii<nvtxs; iii++) {
        i = perm[iii];
        from = tmp_where[i];
        nvwgt = graph->nvwgt+i*ncon;

        for (h=0; h<ncon; h++)
          if (fabs(nvwgt[h]-gnpwgts[from*ncon+h]) < SMALLFLOAT)
            break;

        if (h < ncon) {
          continue;
        }

        /* check for a potential improvement */
        if (tmp_rinfo[i].ed >= tmp_rinfo[i].id) {
          my_edegrees = tmp_rinfo[i].degrees;

          for (k=0; k<tmp_rinfo[i].ndegrees; k++) {
            to = my_edegrees[k].edge;
            if (ProperSide(c, pperm[from], pperm[to])) {
              for (h=0; h<ncon; h++)
                if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                  break;

              if (h == ncon)
                break;
            }
          }
          oldto = to;

          /* check if a subdomain was found that fits */
          if (k < tmp_rinfo[i].ndegrees) {
            for (j=k+1; j<tmp_rinfo[i].ndegrees; j++) {
              to = my_edegrees[j].edge;
              if (ProperSide(c, pperm[from], pperm[to])) {
                for (h=0; h<ncon; h++)
                  if (gnpwgts[to*ncon+h]+nvwgt[h] > badmaxpwgt[to*ncon+h] && nvwgt[h] > 0.0)
                    break;

                if (h == ncon) {
                  if (my_edegrees[j].ewgt > my_edegrees[k].ewgt ||
                   (my_edegrees[j].ewgt == my_edegrees[k].ewgt &&
                   IsHBalanceBetterTT(ncon,gnpwgts+oldto*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){
                    k = j;
                    oldto = my_edegrees[k].edge;
                  }
                }
              }
            }
            to = oldto;

            if (my_edegrees[k].ewgt > tmp_rinfo[i].id ||
            (my_edegrees[k].ewgt == tmp_rinfo[i].id &&
            (imbalanced ||  graph->level > 3  || iii % 8 == 0) &&
            IsHBalanceBetterFT(ncon,gnpwgts+from*ncon,gnpwgts+to*ncon,nvwgt,ubvec))){

              /****************************************/
              /* Update tmp arrays of the moved vertex */
              /****************************************/
              tmp_where[i] = to;
              moved[nmoved++] = i;
              for (h=0; h<ncon; h++) {
                lnpwgts[to*ncon+h] += nvwgt[h];
                lnpwgts[from*ncon+h] -= nvwgt[h];
                gnpwgts[to*ncon+h] += nvwgt[h];
                gnpwgts[from*ncon+h] -= nvwgt[h];
                movewgts[to*ncon+h] += nvwgt[h];
                movewgts[from*ncon+h] -= nvwgt[h];
              }

              tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
              SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
              if (my_edegrees[k].ewgt == 0) {
                tmp_rinfo[i].ndegrees--;
                my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
                my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
              }
              else {
                my_edegrees[k].edge = from;
              }

              /* Update the degrees of adjacent vertices */
              for (j=xadj[i]; j<xadj[i+1]; j++) {
                /* no need to bother about vertices on different pe's */
                if (ladjncy[j] >= nvtxs)
                  continue;

                me = ladjncy[j];
                mydomain = tmp_where[me];

                myrinfo = tmp_rinfo+me;
                your_edegrees = myrinfo->degrees;

                if (mydomain == from) {
                  INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
                }
                else {
                  if (mydomain == to) {
                    INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                  }
                }

                /* Remove contribution from the .ed of 'from' */
                if (mydomain != from) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == from) {
                      if (your_edegrees[k].ewgt == adjwgt[j]) {
                        myrinfo->ndegrees--;
                        your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                        your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                      }
                      else {
                        your_edegrees[k].ewgt -= adjwgt[j];
                      }
                      break;
                    }
                  }
                }

                /* Add contribution to the .ed of 'to' */
                if (mydomain != to) {
                  for (k=0; k<myrinfo->ndegrees; k++) {
                    if (your_edegrees[k].edge == to) {
                      your_edegrees[k].ewgt += adjwgt[j];
                      break;
                    }
                  }
                  if (k == myrinfo->ndegrees) {
                    your_edegrees[myrinfo->ndegrees].edge = to;
                    your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                  }
                }
              }
            }
          }
        }
      }

      /******************************************/
      /* Let processors know the subdomain wgts */
      /* if all proposed moves commit.          */
      /******************************************/
      MPI_Allreduce((void *)lnpwgts, (void *)pgnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);

      /**************************/
      /* compute overfill array */
      /**************************/
      overweight = 0;
      for (j=0; j<nparts; j++) {
        for (h=0; h<ncon; h++) {
          if (pgnpwgts[j*ncon+h] > ognpwgts[j*ncon+h]) {
            overfill[j*ncon+h] =
            (pgnpwgts[j*ncon+h]-badmaxpwgt[j*ncon+h]) /
            (pgnpwgts[j*ncon+h]-ognpwgts[j*ncon+h]);
          }
          else {
            overfill[j*ncon+h] = 0.0;
          }

          overfill[j*ncon+h] = amax(overfill[j*ncon+h], 0.0);
          overfill[j*ncon+h] *= movewgts[j*ncon+h];

          if (overfill[j*ncon+h] > 0.0)
            overweight = 1;

          ASSERTP(ctrl, ognpwgts[j*ncon+h] <= badmaxpwgt[j*ncon+h] ||
          pgnpwgts[j*ncon+h] <= ognpwgts[j*ncon+h],
          (ctrl, "%.4f %.4f %.4f\n", ognpwgts[j*ncon+h],
          badmaxpwgt[j*ncon+h], pgnpwgts[j*ncon+h]));
        }
      }

      /****************************************************/
      /* select moves to undo according to overfill array */
      /****************************************************/
      if (overweight == 1) {
        for (iii=0; iii<nmoved; iii++) {
          i = moved[iii];
          oldto = tmp_where[i];
          nvwgt = graph->nvwgt+i*ncon;
          my_edegrees = tmp_rinfo[i].degrees;

          for (k=0; k<tmp_rinfo[i].ndegrees; k++)
            if (my_edegrees[k].edge == where[i])
              break;

          for (h=0; h<ncon; h++)
            if (nvwgt[h] > 0.0 && overfill[oldto*ncon+h] > nvwgt[h]/4.0)
              break;

          /**********************************/
          /* nullify this move if necessary */
          /**********************************/
          if (k != tmp_rinfo[i].ndegrees && h != ncon) {
            moved[iii] = -1;
            from = oldto;
            to = where[i];

            for (h=0; h<ncon; h++) {
              overfill[oldto*ncon+h] = amax(overfill[oldto*ncon+h]-nvwgt[h], 0.0);
            }

            tmp_where[i] = to;
            tmp_rinfo[i].ed += tmp_rinfo[i].id-my_edegrees[k].ewgt;
            SWAP(tmp_rinfo[i].id, my_edegrees[k].ewgt, j);
            if (my_edegrees[k].ewgt == 0) {
              tmp_rinfo[i].ndegrees--;
              my_edegrees[k].edge = my_edegrees[tmp_rinfo[i].ndegrees].edge;
              my_edegrees[k].ewgt = my_edegrees[tmp_rinfo[i].ndegrees].ewgt;
            }
            else {
              my_edegrees[k].edge = from;
            }

            for (h=0; h<ncon; h++) {
              lnpwgts[to*ncon+h] += nvwgt[h];
              lnpwgts[from*ncon+h] -= nvwgt[h];
            }

            /* Update the degrees of adjacent vertices */
            for (j=xadj[i]; j<xadj[i+1]; j++) {
              /* no need to bother about vertices on different pe's */
              if (ladjncy[j] >= nvtxs)
                continue;

              me = ladjncy[j];
              mydomain = tmp_where[me];

              myrinfo = tmp_rinfo+me;
              your_edegrees = myrinfo->degrees;

              if (mydomain == from) {
                INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
              }
              else {
                if (mydomain == to) {
                  INC_DEC(myrinfo->id, myrinfo->ed, adjwgt[j]);
                }
              }

              /* Remove contribution from the .ed of 'from' */
              if (mydomain != from) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == from) {
                    if (your_edegrees[k].ewgt == adjwgt[j]) {
                      myrinfo->ndegrees--;
                      your_edegrees[k].edge = your_edegrees[myrinfo->ndegrees].edge;
                      your_edegrees[k].ewgt = your_edegrees[myrinfo->ndegrees].ewgt;
                    }
                    else {
                      your_edegrees[k].ewgt -= adjwgt[j];
                    }
                    break;
                  }
                }
              }

              /* Add contribution to the .ed of 'to' */
              if (mydomain != to) {
                for (k=0; k<myrinfo->ndegrees; k++) {
                  if (your_edegrees[k].edge == to) {
                    your_edegrees[k].ewgt += adjwgt[j];
                    break;
                  }
                }
                if (k == myrinfo->ndegrees) {
                  your_edegrees[myrinfo->ndegrees].edge = to;
                  your_edegrees[myrinfo->ndegrees++].ewgt = adjwgt[j];
                }
              }
            }
          }
        }
      }

      /*************************************************/
      /* PASS TWO -- commit the remainder of the moves */
      /*************************************************/
      nlupd = nsupd = nmoves = nchanged = 0;
      for (iii=0; iii<nmoved; iii++) {
        i = moved[iii];
        if (i == -1)
          continue;

        where[i] = tmp_where[i];

        /* Make sure to update the vertex information */
        if (htable[i] == 0) {
          /* make sure you do the update */
          htable[i] = 1;
          update[nlupd++] = i;
        }

        /* Put the vertices adjacent to i into the update array */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          k = ladjncy[j];
          if (htable[k] == 0) {
            htable[k] = 1;
            if (k<nvtxs)
              update[nlupd++] = k;
            else
              supdate[nsupd++] = k;
          }
        }
        nmoves++;
        nswaps++;

        /* check number of zero-gain moves */
        for (k=0; k<rinfo[i].ndegrees; k++)
          if (rinfo[i].degrees[k].edge == to)
            break;
        if (rinfo[i].id == rinfo[i].degrees[k].ewgt)
          nzgswaps++;

        if (graph->pexadj[i+1]-graph->pexadj[i] > 0)
          changed[nchanged++] = i;
      }

      /* Tell interested pe's the new where[] info for the interface vertices */
      CommChangedInterfaceData(ctrl, graph, nchanged, changed, where,
      swchanges, rwchanges, wspace->pv4); 


      IFSET(ctrl->dbglvl, DBG_RMOVEINFO,
      rprintf(ctrl, "\t[%d %d], [%.4f],  [%d %d %d]\n",
      pass, c, badmaxpwgt[0],
      GlobalSESum(ctrl, nmoves),
      GlobalSESum(ctrl, nsupd),
      GlobalSESum(ctrl, nlupd)));

      /*-------------------------------------------------------------
      / Time to communicate with processors to send the vertices
      / whose degrees need to be update.
      /-------------------------------------------------------------*/
      /* Issue the receives first */
      for (i=0; i<nnbrs; i++) {
        MPI_Irecv((void *)(rupdate+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE,
                  peind[i], 1, ctrl->comm, ctrl->rreq+i);
      }

      /* Issue the sends next. This needs some preporcessing */
      for (i=0; i<nsupd; i++) {
        htable[supdate[i]] = 0;
        supdate[i] = graph->imap[supdate[i]];
      }
      iidxsort(nsupd, supdate);

      for (j=i=0; i<nnbrs; i++) {
        yourlastvtx = vtxdist[peind[i]+1];
        for (k=j; k<nsupd && supdate[k] < yourlastvtx; k++); 
        MPI_Isend((void *)(supdate+j), k-j, IDX_DATATYPE, peind[i], 1, ctrl->comm, ctrl->sreq+i);
        j = k;
      }

      /* OK, now get into the loop waiting for the send/recv operations to finish */
      MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
      for (i=0; i<nnbrs; i++) 
        MPI_Get_count(ctrl->statuses+i, IDX_DATATYPE, nupds_pe+i);
      MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);


      /*-------------------------------------------------------------
      / Place the recieved to-be updated vertices into update[] 
      /-------------------------------------------------------------*/
      for (i=0; i<nnbrs; i++) {
        pe_updates = rupdate+sendptr[i];
        for (j=0; j<nupds_pe[i]; j++) {
          k = pe_updates[j];
          if (htable[k-firstvtx] == 0) {
            htable[k-firstvtx] = 1;
            update[nlupd++] = k-firstvtx;
          }
        }
      }


      /*-------------------------------------------------------------
      / Update the rinfo of the vertices in the update[] array
      /-------------------------------------------------------------*/
      for (ii=0; ii<nlupd; ii++) {
        i = update[ii];
        ASSERT(ctrl, htable[i] == 1);

        htable[i] = 0;

        mydomain = where[i];
        myrinfo = rinfo+i;
        tmp_myrinfo = tmp_rinfo+i;
        my_edegrees = myrinfo->degrees;
        your_edegrees = tmp_myrinfo->degrees;

        graph->lmincut -= myrinfo->ed;
        myrinfo->ndegrees = 0;
        myrinfo->id = 0;
        myrinfo->ed = 0;

        for (j=xadj[i]; j<xadj[i+1]; j++) {
          yourdomain = where[ladjncy[j]];
          if (mydomain != yourdomain) {
            myrinfo->ed += adjwgt[j];

            for (k=0; k<myrinfo->ndegrees; k++) {
              if (my_edegrees[k].edge == yourdomain) {
                my_edegrees[k].ewgt += adjwgt[j];
                your_edegrees[k].ewgt += adjwgt[j];
                break;
              }
            }
            if (k == myrinfo->ndegrees) {
              my_edegrees[k].edge = yourdomain;
              my_edegrees[k].ewgt = adjwgt[j];
              your_edegrees[k].edge = yourdomain;
              your_edegrees[k].ewgt = adjwgt[j];
              myrinfo->ndegrees++;
            }
            ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
            ASSERT(ctrl, tmp_myrinfo->ndegrees <= xadj[i+1]-xadj[i]);

          }
          else {
            myrinfo->id += adjwgt[j];
          }
        }
        graph->lmincut += myrinfo->ed;

        tmp_myrinfo->id = myrinfo->id;
        tmp_myrinfo->ed = myrinfo->ed;
        tmp_myrinfo->ndegrees = myrinfo->ndegrees;
      }

      /* finally, sum-up the partition weights */
      MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, nparts*ncon,
      MPI_DOUBLE, MPI_SUM, ctrl->comm);
    }
    graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

    if (graph->mincut == oldcut)
      break;
  }

/*
  gnswaps = GlobalSESum(ctrl, nswaps);
  gnzgswaps = GlobalSESum(ctrl, nzgswaps);
  if (mype == 0)
    printf("niters: %d, nswaps: %d, nzgswaps: %d\n", pass+1, gnswaps, gnzgswaps);
*/

  GKfree((void **)&badmaxpwgt, (void **)&update, (void **)&nupds_pe, (void **)&htable, LTERM);
  GKfree((void **)&changed, (void **)&pperm, (void **)&perm, (void **)&moved, LTERM);
  GKfree((void **)&pgnpwgts, (void **)&ognpwgts, (void **)&overfill, (void **)&movewgts, LTERM);
  GKfree((void **)&tmp_where, (void **)&tmp_rinfo, (void **)&tmp_edegrees, LTERM);

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayTmr));
}
예제 #10
0
/*************************************************************************
* This function allocates memory for the workspace
**************************************************************************/
void AllocateWorkSpace(CtrlType *ctrl, GraphType *graph, int nparts)
{
  ctrl->wspace.pmat = NULL;

  if (ctrl->optype == OP_KMETIS) {
    ctrl->wspace.edegrees = (EDegreeType *)GKmalloc(graph->nedges*sizeof(EDegreeType), "AllocateWorkSpace: edegrees");
    ctrl->wspace.vedegrees = NULL;
    ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.edegrees;

    ctrl->wspace.pmat = idxmalloc(nparts*nparts, "AllocateWorkSpace: pmat"); 

    /* Memory requirements for different phases
          Coarsening
                    Matching: 4*nvtxs vectors
                 Contraction: 2*nvtxs vectors (from the above 4), 1*nparts, 1*Nedges
            Total = MAX(4*nvtxs, 2*nvtxs+nparts+nedges)

          Refinement
                Random Refinement/Balance: 5*nparts + 1*nvtxs + 2*nedges
                Greedy Refinement/Balance: 5*nparts + 2*nvtxs + 2*nedges + 1*PQueue(==Nvtxs)
            Total = 5*nparts + 3*nvtxs + 2*nedges

         Total = 5*nparts + 3*nvtxs + 2*nedges 
    */
    ctrl->wspace.maxcore = 3*(graph->nvtxs+1) +                 /* Match/Refinement vectors */
                           5*(nparts+1) +                       /* Partition weights etc */
                           graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* Greedy k-way balance/refine */
                           20  /* padding for 64 bit machines */
                           ;
  }
  else if (ctrl->optype == OP_KVMETIS) {
    ctrl->wspace.edegrees = NULL;
    ctrl->wspace.vedegrees = (VEDegreeType *)GKmalloc(graph->nedges*sizeof(VEDegreeType), "AllocateWorkSpace: vedegrees");
    ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.vedegrees;

    ctrl->wspace.pmat = idxmalloc(nparts*nparts, "AllocateWorkSpace: pmat");

    /* Memory requirements for different phases are identical to KMETIS */
    ctrl->wspace.maxcore = 3*(graph->nvtxs+1) +                 /* Match/Refinement vectors */
                           3*(nparts+1) +                       /* Partition weights etc */
                           graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* Greedy k-way balance/refine */
                           20  /* padding for 64 bit machines */
                           ;
  }
  else {
    ctrl->wspace.edegrees = (EDegreeType *)idxmalloc(graph->nedges, "AllocateWorkSpace: edegrees");
    ctrl->wspace.vedegrees = NULL;
    ctrl->wspace.auxcore = (idxtype *)ctrl->wspace.edegrees;

    ctrl->wspace.maxcore = 5*(graph->nvtxs+1) +                 /* Refinement vectors */
                           4*(nparts+1) +                       /* Partition weights etc */
                           2*graph->ncon*graph->nvtxs*(sizeof(ListNodeType)/sizeof(idxtype)) + /* 2-way refinement */
                           2*graph->ncon*(NEG_GAINSPAN+PLUS_GAINSPAN+1)*(sizeof(ListNodeType *)/sizeof(idxtype)) + /* 2-way refinement */
                           20  /* padding for 64 bit machines */
                           ;
  }

  ctrl->wspace.maxcore += HTLENGTH;
  ctrl->wspace.core = idxmalloc(ctrl->wspace.maxcore, "AllocateWorkSpace: maxcore");
  ctrl->wspace.ccore = 0;
}
예제 #11
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void Mc_Serial_Balance2Way(GraphType *graph, float *tpwgts, float lbfactor)
{
  int i, ii, j, k, kwgt, nvtxs, ncon, nbnd, nswaps, from, to, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, newcut, mincutorder;
  int qsizes[MAXNCON][2];
  KeyValueType *cand;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");


  limit = amin(amax(0.01*nvtxs, 15), 100);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
    qsizes[i][0] = qsizes[i][1] = 0;
  }

  for (i=0; i<nvtxs; i++) {
    qnum[i] = samax(ncon, nvwgt+i*ncon);
    qsizes[qnum[i]][where[i]]++;
  }

  for (from=0; from<2; from++) {
    for (j=0; j<ncon; j++) {
      if (qsizes[j][from] == 0) {
        for (i=0; i<nvtxs; i++) {
          if (where[i] != from)
            continue;

          k = samax2(ncon, nvwgt+i*ncon);
          if (k == j &&
               qsizes[qnum[i]][from] > qsizes[j][from] &&
               nvwgt[i*ncon+qnum[i]] < 1.3*nvwgt[i*ncon+j]) {
            qsizes[qnum[i]][from]--;
            qsizes[j][from]++;
            qnum[i] = j;
          }
        }
      }
    }
  }


  for (i=0; i<ncon; i++)
    mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
  minbal = origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
  newcut = mincut = graph->mincut;
  mincutorder = -1;

  idxset(nvtxs, -1, moved);

  /* Insert all nodes in the priority queues */
  nbnd = graph->gnvtxs;
  for (i=0; i<nvtxs; i++) {
    cand[i].key = id[i]-ed[i];
    cand[i].val = i;
  }
  ikeysort(nvtxs, cand);

  for (ii=0; ii<nvtxs; ii++) {
    i = cand[ii].val;
    FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (minbal < lbfactor)
      break;

    Serial_SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    if (newbal < minbal || (newbal == minbal &&
        (newcut < mincut || (newcut == mincut &&
          Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
      mincut = newcut;
      minbal = newbal;
      mincutorder = nswaps;
      for (i=0; i<ncon; i++)
        mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    /**************************************************************
    * Update the id[i]/ed[i] values of the affected nodes
    ***************************************************************/
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];
      oldgain = ed[k]-id[k];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      /* Update the queue position */
      if (moved[k] == -1)
        FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)(oldgain), (float)(ed[k]-id[k]));

      /* Update its boundary information */
      if (ed[k] == 0 && bndptr[k] != -1)
        BNDDelete(nbnd, bndind, bndptr, k);
      else if (ed[k] > 0 && bndptr[k] == -1)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }


  /****************************************************************
  * Roll back computations
  *****************************************************************/
  for (nswaps--; nswaps>mincutorder; nswaps--) {
    higain = swaps[nswaps];

    to = where[higain] = (where[higain]+1)%2;
    SWAP(id[higain], ed[higain], tmp);
    if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
      BNDDelete(nbnd, bndind,  bndptr, higain);
    else if (ed[higain] > 0 && bndptr[higain] == -1)
      BNDInsert(nbnd, bndind,  bndptr, higain);

    saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
    for (j=xadj[higain]; j<xadj[higain+1]; j++) {
      k = adjncy[j];

      kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
      INC_DEC(id[k], ed[k], kwgt);

      if (bndptr[k] != -1 && ed[k] == 0)
        BNDDelete(nbnd, bndind, bndptr, k);
      if (bndptr[k] == -1 && ed[k] > 0)
        BNDInsert(nbnd, bndind, bndptr, k);
    }
  }

  graph->mincut = mincut;
  graph->gnvtxs = nbnd;


  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
  return;
}
예제 #12
0
파일: io.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function reads the CSR matrix
**************************************************************************/
void ParallelReadGraph(GraphType *graph, char *filename, MPI_Comm comm)
{
  int i, k, l, pe;
  int npes, mype, ier;
  int gnvtxs, nvtxs, your_nvtxs, your_nedges, gnedges;
  int maxnvtxs = -1, maxnedges = -1;
  int readew = -1, readvw = -1, dummy, edge;
  idxtype *vtxdist, *xadj, *adjncy, *vwgt, *adjwgt;
  idxtype *your_xadj, *your_adjncy, *your_vwgt, *your_adjwgt, graphinfo[4];
  int fmt, ncon, nobj;
  MPI_Status stat;
  char *line = NULL, *oldstr, *newstr;
  FILE *fpin = NULL;

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  vtxdist = graph->vtxdist = idxsmalloc(npes+1, 0, "ReadGraph: vtxdist");

  if (mype == npes-1) {
    ier = 0;
    fpin = fopen(filename, "r");

    if (fpin == NULL){
      printf("COULD NOT OPEN FILE '%s' FOR SOME REASON!\n", filename);
      ier++;
    }

    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    line = (char *)GKmalloc(sizeof(char)*(MAXLINE+1), "line");

    do {
      fgets(line, MAXLINE, fpin);
    } while (line[0] == '%' && !feof(fpin));

    fmt = ncon = nobj = 0;
    sscanf(line, "%d %d %d %d %d", &gnvtxs, &gnedges, &fmt, &ncon, &nobj);
    gnedges *=2;
    readew = (fmt%10 > 0);
    readvw = ((fmt/10)%10 > 0);
    graph->ncon = ncon = (ncon == 0 ? 1 : ncon);
    graph->nobj = nobj = (nobj == 0 ? 1 : nobj);

/*    printf("Nvtxs: %d, Nedges: %d, Ncon: %d\n", gnvtxs, gnedges, ncon); */

    graphinfo[0] = ncon;
    graphinfo[1] = nobj;
    graphinfo[2] = readvw;
    graphinfo[3] = readew;
    MPI_Bcast((void *)graphinfo, 4, IDX_DATATYPE, npes-1, comm);

    /* Construct vtxdist and send it to all the processors */
    vtxdist[0] = 0;
    for (i=0,k=gnvtxs; i<npes; i++) {
      l = k/(npes-i);
      vtxdist[i+1] = vtxdist[i]+l;
      k -= l;
    }

    MPI_Bcast((void *)vtxdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }
  else {
    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    MPI_Bcast((void *)graphinfo, 4, IDX_DATATYPE, npes-1, comm);
    graph->ncon = ncon = graphinfo[0];
    graph->nobj = nobj = graphinfo[1];
    readvw = graphinfo[2];
    readew = graphinfo[3];

    MPI_Bcast((void *)vtxdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }

  if ((ncon > 1 && !readvw) || (nobj > 1 && !readew)) {
    printf("fmt and ncon/nobj are inconsistant.  Exiting...\n");
    MPI_Finalize();
    exit(-1);
  }

  graph->gnvtxs = vtxdist[npes];
  nvtxs = graph->nvtxs = vtxdist[mype+1]-vtxdist[mype];
  xadj = graph->xadj = idxmalloc(graph->nvtxs+1, "ParallelReadGraph: xadj");
  vwgt = graph->vwgt = idxmalloc(graph->nvtxs*ncon, "ParallelReadGraph: vwgt");
  /*******************************************/
  /* Go through first time and generate xadj */
  /*******************************************/
  if (mype == npes-1) {
    maxnvtxs = 0;
    for (i=0; i<npes; i++) {
      maxnvtxs = (maxnvtxs < vtxdist[i+1]-vtxdist[i]) ?
      vtxdist[i+1]-vtxdist[i] : maxnvtxs;
    }

    your_xadj = idxmalloc(maxnvtxs+1, "your_xadj");
    your_vwgt = idxmalloc(maxnvtxs*ncon, "your_vwgt");

    maxnedges = 0;
    for (pe=0; pe<npes; pe++) {
      idxset(maxnvtxs*ncon, 1, your_vwgt);
      your_nvtxs = vtxdist[pe+1]-vtxdist[pe];
      for (i=0; i<your_nvtxs; i++) {
        your_nedges = 0;

        do {
          fgets(line, MAXLINE, fpin);
        } while (line[0] == '%' && !feof(fpin));

        oldstr = line;
        newstr = NULL;

        if (readvw) {
          for (l=0; l<ncon; l++) {
            your_vwgt[i*ncon+l] = (int)strtol(oldstr, &newstr, 10);
            oldstr = newstr;
          }
        }

        for (;;) {
          edge = (int)strtol(oldstr, &newstr, 10) -1;
          oldstr = newstr;

          if (edge < 0)
            break;

          if (readew) {
            for (l=0; l<nobj; l++) {
              dummy = (int)strtol(oldstr, &newstr, 10);
              oldstr = newstr;
            }
          }
          your_nedges++;
        }
        your_xadj[i] = your_nedges;
      }

      MAKECSR(i, your_nvtxs, your_xadj);
      maxnedges = (maxnedges < your_xadj[your_nvtxs]) ?
      your_xadj[your_nvtxs] : maxnedges;

      if (pe < npes-1) {
        MPI_Send((void *)your_xadj, your_nvtxs+1, IDX_DATATYPE, pe, 0, comm);
        MPI_Send((void *)your_vwgt, your_nvtxs*ncon, IDX_DATATYPE, pe, 1, comm);
      }
      else {
        for (i=0; i<your_nvtxs+1; i++)
          xadj[i] = your_xadj[i];
        for (i=0; i<your_nvtxs*ncon; i++)
          vwgt[i] = your_vwgt[i];
      }
    }
    fclose(fpin);
    GKfree(&your_xadj, &your_vwgt, LTERM);
  }
  else {
    MPI_Recv((void *)xadj, nvtxs+1, IDX_DATATYPE, npes-1, 0, comm, &stat);
    MPI_Recv((void *)vwgt, nvtxs*ncon, IDX_DATATYPE, npes-1, 1, comm, &stat);
  }

  graph->nedges = xadj[nvtxs];
  adjncy = graph->adjncy = idxmalloc(xadj[nvtxs], "ParallelReadGraph: adjncy");
  adjwgt = graph->adjwgt = idxmalloc(xadj[nvtxs]*nobj, "ParallelReadGraph: adjwgt");
  /***********************************************/
  /* Now go through again and record adjncy data */
  /***********************************************/
  if (mype == npes-1) {
    ier = 0;
    fpin = fopen(filename, "r");

    if (fpin == NULL){
      printf("COULD NOT OPEN FILE '%s' FOR SOME REASON!\n", filename);
      ier++;
    }

    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    /* get first line again */
    do {
      fgets(line, MAXLINE, fpin);
    } while (line[0] == '%' && !feof(fpin));

    your_adjncy = idxmalloc(maxnedges, "your_adjncy");
    your_adjwgt = idxmalloc(maxnedges*nobj, "your_adjwgt");

    for (pe=0; pe<npes; pe++) {
      your_nedges = 0;
      idxset(maxnedges*nobj, 1, your_adjwgt);
      your_nvtxs = vtxdist[pe+1]-vtxdist[pe];
      for (i=0; i<your_nvtxs; i++) {
        do {
          fgets(line, MAXLINE, fpin);
        } while (line[0] == '%' && !feof(fpin));

        oldstr = line;
        newstr = NULL;

        if (readvw) {
          for (l=0; l<ncon; l++) {
            dummy = (int)strtol(oldstr, &newstr, 10);
            oldstr = newstr;
          }
        }

        for (;;) {
          edge = (int)strtol(oldstr, &newstr, 10) -1;
          oldstr = newstr;

          if (edge < 0)
            break;

          your_adjncy[your_nedges] = edge;
          if (readew) {
            for (l=0; l<nobj; l++) {
              your_adjwgt[your_nedges*nobj+l] = (int)strtol(oldstr, &newstr, 10);
              oldstr = newstr;
            }
          }
          your_nedges++;

        }
      }
      if (pe < npes-1) {
        MPI_Send((void *)your_adjncy, your_nedges, IDX_DATATYPE, pe, 0, comm);
        MPI_Send((void *)your_adjwgt, your_nedges*nobj, IDX_DATATYPE, pe, 1, comm);
      }
      else {
        for (i=0; i<your_nedges; i++)
          adjncy[i] = your_adjncy[i];
        for (i=0; i<your_nedges*nobj; i++)
          adjwgt[i] = your_adjwgt[i];
      }
    }
    fclose(fpin);
    GKfree(&your_adjncy, &your_adjwgt, &line, LTERM);
  }
  else {
    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    MPI_Recv((void *)adjncy, xadj[nvtxs], IDX_DATATYPE, npes-1, 0, comm, &stat);
    MPI_Recv((void *)adjwgt, xadj[nvtxs]*nobj, IDX_DATATYPE, npes-1, 1, comm, &stat);
  }

}
예제 #13
0
/*************************************************************************
* This function computes the initial id/ed 
**************************************************************************/
void Moc_ComputePartitionParams(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int h, i, j, k;
  int nvtxs, ncon;
  int firstvtx, lastvtx;
  idxtype *xadj, *ladjncy, *adjwgt, *vtxdist;
  floattype *lnpwgts, *gnpwgts;
  idxtype *where, *swhere, *rwhere;
  RInfoType *rinfo, *myrinfo;
  EdgeType *edegrees;
  int me, other;

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->KWayInitTmr));


  nvtxs = graph->nvtxs;
  ncon = graph->ncon;

  vtxdist = graph->vtxdist;
  xadj = graph->xadj;
  ladjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  where = graph->where;
  rinfo = graph->rinfo = (RInfoType *)GKmalloc(sizeof(RInfoType)*nvtxs, "CPP: rinfo");
  lnpwgts = graph->lnpwgts = fmalloc(ctrl->nparts*ncon, "CPP: lnpwgts");
  gnpwgts = graph->gnpwgts = fmalloc(ctrl->nparts*ncon, "CPP: gnpwgts");

  sset(ctrl->nparts*ncon, 0, lnpwgts);

  firstvtx = vtxdist[ctrl->mype];
  lastvtx = vtxdist[ctrl->mype+1];

  /*------------------------------------------------------------
  / Send/Receive the where information of interface vertices
  /------------------------------------------------------------*/
  swhere = wspace->indices;
  rwhere = where + nvtxs;

  CommInterfaceData(ctrl, graph, where, swhere, rwhere); 

#ifdef DEBUG_COMPUTEPPARAM
  PrintVector(ctrl, nvtxs, firstvtx, where, "where");
#endif

  ASSERT(ctrl, wspace->nlarge >= xadj[nvtxs]);

  /*------------------------------------------------------------
  / Compute now the id/ed degrees
  /------------------------------------------------------------*/
  graph->lmincut = 0;
  for (i=0; i<nvtxs; i++) {
    me = where[i];
    myrinfo = rinfo+i;

    for (h=0; h<ncon; h++)
      lnpwgts[me*ncon+h] += graph->nvwgt[i*ncon+h];

    myrinfo->degrees = wspace->degrees + xadj[i];
    myrinfo->ndegrees = myrinfo->id = myrinfo->ed = 0;

    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (me == where[ladjncy[j]])
        myrinfo->id += adjwgt[j];
      else
        myrinfo->ed += adjwgt[j];
    }


    if (myrinfo->ed > 0) {  /* Time to do some serious work */
      graph->lmincut += myrinfo->ed;
      edegrees = myrinfo->degrees;

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        other = where[ladjncy[j]];
        if (me != other) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (edegrees[k].edge == other) {
              edegrees[k].ewgt += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            edegrees[k].edge = other;
            edegrees[k].ewgt = adjwgt[j];
            myrinfo->ndegrees++;
          }
          ASSERT(ctrl, myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
        }
      }
    }
  }

#ifdef DEBUG_COMPUTEPPARAM
  PrintVector(ctrl, ctrl->nparts*ncon, 0, lnpwgts, "lnpwgts");
#endif

  /* Finally, sum-up the partition weights */
  MPI_Allreduce((void *)lnpwgts, (void *)gnpwgts, ctrl->nparts*ncon, MPI_DOUBLE, MPI_SUM, ctrl->comm);

  graph->mincut = GlobalSESum(ctrl, graph->lmincut)/2;

#ifdef DEBUG_COMPUTEPPARAM
  PrintVector(ctrl, ctrl->nparts*ncon, 0, gnpwgts, "gnpwgts");
#endif

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->KWayInitTmr));
}
예제 #14
0
파일: mesh.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
                 int *numflag, int *ncommonnodes, idxtype **xadj, 
		 idxtype **adjncy, MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;


  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i) 
	    continue;
	    
          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
	        }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
	    }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
	}
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, 
         (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, 
	 (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
예제 #15
0
/**************************************************************
*  This subroutine remaps a partitioning on a single processor
**************************************************************/
void SerialRemap(GraphType *graph, int nparts, idxtype *base, idxtype *scratch,
     idxtype *remap, float *tpwgts)
{
  int i, ii, j, k;
  int nvtxs, nmapped, max_mult;
  int from, to, current_from, smallcount, bigcount;
  KeyValueType *flowto, *bestflow;
  KeyKeyValueType *sortvtx;
  idxtype *vsize, *htable, *map, *rowmap;

  nvtxs = graph->nvtxs;
  vsize = graph->vsize;
  max_mult = amin(MAX_NPARTS_MULTIPLIER, nparts);

  sortvtx = (KeyKeyValueType *)GKmalloc(nvtxs*sizeof(KeyKeyValueType), "sortvtx");
  flowto = (KeyValueType *)GKmalloc((nparts*max_mult+nparts)*sizeof(KeyValueType), "flowto");
  bestflow = flowto+nparts;
  map = htable = idxsmalloc(nparts*2, -1, "htable");
  rowmap = map+nparts;

  for (i=0; i<nvtxs; i++) {
    sortvtx[i].key1 = base[i];
    sortvtx[i].key2 = vsize[i];
    sortvtx[i].val = i;
  }

  qsort((void *)sortvtx, (size_t)nvtxs, (size_t)sizeof(KeyKeyValueType), SSMIncKeyCmp);

  for (j=0; j<nparts; j++) {
    flowto[j].key = 0;
    flowto[j].val = j;
  }

  /* this step has nparts*nparts*log(nparts) computational complexity */
  bigcount = smallcount = current_from = 0;
  for (ii=0; ii<nvtxs; ii++) {
    i = sortvtx[ii].val;
    from = base[i];
    to = scratch[i];

    if (from > current_from) {
      /* reset the hash table */
      for (j=0; j<smallcount; j++)
        htable[flowto[j].val] = -1;
      ASSERTS(idxsum(nparts, htable) == -nparts);

      ikeysort(smallcount, flowto);

      for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) {
        bestflow[bigcount].key = flowto[j].key;
        bestflow[bigcount].val = current_from*nparts+flowto[j].val;
      }

      smallcount = 0;
      current_from = from;
    }

    if (htable[to] == -1) {
      htable[to] = smallcount;
      flowto[smallcount].key = -vsize[i];
      flowto[smallcount].val = to;
      smallcount++;
    }
    else {
      flowto[htable[to]].key += -vsize[i];
    }
  }

  /* reset the hash table */
  for (j=0; j<smallcount; j++)
    htable[flowto[j].val] = -1;
  ASSERTS(idxsum(nparts, htable) == -nparts);

  ikeysort(smallcount, flowto);

  for (j=0; j<amin(smallcount, max_mult); j++, bigcount++) {
    bestflow[bigcount].key = flowto[j].key;
    bestflow[bigcount].val = current_from*nparts+flowto[j].val;
  }
  ikeysort(bigcount, bestflow);

  ASSERTS(idxsum(nparts, map) == -nparts);
  ASSERTS(idxsum(nparts, rowmap) == -nparts);
  nmapped = 0;

  /* now make as many assignments as possible */
  for (ii=0; ii<bigcount; ii++) {
    i = bestflow[ii].val;
    j = i % nparts;  /* to */
    k = i / nparts;  /* from */

    if (map[j] == -1 && rowmap[k] == -1 && SimilarTpwgts(tpwgts, graph->ncon, j, k)) {
      map[j] = k;
      rowmap[k] = j;
      nmapped++;
    }

    if (nmapped == nparts)
      break;
  }


  /* remap the rest */
  /* it may help try remapping to the same label first */
  if (nmapped < nparts) {
    for (j=0; j<nparts && nmapped<nparts; j++) {
      if (map[j] == -1) {
        for (ii=0; ii<nparts; ii++) {
          i = (j+ii) % nparts;
          if (rowmap[i] == -1 && SimilarTpwgts(tpwgts, graph->ncon, i, j)) {
            map[j] = i;
            rowmap[i] = j;
            nmapped++;
            break;
          }
        }
      }
    }
  }

  /* check to see if remapping fails (due to dis-similar tpwgts) */
  /* if remapping fails, revert to original mapping */
  if (nmapped < nparts)
    for (i=0; i<nparts; i++)
      map[i] = i;

  for (i=0; i<nvtxs; i++)
    remap[i] = map[remap[i]];

  GKfree((void **)&sortvtx, (void **)&flowto, (void **)&htable, LTERM);
}
예제 #16
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
void Mc_Serial_FM_2WayRefine(GraphType *graph, float *tpwgts, int npasses)
{
  int i, ii, j, k;
  int kwgt, nvtxs, ncon, nbnd, nswaps, from, to, pass, limit, tmp, cnum;
  idxtype *xadj, *adjncy, *adjwgt, *where, *id, *ed, *bndptr, *bndind;
  idxtype *moved, *swaps, *qnum;
  float *nvwgt, *npwgts, mindiff[MAXNCON], origbal, minbal, newbal;
  FPQueueType parts[MAXNCON][2];
  int higain, oldgain, mincut, initcut, newcut, mincutorder;
  float rtpwgts[MAXNCON*2];
  KeyValueType *cand;
int mype;
MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  id = graph->sendind;
  ed = graph->recvind;
  npwgts = graph->gnpwgts;
  bndptr = graph->sendptr;
  bndind = graph->recvptr;

  moved = idxmalloc(nvtxs, "moved");
  swaps = idxmalloc(nvtxs, "swaps");
  qnum = idxmalloc(nvtxs, "qnum");
  cand = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "cand");

  limit = amin(amax(0.01*nvtxs, 25), 150);

  /* Initialize the queues */
  for (i=0; i<ncon; i++) {
    FPQueueInit(&parts[i][0], nvtxs);
    FPQueueInit(&parts[i][1], nvtxs);
  }
  for (i=0; i<nvtxs; i++)
    qnum[i] = samax(ncon, nvwgt+i*ncon);

  origbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

  for (i=0; i<ncon; i++) {
    rtpwgts[i] = origbal*tpwgts[i];
    rtpwgts[ncon+i] = origbal*tpwgts[ncon+i];
  }

  idxset(nvtxs, -1, moved);
  for (pass=0; pass<npasses; pass++) { /* Do a number of passes */
    for (i=0; i<ncon; i++) {
      FPQueueReset(&parts[i][0]);
      FPQueueReset(&parts[i][1]);
    }

    mincutorder = -1;
    newcut = mincut = initcut = graph->mincut;
    for (i=0; i<ncon; i++)
      mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
    minbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    /* Insert boundary nodes in the priority queues */
    nbnd = graph->gnvtxs;

    for (i=0; i<nbnd; i++) {
      cand[i].key = id[i]-ed[i];
      cand[i].val = i;
    }
    ikeysort(nbnd, cand);

    for (ii=0; ii<nbnd; ii++) {
      i = bndind[cand[ii].val];
      FPQueueInsert(&parts[qnum[i]][where[i]], i, (float)(ed[i]-id[i]));
    }

    for (nswaps=0; nswaps<nvtxs; nswaps++) {
      Serial_SelectQueue(ncon, npwgts, rtpwgts, &from, &cnum, parts);
      to = (from+1)%2;

      if (from == -1 || (higain = FPQueueGetMax(&parts[cnum][from])) == -1)
        break;

      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);

      newcut -= (ed[higain]-id[higain]);
      newbal = Serial_Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

      if ((newcut < mincut && newbal-origbal <= .00001) ||
          (newcut == mincut && (newbal < minbal ||
                                (newbal == minbal && Serial_BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
        mincut = newcut;
        minbal = newbal;
        mincutorder = nswaps;
        for (i=0; i<ncon; i++)
          mindiff[i] = fabs(tpwgts[i]-npwgts[i]);
      }
      else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
        newcut += (ed[higain]-id[higain]);
        saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
        saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
        break;
      }

      where[higain] = to;
      moved[higain] = nswaps;
      swaps[nswaps] = higain;

      /**************************************************************
      * Update the id[i]/ed[i] values of the affected nodes
      ***************************************************************/
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);

      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];
        oldgain = ed[k]-id[k];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        /* Update its boundary information and queue position */
        if (bndptr[k] != -1) { /* If k was a boundary vertex */
          if (ed[k] == 0) { /* Not a boundary vertex any more */
            BNDDelete(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)  /* Remove it if in the queues */
              FPQueueDelete(&parts[qnum[k]][where[k]], k);
          }
          else { /* If it has not been moved, update its position in the queue */
            if (moved[k] == -1)
              FPQueueUpdate(&parts[qnum[k]][where[k]], k, (float)oldgain, (float)(ed[k]-id[k]));
          }
        }
        else {
          if (ed[k] > 0) {  /* It will now become a boundary vertex */
            BNDInsert(nbnd, bndind, bndptr, k);
            if (moved[k] == -1)
              FPQueueInsert(&parts[qnum[k]][where[k]], k, (float)(ed[k]-id[k]));
          }
        }
      }
    }

    /****************************************************************
    * Roll back computations
    *****************************************************************/
    for (i=0; i<nswaps; i++)
      moved[swaps[i]] = -1;  /* reset moved array */
    for (nswaps--; nswaps>mincutorder; nswaps--) {
      higain = swaps[nswaps];

      to = where[higain] = (where[higain]+1)%2;
      SWAP(id[higain], ed[higain], tmp);
      if (ed[higain] == 0 && bndptr[higain] != -1 && xadj[higain] < xadj[higain+1])
        BNDDelete(nbnd, bndind,  bndptr, higain);
      else if (ed[higain] > 0 && bndptr[higain] == -1)
        BNDInsert(nbnd, bndind,  bndptr, higain);

      saxpy2(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      saxpy2(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+((to+1)%2)*ncon, 1);
      for (j=xadj[higain]; j<xadj[higain+1]; j++) {
        k = adjncy[j];

        kwgt = (to == where[k] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[k], ed[k], kwgt);

        if (bndptr[k] != -1 && ed[k] == 0)
          BNDDelete(nbnd, bndind, bndptr, k);
        if (bndptr[k] == -1 && ed[k] > 0)
          BNDInsert(nbnd, bndind, bndptr, k);
      }
    }

    graph->mincut = mincut;
    graph->gnvtxs = nbnd;

    if (mincutorder == -1 || mincut == initcut)
      break;
  }

  for (i=0; i<ncon; i++) {
    FPQueueFree(&parts[i][0]);
    FPQueueFree(&parts[i][1]);
  }

  GKfree((void **)&cand, (void **)&qnum, (void **)&moved, (void **)&swaps, LTERM);
  return;
}
예제 #17
0
파일: io.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function reads the spd matrix
**************************************************************************/
void Moc_SerialReadMetisGraph(char *filename, int *r_nvtxs, int *r_ncon, int *r_nobj,
	int *r_fmt, idxtype **r_xadj, idxtype **r_vwgt, idxtype **r_adjncy,
	idxtype **r_adjwgt, int *wgtflag)
{
  int i, k, l;
  int ncon, nobj, edge, nvtxs, nedges;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt;
  char *line, *oldstr, *newstr;
  int fmt, readew, readvw;
  int ewgt[MAXNOBJ];
  FILE *fpin;

  line = (char *)GKmalloc(sizeof(char)*(8192+1), "line");

  if ((fpin = fopen(filename, "r")) == NULL) {
    printf("Failed to open file %s\n", filename);
    exit(-1);
  }

  fgets(line, 8192, fpin);
  fmt = ncon = nobj = 0;
  sscanf(line, "%d %d %d %d %d", &nvtxs, &nedges, &fmt, &ncon, &nobj);
  readew = (fmt%10 > 0);
  readvw = ((fmt/10)%10 > 0);

  *wgtflag = 0;
  if (readew)
    *wgtflag += 1;
  if (readvw)
    *wgtflag += 2;

  if ((ncon > 0 && !readvw) || (nobj > 0 && !readew)) {
    printf("fmt and ncon/nobj are inconsistant.\n");
    exit(-1);
  }

  nedges *=2;
  ncon = (ncon == 0 ? 1 : ncon);
  nobj = (nobj == 0 ? 1 : nobj);

  xadj = idxmalloc(nvtxs+1, "ReadGraph: xadj");
  adjncy = idxmalloc(nedges, "Moc_ReadGraph: adjncy");
  vwgt = (readvw ? idxmalloc(ncon*nvtxs, "RG: vwgt") : NULL);
  adjwgt = (readew ? idxmalloc(nobj*nedges, "RG: adjwgt") : NULL);

  /* Start reading the graph file */
  for (xadj[0]=0, k=0, i=0; i<nvtxs; i++) {
    do {
      fgets(line, 8192, fpin);
    } while (line[0] == '%' && !feof(fpin));
    oldstr = line;
    newstr = NULL;

    if (readvw) {
      for (l=0; l<ncon; l++) {
        vwgt[i*ncon+l] = (int)strtol(oldstr, &newstr, 10);
        oldstr = newstr;
      }
    }

    for (;;) {
      edge = (int)strtol(oldstr, &newstr, 10) -1;
      oldstr = newstr;

      if (readew) {
        for (l=0; l<nobj; l++) {
          ewgt[l] = (float)strtod(oldstr, &newstr);
          oldstr = newstr;
        }
      }

      if (edge < 0)
        break;

      adjncy[k] = edge;
      if (readew)
        for (l=0; l<nobj; l++)
          adjwgt[k*nobj+l] = ewgt[l];
      k++;
    }
    xadj[i+1] = k;
  }

  fclose(fpin);

  free(line);

  *r_nvtxs = nvtxs;
  *r_ncon = ncon;
  *r_nobj = nobj;
  *r_fmt = fmt;
  *r_xadj = xadj;
  *r_vwgt = vwgt;
  *r_adjncy = adjncy;
  *r_adjwgt = adjwgt;
}
예제 #18
0
파일: io.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function reads a mesh from a file
**************************************************************************/
void ParallelReadMesh(MeshType *mesh, char *filename, MPI_Comm comm)
{
  int i, j, k, pe;
  int npes, mype, ier;
  int gnelms, nelms, your_nelms, etype, maxnelms;
  int maxnode, gmaxnode, minnode, gminnode;
  idxtype *elmdist, *elements;
  idxtype *your_elements;
  MPI_Status stat;
  char *line = NULL, *oldstr, *newstr;
  FILE *fpin = NULL;
  int esize, esizes[5] = {-1, 3, 4, 8, 4};
  int mgcnum, mgcnums[5] = {-1, 2, 3, 4, 2};

  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  elmdist = mesh->elmdist = idxsmalloc(npes+1, 0, "ReadGraph: elmdist");

  if (mype == npes-1) {
    ier = 0;
    fpin = fopen(filename, "r");

    if (fpin == NULL){
      printf("COULD NOT OPEN FILE '%s' FOR SOME REASON!\n", filename);
      ier++;
    }

    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      fclose(fpin);
      MPI_Finalize();
      exit(0);
    }

    line = (char *)GKmalloc(sizeof(char)*(MAXLINE+1), "line");

    fgets(line, MAXLINE, fpin);
    sscanf(line, "%d %d", &gnelms, &etype);

    /* Construct elmdist and send it to all the processors */
    elmdist[0] = 0;
    for (i=0,j=gnelms; i<npes; i++) {
      k = j/(npes-i);
      elmdist[i+1] = elmdist[i]+k;
      j -= k;
    }

    MPI_Bcast((void *)elmdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }
  else {
    MPI_Bcast(&ier, 1, MPI_INT, npes-1, comm);
    if (ier > 0){
      MPI_Finalize();
      exit(0);
    }

    MPI_Bcast((void *)elmdist, npes+1, IDX_DATATYPE, npes-1, comm);
  }

  MPI_Bcast((void *)(&etype), 1, MPI_INT, npes-1, comm);

  gnelms = mesh->gnelms = elmdist[npes];
  nelms = mesh->nelms = elmdist[mype+1]-elmdist[mype];
  mesh->etype = etype;
  esize = esizes[etype];
  mgcnum = mgcnums[etype];

  elements = mesh->elements = idxmalloc(nelms*esize, "ParallelReadMesh: elements");

  if (mype == npes-1) {
    maxnelms = 0;
    for (i=0; i<npes; i++) {
      maxnelms = (maxnelms > elmdist[i+1]-elmdist[i]) ?
      maxnelms : elmdist[i+1]-elmdist[i];
    }

    your_elements = idxmalloc(maxnelms*esize, "your_elements");

    for (pe=0; pe<npes; pe++) {
      your_nelms = elmdist[pe+1]-elmdist[pe];
      for (i=0; i<your_nelms; i++) {

        fgets(line, MAXLINE, fpin);
        oldstr = line;
        newstr = NULL;

        /*************************************/
        /* could get element weigts here too */
        /*************************************/

        for (j=0; j<esize; j++) {
          your_elements[i*esize+j] = (int)strtol(oldstr, &newstr, 10);
          oldstr = newstr;
        }
      }

      if (pe < npes-1) {
        MPI_Send((void *)your_elements, your_nelms*esize, IDX_DATATYPE, pe, 0, comm);
      }
      else {
        for (i=0; i<your_nelms*esize; i++)
          elements[i] = your_elements[i];
      }
    }
    fclose(fpin);
    free(your_elements);
  }
  else {
    MPI_Recv((void *)elements, nelms*esize, IDX_DATATYPE, npes-1, 0, comm, &stat);
  }

  /*********************************/
  /* now check for number of nodes */
  /*********************************/
  minnode = elements[idxamin(nelms*esize, elements)];
  MPI_Allreduce((void *)&minnode, (void *)&gminnode, 1, MPI_INT, MPI_MIN, comm);
  for (i=0; i<nelms*esize; i++)
    elements[i] -= gminnode;

  maxnode = elements[idxamax(nelms*esize, elements)];
  MPI_Allreduce((void *)&maxnode, (void *)&gmaxnode, 1, MPI_INT, MPI_MAX, comm);
  mesh->gnns = gmaxnode+1;

  if (mype==0) printf("Nelements: %d, Nnodes: %d, EType: %d\n", gnelms, mesh->gnns, etype);
}
예제 #19
0
/*************************************************************************
* This function performs an edge-based FM refinement
**************************************************************************/
int BalanceMyLink(CtrlType *ctrl, GraphType *graph, idxtype *home, int me,
  int you, float *flows, float maxdiff, float *diff_cost, float *diff_lbavg,
  float avgvwgt)
{
  int h, i, ii, j, k;
  int nvtxs, ncon;
  int nqueues, minval, maxval, higain, vtx, edge, totalv;
  int from, to, qnum, index, nchanges, cut, tmp;
  int pass, nswaps, nmoves, multiplier;
  idxtype *xadj, *vsize, *adjncy, *adjwgt, *where, *ed, *id;
  idxtype *hval, *nvpq, *inq, *map, *rmap, *ptr, *myqueue, *changes;
  float *nvwgt, lbvec[MAXNCON], pwgts[MAXNCON*2], tpwgts[MAXNCON*2], my_wgt[MAXNCON];
  float newgain, oldgain = 0.0;
  float lbavg, bestflow, mycost;
  float ipc_factor, redist_factor, ftmp;
  FPQueueType *queues;
int mype;
MPI_Comm_rank(MPI_COMM_WORLD, &mype);

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  vsize = graph->vsize;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  where = graph->where;
  ipc_factor = ctrl->ipc_factor;
  redist_factor = ctrl->redist_factor;

  hval = idxmalloc(nvtxs*7, "hval");
  id = hval + nvtxs;
  ed = hval + nvtxs*2;
  map = hval + nvtxs*3;
  rmap = hval + nvtxs*4;
  myqueue = hval + nvtxs*5;
  changes = hval + nvtxs*6;

  sset(ncon*2, 0.0, pwgts);
  for (h=0; h<ncon; h++) {
    tpwgts[h] = -1.0 * flows[h];
    tpwgts[ncon+h] = flows[h];
  }

  for (i=0; i<nvtxs; i++) {
    if (where[i] == me) {
      for (h=0; h<ncon; h++) {
        tpwgts[h] += nvwgt[i*ncon+h];
        pwgts[h] += nvwgt[i*ncon+h];
      }
    }
    else {
      ASSERTS(where[i] == you);
      for (h=0; h<ncon; h++) {
        tpwgts[ncon+h] += nvwgt[i*ncon+h];
        pwgts[ncon+h] += nvwgt[i*ncon+h];
      }
    }
  }

  /* we don't want any tpwgts to be less than zero */
  for (h=0; h<ncon; h++) {
    if (tpwgts[h] < 0.0) {
      tpwgts[ncon+h] += tpwgts[h];
      tpwgts[h] = 0.0;
    }

    if (tpwgts[ncon+h] < 0.0) {
      tpwgts[h] += tpwgts[ncon+h];
      tpwgts[ncon+h] = 0.0;
    }
  }

  /*******************************/
  /* insert vertices into queues */
  /*******************************/
  minval = maxval = 0;
  multiplier = 1;
  for (i=0; i<ncon; i++) {
    multiplier *= (i+1);
    maxval += i*multiplier;
    minval += (ncon-1-i)*multiplier;
  }

  nqueues = maxval-minval+1;
  nvpq = idxsmalloc(nqueues, 0, "nvpq");
  ptr = idxmalloc(nqueues+1, "ptr");
  inq = idxmalloc(nqueues*2, "inq");
  queues = (FPQueueType *)(GKmalloc(sizeof(FPQueueType)*nqueues*2, "queues"));

  for (i=0; i<nvtxs; i++)
    hval[i] = Moc_HashVwgts(ncon, nvwgt+i*ncon) - minval;

  for (i=0; i<nvtxs; i++)
    nvpq[hval[i]]++;

  ptr[0] = 0;
  for (i=0; i<nqueues; i++)
    ptr[i+1] = ptr[i] + nvpq[i];

  for (i=0; i<nvtxs; i++) {
    map[i] = ptr[hval[i]];
    rmap[ptr[hval[i]]++] = i;
  }

  for (i=nqueues-1; i>0; i--)
    ptr[i] = ptr[i-1];
  ptr[0] = 0;

  /* initialize queues */
  for (i=0; i<nqueues; i++)
    if (nvpq[i] > 0) {
      FPQueueInit(queues+i, nvpq[i]);
      FPQueueInit(queues+i+nqueues, nvpq[i]);
    }

  /* compute internal/external degrees */
  idxset(nvtxs, 0, id);
  idxset(nvtxs, 0, ed);
  for (j=0; j<nvtxs; j++)
    for (k=xadj[j]; k<xadj[j+1]; k++)
      if (where[adjncy[k]] == where[j])
        id[j] += adjwgt[k];
      else 
        ed[j] += adjwgt[k];

  nswaps = 0;
  for (pass=0; pass<N_MOC_BAL_PASSES; pass++) {
    idxset(nvtxs, -1, myqueue); 
    idxset(nqueues*2, 0, inq);

    /* insert vertices into correct queues */
    for (j=0; j<nvtxs; j++) {
      index = (where[j] == me) ? 0 : nqueues;

      newgain = ipc_factor*(float)(ed[j]-id[j]);
      if (home[j] == me || home[j] == you) {
        if (where[j] == home[j])
          newgain -= redist_factor*(float)vsize[j];
        else
          newgain += redist_factor*(float)vsize[j];
      }

      FPQueueInsert(queues+hval[j]+index, map[j]-ptr[hval[j]], newgain);
      myqueue[j] = (where[j] == me) ? 0 : 1;
      inq[hval[j]+index]++;
    }

/*    bestflow = sfavg(ncon, flows); */
    for (j=0, h=0; h<ncon; h++)
      if (fabs(flows[h]) > fabs(flows[j])) j = h;
        bestflow = fabs(flows[j]);

    nchanges = nmoves = 0;
    for (ii=0; ii<nvtxs/2; ii++) {
      from = -1;
      Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
      &qnum, minval, avgvwgt, maxdiff);

      /* can't find a vertex in one subdomain, try the other */
      if (from != -1 && qnum == -1) {
        from = (from == me) ? you : me;

        if (from == me) {
          for (j=0; j<ncon; j++)
            if (flows[j] > avgvwgt)
              break;
        }
        else {
          for (j=0; j<ncon; j++)
            if (flows[j] < -1.0*avgvwgt)
              break;
        }

        if (j != ncon)
          Moc_DynamicSelectQueue(nqueues, ncon, me, you, inq, flows, &from,
          &qnum, minval, avgvwgt, maxdiff);
      }

      if (qnum == -1)
        break;

      to = (from == me) ? you : me;
      index = (from == me) ? 0 : nqueues;
      higain = FPQueueGetMax(queues+qnum+index);
      inq[qnum+index]--;
      ASSERTS(higain != -1);

      /*****************/
      /* make the swap */
      /*****************/
      vtx = rmap[higain+ptr[qnum]];
      myqueue[vtx] = -1;
      where[vtx] = to;
      nswaps++;
      nmoves++;

      /* update the flows */
      for (j=0; j<ncon; j++)
        flows[j] += (to == me) ? nvwgt[vtx*ncon+j] : -1.0*nvwgt[vtx*ncon+j];
 
/*      ftmp = sfavg(ncon, flows); */
      for (j=0, h=0; h<ncon; h++)
        if (fabs(flows[h]) > fabs(flows[j])) j = h;
          ftmp = fabs(flows[j]);

      if (ftmp < bestflow) {
        bestflow = ftmp;
        nchanges = 0;
      }
      else {
        changes[nchanges++] = vtx;
      }

      SWAP(id[vtx], ed[vtx], tmp);

      for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
        edge = adjncy[j];

        /* must compute oldgain before changing id/ed */
        if (myqueue[edge] != -1) {
          oldgain = ipc_factor*(float)(ed[edge]-id[edge]);
          if (home[edge] == me || home[edge] == you) {
            if (where[edge] == home[edge])
              oldgain -= redist_factor*(float)vsize[edge];
            else
              oldgain += redist_factor*(float)vsize[edge];
          }
        }

        tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[edge], ed[edge], tmp);

        if (myqueue[edge] != -1) {
          newgain = ipc_factor*(float)(ed[edge]-id[edge]);
          if (home[edge] == me || home[edge] == you) {
            if (where[edge] == home[edge])
              newgain -= redist_factor*(float)vsize[edge];
            else
              newgain += redist_factor*(float)vsize[edge];
          }

          FPQueueUpdate(queues+hval[edge]+(nqueues*myqueue[edge]),
          map[edge]-ptr[hval[edge]], oldgain, newgain);
        }
      }
    }

    /****************************/
    /* now go back to best flow */
    /****************************/
    nswaps -= nchanges;
    nmoves -= nchanges;
    for (i=0; i<nchanges; i++) {
      vtx = changes[i];
      from = where[vtx];
      where[vtx] = to = (from == me) ? you : me;

      SWAP(id[vtx], ed[vtx], tmp);
      for (j=xadj[vtx]; j<xadj[vtx+1]; j++) {
        edge = adjncy[j];
        tmp = (to == where[edge] ? adjwgt[j] : -adjwgt[j]);
        INC_DEC(id[edge], ed[edge], tmp);
      }
    }

    for (i=0; i<nqueues; i++) {
      if (nvpq[i] > 0) {
        FPQueueReset(queues+i);
        FPQueueReset(queues+i+nqueues);
      }
    }

    if (nmoves == 0)
      break;
  }

  /***************************/
  /* compute 2-way imbalance */
  /***************************/
  sset(ncon, 0.0, my_wgt);
  for (i=0; i<nvtxs; i++)
    if (where[i] == me)
      for (h=0; h<ncon; h++)
        my_wgt[h] += nvwgt[i*ncon+h];

  for (i=0; i<ncon; i++) {
    ftmp =  (pwgts[i]+pwgts[ncon+i])/2.0;
    if (ftmp != 0.0)
      lbvec[i] = fabs(my_wgt[i]-tpwgts[i]) / ftmp;
    else
      lbvec[i] = 0.0;
  }
  lbavg = savg(ncon, lbvec);
  *diff_lbavg = lbavg;

  /****************/
  /* compute cost */
  /****************/
  cut = totalv = 0;
  for (i=0; i<nvtxs; i++) {
    if (where[i] != home[i])
      totalv += vsize[i];

      for (j=xadj[i]; j<xadj[i+1]; j++) 
        if (where[adjncy[j]] != where[i])
          cut += adjwgt[j];
  }
  cut /= 2;
  mycost = cut*ipc_factor + totalv*redist_factor;
  *diff_cost = mycost;

  /* free memory */
  for (i=0; i<nqueues; i++)
    if (nvpq[i] > 0) {
      FPQueueFree(queues+i);
      FPQueueFree(queues+i+nqueues);
    }

  GKfree((void **)&hval, (void **)&nvpq, (void **)&ptr, (void **)&inq, (void **)&queues, LTERM);
  return nswaps;
}
예제 #20
0
/*************************************************************************
* This function compresses a graph by merging identical vertices
* The compression should lead to at least 10% reduction.
**************************************************************************/
void CompressGraph(CtrlType *ctrl, GraphType *graph, int nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *cptr, idxtype *cind)
{
  int i, ii, iii, j, jj, k, l, cnvtxs, cnedges;
  idxtype *cxadj, *cadjncy, *cvwgt, *mark, *map;
  KeyValueType *keys;

  mark = idxsmalloc(nvtxs, -1, "CompressGraph: mark");
  map = idxsmalloc(nvtxs, -1, "CompressGraph: map");
  keys = (KeyValueType *)GKmalloc(nvtxs*sizeof(KeyValueType), "CompressGraph: keys");

  /* Compute a key for each adjacency list */
  for (i=0; i<nvtxs; i++) {
    k = 0;
    for (j=xadj[i]; j<xadj[i+1]; j++)
      k += adjncy[j];
    keys[i].key = k+i; /* Add the diagonal entry as well */
    keys[i].val = i;
  }

  ikeysort(nvtxs, keys);

  l = cptr[0] = 0;
  for (cnvtxs=i=0; i<nvtxs; i++) {
    ii = keys[i].val;
    if (map[ii] == -1) { 
      mark[ii] = i;  /* Add the diagonal entry */
      for (j=xadj[ii]; j<xadj[ii+1]; j++) 
        mark[adjncy[j]] = i;

      cind[l++] = ii;
      map[ii] = cnvtxs;

      for (j=i+1; j<nvtxs; j++) {
        iii = keys[j].val;

        if (keys[i].key != keys[j].key || xadj[ii+1]-xadj[ii] != xadj[iii+1]-xadj[iii])
          break; /* Break if keys or degrees are different */

        if (map[iii] == -1) { /* Do a comparison if iii has not been mapped */ 
          for (jj=xadj[iii]; jj<xadj[iii+1]; jj++) {
            if (mark[adjncy[jj]] != i)
              break;
          }

          if (jj == xadj[iii+1]) { /* Identical adjacency structure */
            map[iii] = cnvtxs;
            cind[l++] = iii;
          }
        }
      }

      cptr[++cnvtxs] = l;
    }
  }

  /* printf("Original: %6d, Compressed: %6d\n", nvtxs, cnvtxs); */


  InitGraph(graph);

  if (cnvtxs >= COMPRESSION_FRACTION*nvtxs) {
    graph->nvtxs = nvtxs;
    graph->nedges = xadj[nvtxs];
    graph->ncon = 1;
    graph->xadj = xadj;
    graph->adjncy = adjncy;

    graph->gdata = idxmalloc(3*nvtxs+graph->nedges, "CompressGraph: gdata");
    graph->vwgt    	= graph->gdata;
    graph->adjwgtsum    = graph->gdata+nvtxs;
    graph->cmap		= graph->gdata+2*nvtxs;
    graph->adjwgt	= graph->gdata+3*nvtxs;

    idxset(nvtxs, 1, graph->vwgt);
    idxset(graph->nedges, 1, graph->adjwgt);
    for (i=0; i<nvtxs; i++)
      graph->adjwgtsum[i] = xadj[i+1]-xadj[i];

    graph->label = idxmalloc(nvtxs, "CompressGraph: label");
    for (i=0; i<nvtxs; i++)
      graph->label[i] = i;
  }
  else { /* Ok, form the compressed graph  */
    cnedges = 0;
    for (i=0; i<cnvtxs; i++) {
      ii = cind[cptr[i]];
      cnedges += xadj[ii+1]-xadj[ii];
    }

    /* Allocate memory for the compressed graph*/
    graph->gdata = idxmalloc(4*cnvtxs+1 + 2*cnedges, "CompressGraph: gdata");
    cxadj = graph->xadj		= graph->gdata;
    cvwgt = graph->vwgt         = graph->gdata + cnvtxs+1;
    graph->adjwgtsum        	= graph->gdata + 2*cnvtxs+1;
    graph->cmap                 = graph->gdata + 3*cnvtxs+1;
    cadjncy = graph->adjncy     = graph->gdata + 4*cnvtxs+1;
    graph->adjwgt            	= graph->gdata + 4*cnvtxs+1 + cnedges;

    /* Now go and compress the graph */
    idxset(nvtxs, -1, mark);
    l = cxadj[0] = 0;
    for (i=0; i<cnvtxs; i++) {
      cvwgt[i] = cptr[i+1]-cptr[i];
      mark[i] = i;  /* Remove any dioganal entries in the compressed graph */
      for (j=cptr[i]; j<cptr[i+1]; j++) {
        ii = cind[j];
        for (jj=xadj[ii]; jj<xadj[ii+1]; jj++) {
          k = map[adjncy[jj]];
          if (mark[k] != i) 
            cadjncy[l++] = k;
          mark[k] = i;
        }
      }
      cxadj[i+1] = l;
    }

    graph->nvtxs = cnvtxs;
    graph->nedges = l;
    graph->ncon = 1;

    idxset(graph->nedges, 1, graph->adjwgt);
    for (i=0; i<cnvtxs; i++)
      graph->adjwgtsum[i] = cxadj[i+1]-cxadj[i];

    graph->label = idxmalloc(cnvtxs, "CompressGraph: label");
    for (i=0; i<cnvtxs; i++)
      graph->label[i] = i;

  }

	GKfree(&keys, &map, &mark, LTERM);
}
예제 #21
0
파일: setup.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function tests the repeated shmem_put
**************************************************************************/
void SetUp(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, nsend, nrecv, nnbrs, nadj;
  int npes=ctrl->npes, mype=ctrl->mype;
  idxtype *vtxdist, *xadj, *adjncy;
  idxtype *peind, *recvptr, *recvind, *sendptr, *sendind;
  idxtype *receive, *pemap, *imap, *lperm;
  idxtype *pexadj, *peadjncy, *peadjloc, *startsind;
  KeyValueType *recvrequests, *sendrequests, *adjpairs;

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SetupTmr));

  gnvtxs  = graph->gnvtxs;
  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  pemap = wspace->pv1;
  idxset(npes, -1, pemap);

  lperm = graph->lperm = idxmalloc(nvtxs, "SetUp: graph->lperm");
  for (i=0; i<nvtxs; i++)
    lperm[i] = i;

  /************************************************************* 
   * Determine what you need to receive 
   *************************************************************/
  receive  = wspace->indices;  		/* Use the large global received array for now */
  adjpairs = wspace->pairs;

  for (nlocal = nadj = i = 0; i<nvtxs; i++) {
    islocal = 1;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) {
        adjncy[j] = k-firstvtx;
        continue;  /* local vertex */
      }
      adjpairs[nadj].key = k;
      adjpairs[nadj++].val = j;
      islocal = 0;
    }
    if (islocal) {
      lperm[i] = lperm[nlocal];
      lperm[nlocal++] = i;
    }
  }

  /* Take care the received part now */
  ikeysort(nadj, adjpairs);
  adjpairs[nadj].key = gnvtxs+1;  /* Boundary condition */
  for (nrecv=i=0; i<nadj; i++) {
    adjncy[adjpairs[i].val] = nvtxs+nrecv;
    if (adjpairs[i].key != adjpairs[i+1].key)
      receive[nrecv++] = adjpairs[i].key;
  }


  /* Allocate space for the setup info attached to this level of the graph */
  peind = graph->peind = idxmalloc(npes, "SetUp: peind");
  recvptr = graph->recvptr = idxmalloc(npes+1, "SetUp: recvptr");
  recvind = graph->recvind = idxmalloc(nrecv, "SetUp: recvind");

  /* Take care of the received portion */
  idxcopy(nrecv, receive, recvind);  /* Copy the vertices to be received into recvind */

  i = nnbrs = recvptr[0] = 0;
  for (penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      peind[nnbrs] = penum;
      recvptr[++nnbrs] = j;
      i = j;
    }
  }


  /************************************************************* 
   * Determine what you need to send 
   *************************************************************/
  /* Tell the other processors what they need to send you */
  recvrequests = wspace->pepairs1;
  sendrequests = wspace->pepairs2;
  for (i=0; i<npes; i++)
    recvrequests[i].key = 0;
  for (i=0; i<nnbrs; i++) {
    recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i];
    recvrequests[peind[i]].val = nvtxs+recvptr[i];
  }
  MPI_Alltoall((void *)recvrequests, 2, IDX_DATATYPE, (void *)sendrequests, 2, IDX_DATATYPE, ctrl->comm);


  sendptr = graph->sendptr = idxmalloc(npes+1, "SetUp: sendptr");
  startsind = wspace->pv2;
  for (j=i=0; i<npes; i++) {
    if (sendrequests[i].key > 0) {
      sendptr[j] = sendrequests[i].key;
      startsind[j] = sendrequests[i].val;
      j++;
    }
  }
  ASSERT(ctrl, nnbrs == j);
  MAKECSR(i, j, sendptr);

  nsend = sendptr[nnbrs];
  sendind = graph->sendind = idxmalloc(nsend, "SetUp: sendind");


  /* Issue the receives for sendind */
  for (i=0; i<nnbrs; i++) {
    MPI_Irecv((void *)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, 
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */
  for (i=0; i<nnbrs; i++) {
    MPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_DATATYPE,
              peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);



  /* Create the peadjncy data structure for sparse boundary exchanges */
  pexadj = graph->pexadj = idxsmalloc(nvtxs+1, 0, "SetUp: pexadj");
  peadjncy = graph->peadjncy = idxmalloc(nsend, "SetUp: peadjncy");
  peadjloc = graph->peadjloc = idxmalloc(nsend, "SetUp: peadjloc");

  for (i=0; i<nsend; i++) {
    ASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx, (ctrl, "%d %d %d\n", sendind[i], firstvtx, lastvtx));
    pexadj[sendind[i]-firstvtx]++;
  }
  MAKECSR(i, nvtxs, pexadj);

  for (i=0; i<nnbrs; i++) {
    for (j=sendptr[i]; j<sendptr[i+1]; j++) {
      k = pexadj[sendind[j]-firstvtx]++;
      peadjncy[k] = i;  /* peind[i] is the actual PE number */
      peadjloc[k] = startsind[i]++;
    }
  }
  ASSERT(ctrl, pexadj[nvtxs] == nsend);

  for (i=nvtxs; i>0; i--)
    pexadj[i] = pexadj[i-1];
  pexadj[0] = 0;


  graph->nnbrs = nnbrs;
  graph->nrecv = nrecv;
  graph->nsend = nsend;
  graph->nlocal = nlocal;


  /* Create the inverse map from ladjncy to adjncy */
  imap = graph->imap = idxmalloc(nvtxs+nrecv, "SetUp: imap");
  for (i=0; i<nvtxs; i++)
    imap[i] = firstvtx+i;
  for (i=0; i<nrecv; i++)
    imap[nvtxs+i] = recvind[i];


  /* Check if wspace->nlarge is large enough for nrecv and nsend */
  if (wspace->nlarge < nrecv+nsend) {
    free(wspace->indices);
    free(wspace->pairs);
    wspace->nlarge = nrecv+nsend;
    wspace->indices = idxmalloc(wspace->nlarge, "SetUp: wspace->indices");
    wspace->pairs = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*wspace->nlarge, "SetUp: wspace->pairs");
  }

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SetupTmr));

#ifdef DEBUG_SETUPINFO
  rprintf(ctrl, "[%5d %5d] \tl:[%5d %5d] \ts:[%5d, %5d] \tr:[%5d, %5d]\n", 
            GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs),
            GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal),
            GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend),
            GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv));

  PrintSetUpInfo(ctrl, graph);
#endif
}
예제 #22
0
/*************************************************************************
* This function computes the subdomain graph
**************************************************************************/
void EliminateSubDomainEdges(CtrlType *ctrl, GraphType *graph, int nparts, float *tpwgts)
{
  int i, ii, j, k, me, other, nvtxs, total, max, avg, totalout, nind, ncand, ncand2, target, target2, nadd;
  int min, move, cpwgt, tvwgt;
  idxtype *xadj, *adjncy, *vwgt, *adjwgt, *pwgts, *where, *maxpwgt, *pmat, *ndoms, *mypmat, *otherpmat, *ind;
  KeyValueType *cand, *cand2;

  nvtxs = graph->nvtxs;
  xadj = graph->xadj;
  adjncy = graph->adjncy;
  vwgt = graph->vwgt;
  adjwgt = graph->adjwgt;

  where = graph->where;
  pwgts = graph->pwgts;  /* We assume that this is properly initialized */

  maxpwgt = idxwspacemalloc(ctrl, nparts);
  ndoms = idxwspacemalloc(ctrl, nparts);
  otherpmat = idxwspacemalloc(ctrl, nparts);
  ind = idxwspacemalloc(ctrl, nvtxs);
  pmat = ctrl->wspace.pmat;

  cand = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand");
  cand2 = (KeyValueType *)GKmalloc(nparts*sizeof(KeyValueType), "EliminateSubDomainEdges: cand");

  /* Compute the pmat matrix and ndoms */
  ComputeSubDomainGraph(graph, nparts, pmat, ndoms);


  /* Compute the maximum allowed weight for each domain */
  tvwgt = idxsum(nparts, pwgts);
  for (i=0; i<nparts; i++)
    maxpwgt[i] = 1.25*tpwgts[i]*tvwgt;


  /* Get into the loop eliminating subdomain connections */
  for (;;) {
    total = idxsum(nparts, ndoms);
    avg = total/nparts;
    max = ndoms[idxamax(nparts, ndoms)];

    /* printf("Adjacent Subdomain Stats: Total: %3d, Max: %3d, Avg: %3d [%5d]\n", total, max, avg, idxsum(nparts*nparts, pmat)); */

    if (max < 1.4*avg)
      break;

    me = idxamax(nparts, ndoms);
    mypmat = pmat + me*nparts;
    totalout = idxsum(nparts, mypmat);

    /*printf("Me: %d, TotalOut: %d,\n", me, totalout);*/

    /* Sort the connections according to their cut */
    for (ncand2=0, i=0; i<nparts; i++) {
      if (mypmat[i] > 0) {
        cand2[ncand2].key = mypmat[i];
        cand2[ncand2++].val = i;
      }
    }
    ikeysort(ncand2, cand2);

    move = 0;
    for (min=0; min<ncand2; min++) {
      if (cand2[min].key > totalout/(2*ndoms[me])) 
        break;

      other = cand2[min].val;

      /*printf("\tMinOut: %d to %d\n", mypmat[other], other);*/

      idxset(nparts, 0, otherpmat);

      /* Go and find the vertices in 'other' that are connected in 'me' */
      for (nind=0, i=0; i<nvtxs; i++) {
        if (where[i] == other) {
          for (j=xadj[i]; j<xadj[i+1]; j++) {
            if (where[adjncy[j]] == me) {
              ind[nind++] = i;
              break;
            }
          }
        }
      }

      /* Go and construct the otherpmat to see where these nind vertices are connected to */
      for (cpwgt=0, ii=0; ii<nind; ii++) {
        i = ind[ii];
        cpwgt += vwgt[i];

        for (j=xadj[i]; j<xadj[i+1]; j++) 
          otherpmat[where[adjncy[j]]] += adjwgt[j];
      }
      otherpmat[other] = 0;

      for (ncand=0, i=0; i<nparts; i++) {
        if (otherpmat[i] > 0) {
          cand[ncand].key = -otherpmat[i];
          cand[ncand++].val = i;
        }
      }
      ikeysort(ncand, cand);

      /* 
       * Go through and the select the first domain that is common with 'me', and
       * does not increase the ndoms[target] higher than my ndoms, subject to the
       * maxpwgt constraint. Traversal is done from the mostly connected to the least.
       */
      target = target2 = -1;
      for (i=0; i<ncand; i++) {
        k = cand[i].val;

        if (mypmat[k] > 0) {
          if (pwgts[k] + cpwgt > maxpwgt[k])  /* Check if balance will go off */
            continue;

          for (j=0; j<nparts; j++) {
            if (otherpmat[j] > 0 && ndoms[j] >= ndoms[me]-1 && pmat[nparts*j+k] == 0)
              break;
          }
          if (j == nparts) { /* No bad second level effects */
            for (nadd=0, j=0; j<nparts; j++) {
              if (otherpmat[j] > 0 && pmat[nparts*k+j] == 0)
                nadd++;
            }

            /*printf("\t\tto=%d, nadd=%d, %d\n", k, nadd, ndoms[k]);*/
            if (target2 == -1 && ndoms[k]+nadd < ndoms[me]) {
              target2 = k;
            }
            if (nadd == 0) {
              target = k;
              break;
            }
          }
        }
      }
      if (target == -1 && target2 != -1)
        target = target2;

      if (target == -1) {
        /* printf("\t\tCould not make the move\n");*/
        continue;
      }

      /*printf("\t\tMoving to %d\n", target);*/

      /* Update the partition weights */
      INC_DEC(pwgts[target], pwgts[other], cpwgt);

      MoveGroupMConn(ctrl, graph, ndoms, pmat, nparts, target, nind, ind);

      move = 1;
      break;
    }

    if (move == 0)
      break;
  }

  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nparts);
  idxwspacefree(ctrl, nvtxs);

  GKfree(&cand, &cand2, LTERM);
}
예제 #23
0
파일: xyzpart.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function sorts a distributed list of KeyValueType in increasing 
* order, and uses it to compute a partition. It uses samplesort. 
**************************************************************************/
void PartSort(CtrlType *ctrl, GraphType *graph, KeyValueType *elmnts, WorkSpaceType *wspace)
{
  int i, j, k, nvtxs, nrecv, npes=ctrl->npes, mype=ctrl->mype, firstvtx, lastvtx;
  idxtype *scounts, *rcounts, *vtxdist, *perm;
  KeyValueType *relmnts, *mypicks, *allpicks;

  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;

  scounts = wspace->pv1;
  rcounts = wspace->pv2;

  /* Allocate memory for the splitters */
  mypicks  = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*(npes+1), "ParSort: mypicks");
  allpicks = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*npes*npes, "ParSort: allpicks");

  /* Sort the local elements */
  ikeysort(nvtxs, elmnts);

  /* Select the local npes-1 equally spaced elements */
  for (i=1; i<npes; i++) { 
    mypicks[i-1].key = elmnts[i*(nvtxs/npes)].key;
    mypicks[i-1].val = elmnts[i*(nvtxs/npes)].val;
  }

  /* PrintPairs(ctrl, npes-1, mypicks, "Mypicks"); */

  /* Gather the picks to all the processors */
  MPI_Allgather((void *)mypicks, 2*(npes-1), IDX_DATATYPE, (void *)allpicks, 2*(npes-1), IDX_DATATYPE, ctrl->comm);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Sort all the picks */
  ikeyvalsort(npes*(npes-1), allpicks);

  /* PrintPairs(ctrl, npes*(npes-1), allpicks, "Allpicks"); */

  /* Select the final splitters. Set the boundaries to simplify coding */
  for (i=1; i<npes; i++)
    mypicks[i] = allpicks[i*(npes-1)];
  mypicks[0].key    = MIN_INT;
  mypicks[npes].key = MAX_INT;

  /* PrintPairs(ctrl, npes+1, mypicks, "Mypicks"); */

  /* Compute the number of elements that belong to each bucket */
  idxset(npes, 0, scounts);
  for (j=i=0; i<nvtxs; i++) {
    if (elmnts[i].key < mypicks[j+1].key || (elmnts[i].key == mypicks[j+1].key && elmnts[i].val < mypicks[j+1].val))
      scounts[j]++;
    else
      scounts[++j]++;
  }
  MPI_Alltoall(scounts, 1, IDX_DATATYPE, rcounts, 1, IDX_DATATYPE, ctrl->comm);

/*
  PrintVector(ctrl, npes, 0, scounts, "Scounts");
  PrintVector(ctrl, npes, 0, rcounts, "Rcounts");
*/

  /* Allocate memory for sorted elements and receive them */
  MAKECSR(i, npes, scounts);
  MAKECSR(i, npes, rcounts);
  nrecv = rcounts[npes];
  if (wspace->nlarge >= nrecv)
    relmnts = (KeyValueType *)wspace->pairs;
  else
    relmnts = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*nrecv, "ParSort: relmnts");

  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    MPI_Irecv((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    MPI_Isend((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* OK, now do the local sort of the relmnts. Use perm to keep track original order */
  perm = idxmalloc(nrecv, "ParSort: perm");
  for (i=0; i<nrecv; i++) {
    perm[i] = relmnts[i].val;
    relmnts[i].val = i;
  }
  ikeysort(nrecv, relmnts);


  /* Compute what needs to be shifted */
  MPI_Scan((void *)(&nrecv), (void *)(&lastvtx), 1, MPI_INT, MPI_SUM, ctrl->comm);
  firstvtx = lastvtx-nrecv;  

  /*myprintf(ctrl, "first, last: %d %d\n", firstvtx, lastvtx); */

  for (j=0, i=0; i<npes; i++) {
    if (vtxdist[i+1] > firstvtx) {  /* Found the first PE that is passed me */
      if (vtxdist[i+1] >= lastvtx) {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", lastvtx-firstvtx, i); */
        for (k=0; k<lastvtx-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;
      }
      else {
        /* myprintf(ctrl, "Shifting %d elements to processor %d\n", vtxdist[i+1]-firstvtx, i); */
        for (k=0; k<vtxdist[i+1]-firstvtx; k++, j++) 
          relmnts[relmnts[j].val].key = i;

        firstvtx = vtxdist[i+1];
      }
    }
    if (vtxdist[i+1] >= lastvtx)
      break;
  }

  /* Reverse the ordering on the relmnts[].val */
  for (i=0; i<nrecv; i++) {
    ASSERTP(ctrl, relmnts[i].key>=0 && relmnts[i].key<npes, (ctrl, "%d %d\n", i, relmnts[i].key));
    relmnts[i].val = perm[i];
  }

  /* OK, now sent it back */
  /* Issue the receives first */
  for (i=0; i<npes; i++) 
    MPI_Irecv((void *)(elmnts+scounts[i]), 2*(scounts[i+1]-scounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->rreq+i);

  /* Issue the sends next */
  for (i=0; i<npes; i++) 
    MPI_Isend((void *)(relmnts+rcounts[i]), 2*(rcounts[i+1]-rcounts[i]), IDX_DATATYPE, i, 1, ctrl->comm, ctrl->sreq+i);

  MPI_Waitall(npes, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(npes, ctrl->sreq, ctrl->statuses);


  /* Construct a partition for the graph */
  graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "PartSort: graph->where");
  firstvtx = vtxdist[mype];
  for (i=0; i<nvtxs; i++) {
    ASSERTP(ctrl, elmnts[i].key>=0 && elmnts[i].key<npes, (ctrl, "%d %d\n", i, elmnts[i].key));
    ASSERTP(ctrl, elmnts[i].val>=vtxdist[mype] && elmnts[i].val<vtxdist[mype+1], (ctrl, "%d %d %d %d\n", i, vtxdist[mype], vtxdist[mype+1], elmnts[i].val));
    graph->where[elmnts[i].val-firstvtx] = elmnts[i].key;
  }


  GKfree((void **)&mypicks, (void **)&allpicks, (void **)&perm, LTERM);
  if (wspace->nlarge < nrecv)
    free(relmnts);

}