예제 #1
0
파일: order.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* This function is the driver for the partition refinement mode of ParMETIS
**************************************************************************/
void Order_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{

  SetUp(ctrl, graph, wspace);
  graph->ncon = 1;

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d][%d]\n",
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs),
        GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo,
        GlobalSEMax(ctrl, graph->vwgt[idxamax(graph->nvtxs, graph->vwgt)])));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo || (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {
    /* Compute the initial npart-way multisection */
    InitMultisection(ctrl, graph, wspace);

    if (graph->finer == NULL) { /* Do that only of no-coarsening took place */
      ComputeNodePartitionParams(ctrl, graph, wspace);
      KWayNodeRefine(ctrl, graph, wspace, 2*NGR_PASSES, ORDER_UNBALANCE_FRACTION);
    }
  }
  else { /* Coarsen it and the partition it */
    Mc_LocalMatch_HEM(ctrl, graph, wspace);

    Order_Partition(ctrl, graph->coarser, wspace);

    Moc_ProjectPartition(ctrl, graph, wspace);
    ComputeNodePartitionParams(ctrl, graph, wspace);
    KWayNodeRefine(ctrl, graph, wspace, 2*NGR_PASSES, ORDER_UNBALANCE_FRACTION);
  }
}
예제 #2
0
/*************************************************************************
* This function is the driver for the adaptive refinement mode of ParMETIS
**************************************************************************/
void Adaptive_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i;
  int tewgt, tvsize;
  floattype gtewgt, gtvsize;
  floattype ubavg, lbavg, lbvec[MAXNCON];

  /************************************/
  /* Set up important data structures */
  /************************************/
  SetUp(ctrl, graph, wspace);

  ubavg   = savg(graph->ncon, ctrl->ubvec);
  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (floattype) GlobalSESum(ctrl, tewgt) + 1.0;  /* The +1 were added to remove any FPE */
  gtvsize = (floattype) GlobalSESum(ctrl, tvsize) + 1.0;
  ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio);

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6d %8d %5d %5d][%d]\n", 
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
     (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    /***********************************************/
    /* Balance the partition on the coarsest graph */
    /***********************************************/
    graph->where = idxsmalloc(graph->nvtxs+graph->nrecv, -1, "graph->where");
    idxcopy(graph->nvtxs, graph->home, graph->where);

    Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    lbavg = savg(graph->ncon, lbvec);

    if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION)
      Balance_Partition(ctrl, graph, wspace);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }

    /* check if no coarsening took place */
    if (graph->finer == NULL) {
      Moc_ComputePartitionParams(ctrl, graph, wspace);
      Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);
      Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES);
    }
  }
  else {
    /*******************************/
    /* Coarsen it and partition it */
    /*******************************/
    switch (ctrl->ps_relation) {
      case COUPLED:
        Mc_LocalMatch_HEM(ctrl, graph, wspace);
        break;
      case DISCOUPLED:
      default:
        Moc_GlobalMatch_Balance(ctrl, graph, wspace);
        break;
    }

    Adaptive_Partition(ctrl, graph->coarser, wspace);

    /********************************/
    /* project partition and refine */
    /********************************/
    Moc_ProjectPartition(ctrl, graph, wspace);
    Moc_ComputePartitionParams(ctrl, graph, wspace);

    if (graph->ncon > 1 && graph->level < 4) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      lbavg = savg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.025) {
        Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);
      }
    }

    Moc_KWayAdaptiveRefine(ctrl, graph, wspace, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }
  }
}
예제 #3
0
파일: setup.c 프로젝트: KnoooW/gpgpu-sim
/*************************************************************************
* This function tests the repeated shmem_put
**************************************************************************/
void SetUp(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, j, k, islocal, penum, gnvtxs, nvtxs, nlocal, firstvtx, lastvtx, nsend, nrecv, nnbrs, nadj;
  int npes=ctrl->npes, mype=ctrl->mype;
  idxtype *vtxdist, *xadj, *adjncy;
  idxtype *peind, *recvptr, *recvind, *sendptr, *sendind;
  idxtype *receive, *pemap, *imap, *lperm;
  idxtype *pexadj, *peadjncy, *peadjloc, *startsind;
  KeyValueType *recvrequests, *sendrequests, *adjpairs;

  IFSET(ctrl->dbglvl, DBG_TIME, MPI_Barrier(ctrl->comm));
  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->SetupTmr));

  gnvtxs  = graph->gnvtxs;
  nvtxs   = graph->nvtxs;
  vtxdist = graph->vtxdist;
  xadj    = graph->xadj;
  adjncy  = graph->adjncy;

  firstvtx = vtxdist[mype];
  lastvtx = vtxdist[mype+1];

  pemap = wspace->pv1;
  idxset(npes, -1, pemap);

  lperm = graph->lperm = idxmalloc(nvtxs, "SetUp: graph->lperm");
  for (i=0; i<nvtxs; i++)
    lperm[i] = i;

  /************************************************************* 
   * Determine what you need to receive 
   *************************************************************/
  receive  = wspace->indices;  		/* Use the large global received array for now */
  adjpairs = wspace->pairs;

  for (nlocal = nadj = i = 0; i<nvtxs; i++) {
    islocal = 1;
    for (j=xadj[i]; j<xadj[i+1]; j++) {
      k = adjncy[j];
      if (k >= firstvtx && k < lastvtx) {
        adjncy[j] = k-firstvtx;
        continue;  /* local vertex */
      }
      adjpairs[nadj].key = k;
      adjpairs[nadj++].val = j;
      islocal = 0;
    }
    if (islocal) {
      lperm[i] = lperm[nlocal];
      lperm[nlocal++] = i;
    }
  }

  /* Take care the received part now */
  ikeysort(nadj, adjpairs);
  adjpairs[nadj].key = gnvtxs+1;  /* Boundary condition */
  for (nrecv=i=0; i<nadj; i++) {
    adjncy[adjpairs[i].val] = nvtxs+nrecv;
    if (adjpairs[i].key != adjpairs[i+1].key)
      receive[nrecv++] = adjpairs[i].key;
  }


  /* Allocate space for the setup info attached to this level of the graph */
  peind = graph->peind = idxmalloc(npes, "SetUp: peind");
  recvptr = graph->recvptr = idxmalloc(npes+1, "SetUp: recvptr");
  recvind = graph->recvind = idxmalloc(nrecv, "SetUp: recvind");

  /* Take care of the received portion */
  idxcopy(nrecv, receive, recvind);  /* Copy the vertices to be received into recvind */

  i = nnbrs = recvptr[0] = 0;
  for (penum=0; penum<npes; penum++) {
    for (j=i; j<nrecv; j++) {
      if (recvind[j] >= vtxdist[penum+1])
        break;
    }
    if (j > i) {
      peind[nnbrs] = penum;
      recvptr[++nnbrs] = j;
      i = j;
    }
  }


  /************************************************************* 
   * Determine what you need to send 
   *************************************************************/
  /* Tell the other processors what they need to send you */
  recvrequests = wspace->pepairs1;
  sendrequests = wspace->pepairs2;
  for (i=0; i<npes; i++)
    recvrequests[i].key = 0;
  for (i=0; i<nnbrs; i++) {
    recvrequests[peind[i]].key = recvptr[i+1]-recvptr[i];
    recvrequests[peind[i]].val = nvtxs+recvptr[i];
  }
  MPI_Alltoall((void *)recvrequests, 2, IDX_DATATYPE, (void *)sendrequests, 2, IDX_DATATYPE, ctrl->comm);


  sendptr = graph->sendptr = idxmalloc(npes+1, "SetUp: sendptr");
  startsind = wspace->pv2;
  for (j=i=0; i<npes; i++) {
    if (sendrequests[i].key > 0) {
      sendptr[j] = sendrequests[i].key;
      startsind[j] = sendrequests[i].val;
      j++;
    }
  }
  ASSERT(ctrl, nnbrs == j);
  MAKECSR(i, j, sendptr);

  nsend = sendptr[nnbrs];
  sendind = graph->sendind = idxmalloc(nsend, "SetUp: sendind");


  /* Issue the receives for sendind */
  for (i=0; i<nnbrs; i++) {
    MPI_Irecv((void *)(sendind+sendptr[i]), sendptr[i+1]-sendptr[i], IDX_DATATYPE, 
              peind[i], 1, ctrl->comm, ctrl->rreq+i);
  }

  /* Issue the sends. My recvind[penum] becomes penum's sendind[mype] */
  for (i=0; i<nnbrs; i++) {
    MPI_Isend((void *)(recvind+recvptr[i]), recvptr[i+1]-recvptr[i], IDX_DATATYPE,
              peind[i], 1, ctrl->comm, ctrl->sreq+i);
  }

  MPI_Waitall(nnbrs, ctrl->rreq, ctrl->statuses);
  MPI_Waitall(nnbrs, ctrl->sreq, ctrl->statuses);



  /* Create the peadjncy data structure for sparse boundary exchanges */
  pexadj = graph->pexadj = idxsmalloc(nvtxs+1, 0, "SetUp: pexadj");
  peadjncy = graph->peadjncy = idxmalloc(nsend, "SetUp: peadjncy");
  peadjloc = graph->peadjloc = idxmalloc(nsend, "SetUp: peadjloc");

  for (i=0; i<nsend; i++) {
    ASSERTP(ctrl, sendind[i] >= firstvtx && sendind[i] < lastvtx, (ctrl, "%d %d %d\n", sendind[i], firstvtx, lastvtx));
    pexadj[sendind[i]-firstvtx]++;
  }
  MAKECSR(i, nvtxs, pexadj);

  for (i=0; i<nnbrs; i++) {
    for (j=sendptr[i]; j<sendptr[i+1]; j++) {
      k = pexadj[sendind[j]-firstvtx]++;
      peadjncy[k] = i;  /* peind[i] is the actual PE number */
      peadjloc[k] = startsind[i]++;
    }
  }
  ASSERT(ctrl, pexadj[nvtxs] == nsend);

  for (i=nvtxs; i>0; i--)
    pexadj[i] = pexadj[i-1];
  pexadj[0] = 0;


  graph->nnbrs = nnbrs;
  graph->nrecv = nrecv;
  graph->nsend = nsend;
  graph->nlocal = nlocal;


  /* Create the inverse map from ladjncy to adjncy */
  imap = graph->imap = idxmalloc(nvtxs+nrecv, "SetUp: imap");
  for (i=0; i<nvtxs; i++)
    imap[i] = firstvtx+i;
  for (i=0; i<nrecv; i++)
    imap[nvtxs+i] = recvind[i];


  /* Check if wspace->nlarge is large enough for nrecv and nsend */
  if (wspace->nlarge < nrecv+nsend) {
    free(wspace->indices);
    free(wspace->pairs);
    wspace->nlarge = nrecv+nsend;
    wspace->indices = idxmalloc(wspace->nlarge, "SetUp: wspace->indices");
    wspace->pairs = (KeyValueType *)GKmalloc(sizeof(KeyValueType)*wspace->nlarge, "SetUp: wspace->pairs");
  }

  IFSET(ctrl->dbglvl, DBG_TIME, stoptimer(ctrl->SetupTmr));

#ifdef DEBUG_SETUPINFO
  rprintf(ctrl, "[%5d %5d] \tl:[%5d %5d] \ts:[%5d, %5d] \tr:[%5d, %5d]\n", 
            GlobalSEMin(ctrl, nvtxs), GlobalSEMax(ctrl, nvtxs),
            GlobalSEMin(ctrl, nlocal), GlobalSEMax(ctrl, nlocal),
            GlobalSEMin(ctrl, nsend), GlobalSEMax(ctrl, nsend),
            GlobalSEMin(ctrl, nrecv), GlobalSEMax(ctrl, nrecv));

  PrintSetUpInfo(ctrl, graph);
#endif
}
예제 #4
0
파일: mesh.c 프로젝트: davidheryanto/sc14
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
                 int *numflag, int *ncommonnodes, idxtype **xadj, 
		 idxtype **adjncy, MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;


  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i) 
	    continue;
	    
          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
	        }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
	    }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
	}
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, 
         (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, 
	 (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
예제 #5
0
파일: ametis.c 프로젝트: certik/libmesh
void Adaptive_Partition(ctrl_t *ctrl, graph_t *graph)
{
  idx_t i;
  idx_t tewgt, tvsize;
  real_t gtewgt, gtvsize;
  real_t ubavg, lbavg, *lbvec;

  WCOREPUSH;

  lbvec = rwspacemalloc(ctrl, graph->ncon);

  /************************************/
  /* Set up important data structures */
  /************************************/
  CommSetup(ctrl, graph);

  ubavg   = ravg(graph->ncon, ctrl->ubvec);
  tewgt   = isum(graph->nedges, graph->adjwgt, 1);
  tvsize  = isum(graph->nvtxs, graph->vsize, 1);
  gtewgt  = (real_t) GlobalSESum(ctrl, tewgt) + 1.0/graph->gnvtxs;  /* The +1/graph->gnvtxs were added to remove any FPE */
  gtvsize = (real_t) GlobalSESum(ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl->redist_factor = ctrl->redist_base * ((gtewgt/gtvsize)/ ctrl->edge_size_ratio);

  IFSET(ctrl->dbglvl, DBG_PROGRESS, rprintf(ctrl, "[%6"PRIDX" %8"PRIDX" %5"PRIDX" %5"PRIDX"][%"PRIDX"]\n", 
        graph->gnvtxs, GlobalSESum(ctrl, graph->nedges), GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo));

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
     (graph->finer != NULL && graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    AllocateRefinementWorkSpace(ctrl, 2*graph->nedges);

    /***********************************************/
    /* Balance the partition on the coarsest graph */
    /***********************************************/
    graph->where = ismalloc(graph->nvtxs+graph->nrecv, -1, "graph->where");
    icopy(graph->nvtxs, graph->home, graph->where);

    ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
    lbavg = ravg(graph->ncon, lbvec);

    if (lbavg > ubavg + 0.035 && ctrl->partType != REFINE_PARTITION)
      Balance_Partition(ctrl, graph);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputePartitionParams(ctrl, graph);
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");

      /* free memory allocated by ComputePartitionParams */
      gk_free((void **)&graph->ckrinfo, &graph->lnpwgts, &graph->gnpwgts, LTERM);
    }

    /* check if no coarsening took place */
    if (graph->finer == NULL) {
      ComputePartitionParams(ctrl, graph);
      KWayBalance(ctrl, graph, graph->ncon);
      KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);
    }
  }
  else {
    /*******************************/
    /* Coarsen it and partition it */
    /*******************************/
    switch (ctrl->ps_relation) {
      case PARMETIS_PSR_COUPLED:
        Match_Local(ctrl, graph);
        break;
      case PARMETIS_PSR_UNCOUPLED:
      default:
        Match_Global(ctrl, graph);
        break;
    }

    Adaptive_Partition(ctrl, graph->coarser);

    /********************************/
    /* project partition and refine */
    /********************************/
    ProjectPartition(ctrl, graph);
    ComputePartitionParams(ctrl, graph);

    if (graph->ncon > 1 && graph->level < 4) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      lbavg = ravg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.025) {
        KWayBalance(ctrl, graph, graph->ncon);
      }
    }

    KWayAdaptiveRefine(ctrl, graph, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10"PRIDX", cut: %8"PRIDX", balance: ", 
          graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3"PRREAL" ", lbvec[i]);
      rprintf(ctrl, "\n");
    }
  }

  WCOREPOP;
}
예제 #6
0
/*************************************************************************
* This function is the driver to the multi-constraint partitioning algorithm.
**************************************************************************/
void Moc_Global_Partition(CtrlType *ctrl, GraphType *graph, WorkSpaceType *wspace)
{
  int i, ncon, nparts;
  floattype ftmp, ubavg, lbavg, lbvec[MAXNCON];
 
  ncon = graph->ncon;
  nparts = ctrl->nparts;
  ubavg = savg(graph->ncon, ctrl->ubvec);

  SetUp(ctrl, graph, wspace);

  if (ctrl->dbglvl&DBG_PROGRESS) {
    rprintf(ctrl, "[%6d %8d %5d %5d] [%d] [", graph->gnvtxs, GlobalSESum(ctrl, graph->nedges),
	    GlobalSEMin(ctrl, graph->nvtxs), GlobalSEMax(ctrl, graph->nvtxs), ctrl->CoarsenTo);
    for (i=0; i<ncon; i++)
      rprintf(ctrl, " %.3f", GlobalSEMinFloat(ctrl,graph->nvwgt[samin_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i]));  
    rprintf(ctrl, "] [");
    for (i=0; i<ncon; i++)
      rprintf(ctrl, " %.3f", GlobalSEMaxFloat(ctrl, graph->nvwgt[samax_strd(graph->nvtxs, graph->nvwgt+i, ncon)*ncon+i]));  
    rprintf(ctrl, "]\n");
  }

  if (graph->gnvtxs < 1.3*ctrl->CoarsenTo ||
	(graph->finer != NULL &&
	graph->gnvtxs > graph->finer->gnvtxs*COARSEN_FRACTION)) {

    /* Done with coarsening. Find a partition */
    graph->where = idxmalloc(graph->nvtxs+graph->nrecv, "graph->where");
    Moc_InitPartition_RB(ctrl, graph, wspace);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, balance: ", graph->gnvtxs);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }

    /* In case no coarsening took place */
    if (graph->finer == NULL) {
      Moc_ComputePartitionParams(ctrl, graph, wspace);
      Moc_KWayFM(ctrl, graph, wspace, NGR_PASSES);
    }
  }
  else {
    Moc_GlobalMatch_Balance(ctrl, graph, wspace);

    Moc_Global_Partition(ctrl, graph->coarser, wspace);

    Moc_ProjectPartition(ctrl, graph, wspace);
    Moc_ComputePartitionParams(ctrl, graph, wspace);

    if (graph->ncon > 1 && graph->level < 3) {
      for (i=0; i<ncon; i++) {
        ftmp = ssum_strd(nparts, graph->gnpwgts+i, ncon);
        if (ftmp != 0.0)
          lbvec[i] = (floattype)(nparts) *
          graph->gnpwgts[samax_strd(nparts, graph->gnpwgts+i, ncon)*ncon+i]/ftmp;
        else
          lbvec[i] = 1.0;
      }
      lbavg = savg(graph->ncon, lbvec);

      if (lbavg > ubavg + 0.035) {
        if (ctrl->dbglvl&DBG_PROGRESS) {
          Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
          rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
          for (i=0; i<graph->ncon; i++) 
            rprintf(ctrl, "%.3f ", lbvec[i]);
          rprintf(ctrl, "\n");
	}

        Moc_KWayBalance(ctrl, graph, wspace, graph->ncon);
      }
    }

    Moc_KWayFM(ctrl, graph, wspace, NGR_PASSES);

    if (ctrl->dbglvl&DBG_PROGRESS) {
      Moc_ComputeParallelBalance(ctrl, graph, graph->where, lbvec);
      rprintf(ctrl, "nvtxs: %10d, cut: %8d, balance: ", graph->gnvtxs, graph->mincut);
      for (i=0; i<graph->ncon; i++) 
        rprintf(ctrl, "%.3f ", lbvec[i]);
      rprintf(ctrl, "\n");
    }

    if (graph->level != 0)
      GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, LTERM);
  }

  return;
}
예제 #7
0
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
************************************************************************************/
void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, 
	      int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, 
	      idxtype *part, MPI_Comm *comm)
{
  int h, i;
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int tewgt, tvsize, nmoved, maxin, maxout;
  float gtewgt, gtvsize, avg, maximb;
  int ps_relation, seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /* Deal with poor vertex distributions */
  ctrl.comm = *comm;
  if (GlobalSEMin(&ctrl, vtxdist[mype+1]-vtxdist[mype]) < 1) {
    if (mype == 0)
      printf("Error: Poor vertex distribution (processor with no vertices).\n");
    return;
  }


  /********************************/
  /* Try and take care bad inputs */
  /********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
              NULL, NULL, options, ioptions, part, comm);

  /* ADD: take care of disconnected graph */
  /* ADD: take care of highly unbalanced vtxdist */
  /*********************************/
  /* Take care the nparts = 1 case */
  /*********************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); 
    *edgecut = 0;
    return;
  }

  /**************************/
  /* Set up data structures */
  /**************************/
  if (inumflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl      = ioptions[PMV3_OPTION_DBGLVL];
    seed        = ioptions[PMV3_OPTION_SEED];
    ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : PARMETIS_PSR_UNCOUPLED;
  }
  else {
    dbglvl      = GLOBAL_DBGLVL;
    seed        = GLOBAL_SEED;
    ps_relation = (npes == inparts) ? PARMETIS_PSR_COUPLED : PARMETIS_PSR_UNCOUPLED;
  }

  SetUpCtrl(&ctrl, inparts, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts));
  ctrl.ipc_factor = 1000.0;
  ctrl.redist_factor = 1.0;
  ctrl.redist_base = 1.0;
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = REFINE_PARTITION;
  ctrl.ps_relation = ps_relation;
  ctrl.tpwgts = itpwgts;

  graph = Mc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize");

  graph->home = idxmalloc(graph->nvtxs, "home");
  if (ctrl.ps_relation == PARMETIS_PSR_COUPLED)
    idxset(graph->nvtxs, mype, graph->home);
  else
    idxcopy(graph->nvtxs, part, graph->home);

  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs;
  gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl.edge_size_ratio = gtewgt/gtvsize;
  scopy(incon, iubvec, ctrl.ubvec);

  AllocateWSpace(&ctrl, graph, &wspace);

  /***********************/
  /* Partition and Remap */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Adaptive_Partition(&ctrl, graph, &wspace);
  ParallelReMapGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  idxcopy(graph->nvtxs, graph->where, part);
  if (edgecut != NULL)
    *edgecut = graph->mincut;

  /***********************/
  /* Take care of output */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout);
    rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout);
  }

  /*************************************/
  /* Free memory, renumber, and return */
  /*************************************/
  GKfree((void **)&graph->lnpwgts, &graph->gnpwgts, &graph->nvwgt, &graph->home, 
      &graph->vsize, &itpwgts, LTERM);

  FreeInitialGraphAndRemap(graph, iwgtflag, 1);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

  return;
}