Example #1
/******************************************************************************
* This function takes a graph and its partition vector and creates a new
* graph corresponding to the one after the movement
*******************************************************************************/
void TestMoveGraph(graph_t *ograph, graph_t *omgraph, idx_t *part, MPI_Comm comm)
{
  idx_t npes, mype;
  ctrl_t *ctrl;
  graph_t *graph, *mgraph;
  idx_t options[5] = {0, 0, 1, 0, 0};

  gkMPI_Comm_size(comm, &npes);
  ctrl = SetupCtrl(PARMETIS_OP_KMETIS, NULL, 1, npes, NULL, NULL, comm); 
  mype = ctrl->mype;

  ctrl->CoarsenTo = 1;  /* Needed by SetUpGraph, otherwise we can get FP errors */
  graph = TestSetUpGraph(ctrl, ograph->vtxdist, ograph->xadj, NULL, ograph->adjncy, NULL, 0);
  AllocateWSpace(ctrl, 0);

  CommSetup(ctrl, graph);
  graph->where = part;
  graph->ncon  = 1;
  mgraph = MoveGraph(ctrl, graph);

  omgraph->gnvtxs  = mgraph->gnvtxs;
  omgraph->nvtxs   = mgraph->nvtxs;
  omgraph->nedges  = mgraph->nedges;
  omgraph->vtxdist = mgraph->vtxdist;
  omgraph->xadj    = mgraph->xadj;
  omgraph->adjncy  = mgraph->adjncy;
  mgraph->vtxdist  = NULL;
  mgraph->xadj     = NULL;
  mgraph->adjncy   = NULL;
  FreeGraph(mgraph);

  graph->where = NULL;
  FreeInitialGraphAndRemap(graph);
  FreeCtrl(&ctrl);
}  
Example #2
File: gkmetis.c Project: KnoooW/gpgpu-sim
/***********************************************************************************
* This function is the entry point of the coordinate-based partitioning
* algorithm. It computes a partitioning based solely on the vertex
* coordinates supplied in xyz.
************************************************************************************/
void ParMETIS_V3_PartGeom(idxtype *vtxdist, int *ndims, float *xyz, idxtype *part, MPI_Comm *comm)
{
  int i, npes, mype, nvtxs, firstvtx, dbglvl;
  idxtype *xadj, *adjncy;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int zeroflg = 0;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  if (npes == 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    return;
  }

  /* Setup a fake graph to allow the rest of the code to work unchanged */
  dbglvl = 0;

  nvtxs = vtxdist[mype+1]-vtxdist[mype];
  firstvtx = vtxdist[mype];
  xadj = idxmalloc(nvtxs+1, "ParMETIS_PartGeom: xadj");
  adjncy = idxmalloc(nvtxs, "ParMETIS_PartGeom: adjncy");
  for (i=0; i<nvtxs; i++) {
    xadj[i] = i;
    adjncy[i] = firstvtx + (i+1)%nvtxs;
  }
  xadj[nvtxs] = nvtxs;

  /* Proceed with the rest of the code */
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.seed      = mype;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &zeroflg);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial geometric partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 0, &wspace);

  idxcopy(graph->nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  GKfree((void **)&xadj, (void **)&adjncy, LTERM);
}
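A caller only has to provide the vertex distribution, the coordinates, and an output array for this entry point. The following is a minimal, hypothetical MPI driver for the idxtype-based signature shown above; the block distribution, NLOCAL, and the coordinate values are illustrative assumptions rather than part of the library.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <parmetis.h>

#define NLOCAL 5   /* vertices owned by each rank (arbitrary choice) */

int main(int argc, char *argv[])
{
  int i, npes, mype, ndims = 2;
  MPI_Comm comm = MPI_COMM_WORLD;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* Uniform block distribution: rank p owns global vertices [p*NLOCAL, (p+1)*NLOCAL) */
  idxtype *vtxdist = (idxtype *)malloc((npes+1)*sizeof(idxtype));
  for (i = 0; i <= npes; i++)
    vtxdist[i] = i*NLOCAL;

  /* Arbitrary 2-D coordinates: ranks are spread out along the x-axis */
  float *xyz = (float *)malloc(2*NLOCAL*sizeof(float));
  for (i = 0; i < NLOCAL; i++) {
    xyz[2*i]   = (float)mype + 0.1f*(float)i;
    xyz[2*i+1] = (float)i;
  }

  idxtype *part = (idxtype *)malloc(NLOCAL*sizeof(idxtype));
  ParMETIS_V3_PartGeom(vtxdist, &ndims, xyz, part, &comm);

  for (i = 0; i < NLOCAL; i++)
    printf("[%d] vertex %d -> part %d\n", mype, (int)vtxdist[mype]+i, (int)part[i]);

  free(vtxdist); free(xyz); free(part);
  MPI_Finalize();
  return 0;
}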
Example #3
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
************************************************************************************/
void ParMETIS_RepartLDiffusion(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, 
       idxtype *vwgt, realtype *adjwgt, int *wgtflag, int *numflag, int *options,
       int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  if (npes == 1) { /* Take care of the npes = 1 case */
    idxset(vtxdist[1], 0, part);
    *edgecut = 0;
    return;
  }

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  SetUpCtrl(&ctrl, npes, options, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 70*npes);

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "Par_KMetis: vsize");

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call AdaptiveUndirected_Partition\n",mype));
  AdaptiveUndirected_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TRACK, printf("%d ParMETIS_RepartLDiffusion about to call ReMapGraph\n",mype));
  ReMapGraph(&ctrl, graph, 0, &wspace);

  idxcopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  IMfree((void**)&graph->vsize, LTERM);
  FreeInitialGraphAndRemap(graph, *wgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);
}
Example #4
File: fused.c Project: mrklein/ParMGridGen
/***********************************************************************************
* This function creates the fused-element-graph and returns the partition
************************************************************************************/
void ParMETIS_FusedElementGraph(idxtype *vtxdist, idxtype *xadj, realtype *vvol,
              realtype *vsurf, idxtype *adjncy, idxtype *vwgt, realtype *adjwgt,
              int *wgtflag, int *numflag, int *nparts, int *options,
              idxtype *part, MPI_Comm *comm)
{
  int npes, mype, nvtxs;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  nvtxs = vtxdist[mype+1]-vtxdist[mype];

  /* IFSET(options[OPTION_DBGLVL], DBG_TRACK, printf("%d ParMETIS_FEG npes=%d\n",mype, npes)); */

  SetUpCtrl(&ctrl, *nparts, options, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, *nparts));

  graph = SetUpGraph(&ctrl, vtxdist, xadj, vwgt, adjncy, adjwgt, *wgtflag);

  graph->where = part;

  PreAllocateMemory(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  CreateFusedElementGraph(&ctrl, graph, &wspace, numflag);

  idxcopy(nvtxs, graph->where, part);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  if (((*wgtflag)&2) == 0)
    IMfree((void**)&graph->vwgt, LTERM);
  IMfree((void**)&graph->lperm, &graph->peind, &graph->pexadj, &graph->peadjncy,
         &graph->peadjloc, &graph->recvptr, &graph->recvind, &graph->sendptr,
         &graph->imap, &graph->sendind, &graph, LTERM);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);
}
Example #5
int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, 
          idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, 
          real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *objval, 
          idx_t *part)
{
  int sigrval=0, renumber=0;
  graph_t *graph;
  ctrl_t *ctrl;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init()) 
    return METIS_ERROR_MEMORY;

  gk_sigtrap();

  if ((sigrval = gk_sigcatch()) != 0)
    goto SIGTHROW;


  /* set up the run parameters */
  ctrl = SetupCtrl(METIS_OP_KMETIS, options, *ncon, *nparts, tpwgts, ubvec);
  if (!ctrl) {
    gk_siguntrap();
    return METIS_ERROR_INPUT;
  }

  /* if required, change the numbering to 0 */
  if (ctrl->numflag == 1) {
    Change2CNumbering(*nvtxs, xadj, adjncy);
    renumber = 1;
  }

  /* set up the graph */
  graph = SetupGraph(ctrl, *nvtxs, *ncon, xadj, adjncy, vwgt, vsize, adjwgt);

  /* set up multipliers for making balance computations easier */
  SetupKWayBalMultipliers(ctrl, graph);

  /* set various run parameters that depend on the graph */
  if (ctrl->iptype == METIS_IPTYPE_METISRB) {
    ctrl->CoarsenTo = gk_max((*nvtxs)/(40*gk_log2(*nparts)), 30*(*nparts));
    ctrl->CoarsenTo = 10*(*nparts);
    ctrl->nIparts   = (ctrl->CoarsenTo == 30*(*nparts) ? 4 : 5);
  }
  else {
    ctrl->CoarsenTo = 10*(*nparts);
    ctrl->nIparts   = 10;
  }

  /* take care of contiguity requests for disconnected graphs */
  if (ctrl->contig && !IsConnected(graph, 0)) 
    gk_errexit(SIGERR, "METIS Error: A contiguous partition is requested for a non-contiguous input graph.\n");
    
  /* allocate workspace memory */  
  AllocateWorkSpace(ctrl, graph);

  /* start the partitioning */
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startwctimer(ctrl->TotalTmr));

  *objval = MlevelKWayPartitioning(ctrl, graph, part);

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopwctimer(ctrl->TotalTmr));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));

  /* clean up */
  FreeCtrl(&ctrl);

SIGTHROW:
  /* if required, change the numbering back to 1 */
  if (renumber)
    Change2FNumbering(*nvtxs, xadj, adjncy, part);

  gk_siguntrap();
  gk_malloc_cleanup(0);

  return metis_rcode(sigrval);
}
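Because this is the serial k-way entry point, a complete caller fits in a few lines. The sketch below is a hypothetical toy driver (not part of the METIS sources) that partitions a 4-vertex cycle, stored in CSR form, into two parts using default options.

#include <stdio.h>
#include <metis.h>

int main(void)
{
  idx_t nvtxs = 4, ncon = 1, nparts = 2, objval;
  /* CSR adjacency of the cycle 0-1-2-3-0 */
  idx_t xadj[5]   = {0, 2, 4, 6, 8};
  idx_t adjncy[8] = {1, 3, 0, 2, 1, 3, 0, 2};
  idx_t part[4];
  idx_t options[METIS_NOPTIONS];

  METIS_SetDefaultOptions(options);

  if (METIS_PartGraphKway(&nvtxs, &ncon, xadj, adjncy, NULL, NULL, NULL,
                          &nparts, NULL, NULL, options, &objval, part) != METIS_OK)
    return 1;

  printf("edgecut = %d\n", (int)objval);
  for (idx_t i = 0; i < nvtxs; i++)
    printf("vertex %d -> part %d\n", (int)i, (int)part[i]);
  return 0;
}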
Example #6
File: ometis.c Project: arbenson/Clique
int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt,
          idx_t *options, idx_t *perm, idx_t *iperm) 
{
  int sigrval=0, renumber=0;
  idx_t i, ii, j, l, nnvtxs=0;
  graph_t *graph=NULL;
  ctrl_t *ctrl;
  idx_t *cptr, *cind, *piperm;
  int numflag = 0;

  /* set up malloc cleaning code and signal catchers */
  if (!gk_malloc_init()) 
    return METIS_ERROR_MEMORY;

  gk_sigtrap();

  if ((sigrval = gk_sigcatch()) != 0) 
    goto SIGTHROW;


  /* set up the run time parameters */
  ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);
  if (!ctrl) {
    gk_siguntrap();
    return METIS_ERROR_INPUT;
  }

  /* if required, change the numbering to 0 */
  if (ctrl->numflag == 1) {
    Change2CNumbering(*nvtxs, xadj, adjncy);
    renumber = 1;
  }

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, InitTimers(ctrl));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_startcputimer(ctrl->TotalTmr));

  /* prune the dense columns */
  if (ctrl->pfactor > 0.0) { 
    piperm = imalloc(*nvtxs, "OMETIS: piperm");

    graph = PruneGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, piperm, ctrl->pfactor);
    if (graph == NULL) {
      /* if there was no pruning, clean up the pfactor */
      gk_free((void **)&piperm, LTERM);
      ctrl->pfactor = 0.0;
    }
    else {
      nnvtxs = graph->nvtxs;
      ctrl->compress = 0;  /* disable compression if pruning took place */
    }
  }

  /* compress the graph; note that compression only happens if no pruning 
     has taken place. */
  if (ctrl->compress) { 
    cptr = imalloc(*nvtxs+1, "OMETIS: cptr");
    cind = imalloc(*nvtxs, "OMETIS: cind");

    graph = CompressGraph(ctrl, *nvtxs, xadj, adjncy, vwgt, cptr, cind);
    if (graph == NULL) {
      /* if there was no compression, clean up the compress flag */
      gk_free((void **)&cptr, &cind, LTERM);
      ctrl->compress = 0; 
    }
    else {
      nnvtxs = graph->nvtxs;
      ctrl->cfactor = 1.0*(*nvtxs)/nnvtxs;
      if (ctrl->cfactor > 1.5 && ctrl->nseps == 1)
        ctrl->nseps = 2;
      //ctrl->nseps = (idx_t)(ctrl->cfactor*ctrl->nseps);
    }
  }

  /* if no pruning and no compression, set up the graph in the normal way. */
  if (ctrl->pfactor == 0.0 && ctrl->compress == 0) 
    graph = SetupGraph(ctrl, *nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);

  ASSERT(CheckGraph(graph, ctrl->numflag, 1));

  /* allocate workspace memory */
  AllocateWorkSpace(ctrl, graph);

  /* do the nested dissection ordering  */
  if (ctrl->ccorder) 
    MlevelNestedDissectionCC(ctrl, graph, iperm, graph->nvtxs);
  else
    MlevelNestedDissection(ctrl, graph, iperm, graph->nvtxs);


  if (ctrl->pfactor > 0.0) { /* Order any pruned vertices */
    icopy(nnvtxs, iperm, perm);  /* Use perm as an auxiliary array */
    for (i=0; i<nnvtxs; i++)
      iperm[piperm[i]] = perm[i];
    for (i=nnvtxs; i<*nvtxs; i++)
      iperm[piperm[i]] = i;

    gk_free((void **)&piperm, LTERM);
  }
  else if (ctrl->compress) { /* Uncompress the ordering */
    /* construct perm from iperm */
    for (i=0; i<nnvtxs; i++)
      perm[iperm[i]] = i; 
    for (l=ii=0; ii<nnvtxs; ii++) {
      i = perm[ii];
      for (j=cptr[i]; j<cptr[i+1]; j++)
        iperm[cind[j]] = l++;
    }

    gk_free((void **)&cptr, &cind, LTERM);
  }

  for (i=0; i<*nvtxs; i++)
    perm[iperm[i]] = i;

  IFSET(ctrl->dbglvl, METIS_DBG_TIME, gk_stopcputimer(ctrl->TotalTmr));
  IFSET(ctrl->dbglvl, METIS_DBG_TIME, PrintTimers(ctrl));

  /* clean up */
  FreeCtrl(&ctrl);

SIGTHROW:
  /* if required, change the numbering back to 1 */
  if (renumber)
    Change2FNumberingOrder(*nvtxs, xadj, adjncy, perm, iperm);

  gk_siguntrap();
  gk_malloc_cleanup(0);

  return metis_rcode(sigrval);
}
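A caller for this ordering entry point is just as small. The hypothetical sketch below orders a 4-vertex path graph with default options and prints the permutation and its inverse; the graph itself is an illustrative assumption.

#include <stdio.h>
#include <metis.h>

int main(void)
{
  idx_t nvtxs = 4;
  /* CSR adjacency of the path 0-1-2-3 */
  idx_t xadj[5]   = {0, 1, 3, 5, 6};
  idx_t adjncy[6] = {1, 0, 2, 1, 3, 2};
  idx_t perm[4], iperm[4], options[METIS_NOPTIONS];

  METIS_SetDefaultOptions(options);

  if (METIS_NodeND(&nvtxs, xadj, adjncy, NULL, options, perm, iperm) != METIS_OK)
    return 1;

  for (idx_t i = 0; i < nvtxs; i++)
    printf("perm[%d] = %d, iperm[%d] = %d\n", (int)i, (int)perm[i], (int)i, (int)iperm[i]);
  return 0;
}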
Example #7
File: ometis.c Project: KnoooW/gpgpu-sim
/***********************************************************************************
* This function is the entry point of the parallel ordering algorithm.
* This function assumes that the graph is already nicely partitioned among the 
* processors and then proceeds to perform recursive bisection.
************************************************************************************/
void ParMETIS_V3_NodeND(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy, int *numflag,
              int *options, idxtype *order, idxtype *sizes, MPI_Comm *comm)
{
  int i, j;
  int ltvwgts[MAXNCON];
  int nparts, npes, mype, wgtflag = 0, seed = GLOBAL_SEED;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  idxtype *morder;
  int minnvtxs;

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);
  nparts = npes;

  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    return;
  }

  if (vtxdist[npes] < (int)((float)(npes*npes)*1.2)) {
    if (mype == 0)
      printf("Error: Too many processors for this many vertices.\n");
    return;
  }

  minnvtxs = vtxdist[1]-vtxdist[0];
  for (i=0; i<npes; i++)
    minnvtxs = (minnvtxs < vtxdist[i+1]-vtxdist[i]) ? minnvtxs : vtxdist[i+1]-vtxdist[i];

  if (minnvtxs < (int)((float)npes*1.1)) {
    if (mype == 0)
      printf("Error: vertices are not distributed equally.\n");
    return;
  }
 

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  SetUpCtrl(&ctrl, nparts, options[PMV3_OPTION_DBGLVL], *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*npes);

  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*amax(npes, nparts));
  ctrl.seed = mype;
  ctrl.sync = seed;
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = fsmalloc(nparts, 1.0/(float)(nparts), "tpwgts");
  ctrl.ubvec[0] = 1.03;

  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, NULL, adjncy, NULL, &wgtflag);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=======================================================
   * Compute the initial k-way partitioning 
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Moc_Global_Partition(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=======================================================
   * Move the graph according to the partitioning
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  MALLOC_CHECK(NULL);
  graph->ncon = 1;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  /*=======================================================
   * Now compute an ordering of the moved graph
   =======================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  ctrl.ipart = ISEP_NODE;
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, amax(20*npes, 1000));

  /* compute tvwgts */
  for (j=0; j<mgraph->ncon; j++)
    ltvwgts[j] = 0;

  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      ltvwgts[j] += mgraph->vwgt[i*mgraph->ncon+j];

  for (j=0; j<mgraph->ncon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  mgraph->nvwgt = fmalloc(mgraph->nvtxs*mgraph->ncon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<mgraph->ncon; j++)
      mgraph->nvwgt[i*mgraph->ncon+j] = (float)(mgraph->vwgt[i*mgraph->ncon+j]) / (float)(ctrl.tvwgts[j]);


  morder = idxmalloc(mgraph->nvtxs, "PAROMETIS: morder");
  MultilevelOrder(&ctrl, mgraph, morder, sizes, &wspace);

  MALLOC_CHECK(NULL);

  /* Invert the ordering back to the original graph */
  ProjectInfoBack(&ctrl, graph, order, morder, &wspace);

  MALLOC_CHECK(NULL);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  free(ctrl.tpwgts);
  free(morder);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, 0);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (*numflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);

  MALLOC_CHECK(NULL);
}
Example #8
File: gkmetis.c Project: KnoooW/gpgpu-sim
/***********************************************************************************
* This function is the entry point of the parallel kmetis algorithm that uses
* coordinates to compute an initial graph distribution.
************************************************************************************/
void ParMETIS_V3_PartGeomKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ndims, 
	      float *xyz, int *ncon, int *nparts, float *tpwgts, float *ubvec, 
	      int *options, int *edgecut, idxtype *part, MPI_Comm *comm)
{
  int h, i, j;
  int nvtxs = -1, npes, mype;
  int uwgtflag, cut, gcut, maxnvtxs;
  int ltvwgts[MAXNCON];
  int moptions[10];
  CtrlType ctrl;
  idxtype *uvwgt;
  WorkSpaceType wspace;
  GraphType *graph, *mgraph;
  float avg, maximb, balance, *mytpwgts;
  int seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /********************************/
  /* Try to take care of bad inputs */
  /********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];

  CheckInputs(STATIC_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
	      NULL, NULL, options, ioptions, part, comm);


  /*********************************/
  /* Take care of the nparts = 1 case */
  /*********************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    *edgecut = 0;
    return;
  }

  /******************************/
  /* Take care of npes = 1 case */
  /******************************/
  if (npes == 1 && inparts > 1) {
    moptions[0] = 0;
    nvtxs = vtxdist[1];

    if (incon == 1) {
      METIS_WPartGraphKway(&nvtxs, xadj, adjncy, vwgt, adjwgt, &iwgtflag, &inumflag, 
            &inparts, itpwgts, moptions, edgecut, part);
    }
    else {
      /* ADD: this is because METIS does not support tpwgts for all constraints */
      mytpwgts = fmalloc(inparts, "mytpwgts");
      for (i=0; i<inparts; i++)
        mytpwgts[i] = itpwgts[i*incon];

      moptions[7] = -1;
      METIS_mCPartGraphRecursive2(&nvtxs, &incon, xadj, adjncy, vwgt, adjwgt, &iwgtflag, 
            &inumflag, &inparts, mytpwgts, moptions, edgecut, part);

      free(mytpwgts);
    }

    return;
  }


  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
  }
  SetUpCtrl(&ctrl, npes, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 25*incon*amax(npes, inparts));
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = STATIC_PARTITION;
  ctrl.ps_relation = -1;
  ctrl.tpwgts = itpwgts;
  scopy(incon, iubvec, ctrl.ubvec);

  uwgtflag = iwgtflag|2;
  uvwgt = idxsmalloc(vtxdist[mype+1]-vtxdist[mype], 1, "uvwgt");
  graph = Moc_SetUpGraph(&ctrl, 1, vtxdist, xadj, uvwgt, adjncy, adjwgt, &uwgtflag);
  free(graph->nvwgt); graph->nvwgt = NULL;

  PreAllocateMemory(&ctrl, graph, &wspace);

  /*=================================================================
   * Compute the initial npes-way geometric partitioning
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Coordinate_Partition(&ctrl, graph, *ndims, xyz, 1, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));

  /*=================================================================
   * Move the graph according to the partitioning
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.MoveTmr));

  free(uvwgt);
  graph->vwgt = ((iwgtflag&2) != 0) ? vwgt : idxsmalloc(graph->nvtxs*incon, 1, "vwgt");
  graph->ncon = incon;
  j = ctrl.nparts;
  ctrl.nparts = ctrl.npes;
  mgraph = Moc_MoveGraph(&ctrl, graph, &wspace);
  ctrl.nparts = j;

  /**********************************************************/
  /* Perform the same setup as Moc_SetUpGraph for mgraph */
  /**********************************************************/
  /* compute tvwgts */
  for (j=0; j<incon; j++)
    ltvwgts[j] = 0;

  for (i=0; i<graph->nvtxs; i++)
    for (j=0; j<incon; j++)
      ltvwgts[j] += mgraph->vwgt[i*incon+j];

  for (j=0; j<incon; j++)
    ctrl.tvwgts[j] = GlobalSESum(&ctrl, ltvwgts[j]);

  /* check for zero wgt constraints */
  for (i=0; i<incon; i++) {
    /* ADD: take care of the case in which tvwgts is zero */
    if (ctrl.tvwgts[i] == 0) {
      if (ctrl.mype == 0) printf("ERROR: sum weight for constraint %d is zero\n", i);
      MPI_Finalize();
      exit(-1);
    }
  }

  /* compute nvwgt */
  mgraph->nvwgt = fmalloc(mgraph->nvtxs*incon, "mgraph->nvwgt");
  for (i=0; i<mgraph->nvtxs; i++)
    for (j=0; j<incon; j++)
      mgraph->nvwgt[i*incon+j] = (float)(mgraph->vwgt[i*incon+j]) / (float)(ctrl.tvwgts[j]);


  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.MoveTmr));

  if (ctrl.dbglvl&DBG_INFO) {
    cut = 0;
    for (i=0; i<graph->nvtxs; i++)
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++)
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
    gcut = GlobalSESum(&ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(&ctrl, mgraph->nvtxs);
    balance = (float)(maxnvtxs)/((float)(graph->gnvtxs)/(float)(npes));
    rprintf(&ctrl, "XYZ Cut: %6d \tBalance: %6.3f [%d %d %d]\n",
      gcut, balance, maxnvtxs, graph->gnvtxs, npes);

  }

  /*=================================================================
   * Set up the newly moved graph
   =================================================================*/
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  ctrl.nparts = inparts;
  FreeWSpace(&wspace);
  PreAllocateMemory(&ctrl, mgraph, &wspace);

  /*=======================================================
   * Now compute the partition of the moved graph
   =======================================================*/
  if (vtxdist[npes] < SMALLGRAPH || vtxdist[npes] < npes*20 || GlobalSESum(&ctrl, mgraph->nedges) == 0) {
    IFSET(ctrl.dbglvl, DBG_INFO, rprintf(&ctrl, "Partitioning a graph of size %d serially\n", vtxdist[npes]));
    PartitionSmallGraph(&ctrl, mgraph, &wspace);
  }
  else {
    Moc_Global_Partition(&ctrl, mgraph, &wspace);
  }
  ParallelReMapGraph(&ctrl, mgraph, &wspace);

  /* Invert the ordering back to the original graph */
  ctrl.nparts = npes;
  ProjectInfoBack(&ctrl, graph, part, mgraph->where, &wspace);

  *edgecut = mgraph->mincut;

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  /*******************/
  /* Print out stats */
  /*******************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    rprintf(&ctrl, "Final %d-way CUT: %6d \tBalance: ", inparts, mgraph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, mgraph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "  avg: %.3f\n", avg/(float)incon);
  }

  GKfree((void **)&itpwgts, LTERM);
  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

}
Example #9
File: rmetis.c Project: ADTG-VSSC/SU2
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
************************************************************************************/
int ParMETIS_V3_RefineKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, 
        idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, 
        real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, 
        MPI_Comm *comm)
{
  idx_t npes, mype, status;
  ctrl_t *ctrl=NULL;
  graph_t *graph=NULL;
  size_t curmem;


  /* Check the input parameters and return if an error */
  status = CheckInputsPartKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag, 
               numflag, ncon, nparts, tpwgts, ubvec, options, edgecut, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0) 
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_RMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;


  /* Take care of the nparts == 1 case */
  if (*nparts == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part); 
    *edgecut = 0;
    goto DONE;
  }


  /* setup the graph */
  if (*numflag > 0) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag);

  if (ctrl->ps_relation == PARMETIS_PSR_COUPLED)
    iset(graph->nvtxs, mype, graph->home);
  else
    icopy(graph->nvtxs, part, graph->home);


  /* Allocate workspace */
  AllocateWSpace(ctrl, 10*graph->nvtxs);


  /* Partition and Remap */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 50*(*ncon)*gk_max(npes, *nparts));

  Adaptive_Partition(ctrl, graph);
  ParallelReMapGraph(ctrl, graph);

  icopy(graph->nvtxs, graph->where, part);
  *edgecut = graph->mincut;

  STOPTIMER(ctrl, ctrl->TotalTmr);

  /* Take care of output */
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));
  IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, graph, 1));

  FreeInitialGraphAndRemap(graph);

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
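A hypothetical MPI driver for the signature above could look as follows. The global ring graph, NLOCAL, and the choice nparts == npes with part[] seeded to the owning rank (the coupled case) are illustrative assumptions, not requirements of the API.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <parmetis.h>

#define NLOCAL 16   /* vertices per rank (arbitrary choice) */

int main(int argc, char *argv[])
{
  int i, npes, mype;
  MPI_Comm comm = MPI_COMM_WORLD;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* Block distribution over a global ring of npes*NLOCAL vertices */
  idx_t gnvtxs = (idx_t)npes*NLOCAL;
  idx_t *vtxdist = (idx_t *)malloc((npes+1)*sizeof(idx_t));
  for (i = 0; i <= npes; i++)
    vtxdist[i] = (idx_t)i*NLOCAL;

  idx_t xadj[NLOCAL+1], adjncy[2*NLOCAL], part[NLOCAL];
  for (i = 0; i < NLOCAL; i++) {
    idx_t gv = vtxdist[mype] + i;
    xadj[i]       = 2*i;
    adjncy[2*i]   = (gv + gnvtxs - 1) % gnvtxs;  /* left neighbor on the ring */
    adjncy[2*i+1] = (gv + 1) % gnvtxs;           /* right neighbor */
    part[i]       = mype;                        /* existing partition = current distribution */
  }
  xadj[NLOCAL] = 2*NLOCAL;

  idx_t wgtflag = 0, numflag = 0, ncon = 1, nparts = npes, edgecut;
  idx_t options[3] = {0, 0, 0};                  /* options[0] == 0: use defaults */
  real_t ubvec[1] = {1.05};
  real_t *tpwgts = (real_t *)malloc(nparts*ncon*sizeof(real_t));
  for (i = 0; i < (int)(nparts*ncon); i++)
    tpwgts[i] = 1.0/(real_t)nparts;

  ParMETIS_V3_RefineKway(vtxdist, xadj, adjncy, NULL, NULL, &wgtflag, &numflag,
                         &ncon, &nparts, tpwgts, ubvec, options, &edgecut,
                         part, &comm);

  if (mype == 0)
    printf("refined edgecut = %d\n", (int)edgecut);

  free(vtxdist); free(tpwgts);
  MPI_Finalize();
  return 0;
}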
Example #10
ctrl_t *SetupCtrl(moptype_et optype, idx_t *options, idx_t ncon, idx_t nparts, 
            real_t *tpwgts, real_t *ubvec)
{
  idx_t i, j;
  ctrl_t *ctrl;

  ctrl = (ctrl_t *)gk_malloc(sizeof(ctrl_t), "SetupCtrl: ctrl");
  
  memset((void *)ctrl, 0, sizeof(ctrl_t));

  switch (optype) {
    case METIS_OP_PMETIS:
      ctrl->objtype   = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
      ctrl->ctype     = GETOPTION(options, METIS_OPTION_CTYPE,   METIS_CTYPE_SHEM);
      ctrl->rtype     = METIS_RTYPE_FM;
      ctrl->ncuts     = GETOPTION(options, METIS_OPTION_NCUTS,   1);
      ctrl->niter     = GETOPTION(options, METIS_OPTION_NITER,   10);
      ctrl->seed      = GETOPTION(options, METIS_OPTION_SEED,    -1);
      ctrl->dbglvl    = GETOPTION(options, METIS_OPTION_DBGLVL,  0);

      if (ncon == 1) {
        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE,  METIS_IPTYPE_GROW);
        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, PMETIS_DEFAULT_UFACTOR);
        ctrl->CoarsenTo = 20;
      }
      else {
        ctrl->iptype    = GETOPTION(options, METIS_OPTION_IPTYPE,  METIS_IPTYPE_RANDOM);
        ctrl->ufactor   = GETOPTION(options, METIS_OPTION_UFACTOR, MCPMETIS_DEFAULT_UFACTOR);
        ctrl->CoarsenTo = 100;
      }

      break;


    case METIS_OP_KMETIS:
      ctrl->objtype  = GETOPTION(options, METIS_OPTION_OBJTYPE, METIS_OBJTYPE_CUT);
      ctrl->ctype    = GETOPTION(options, METIS_OPTION_CTYPE,   METIS_CTYPE_SHEM);
      ctrl->iptype   = METIS_IPTYPE_METISRB;
      ctrl->rtype    = METIS_RTYPE_GREEDY;
      ctrl->ncuts    = GETOPTION(options, METIS_OPTION_NCUTS,   1);
      ctrl->niter    = GETOPTION(options, METIS_OPTION_NITER,   10);
      ctrl->ufactor  = GETOPTION(options, METIS_OPTION_UFACTOR, KMETIS_DEFAULT_UFACTOR);
      ctrl->minconn  = GETOPTION(options, METIS_OPTION_MINCONN, 0);
      ctrl->contig   = GETOPTION(options, METIS_OPTION_CONTIG,  0);
      ctrl->seed     = GETOPTION(options, METIS_OPTION_SEED,    -1);
      ctrl->dbglvl   = GETOPTION(options, METIS_OPTION_DBGLVL,  0);
      break;


    case METIS_OP_OMETIS:
      ctrl->objtype  = GETOPTION(options, METIS_OPTION_OBJTYPE,  METIS_OBJTYPE_NODE);
      ctrl->ctype    = GETOPTION(options, METIS_OPTION_CTYPE,    METIS_CTYPE_SHEM);
      ctrl->rtype    = GETOPTION(options, METIS_OPTION_RTYPE,    METIS_RTYPE_SEP1SIDED);
      ctrl->iptype   = GETOPTION(options, METIS_OPTION_IPTYPE,   METIS_IPTYPE_EDGE);
      ctrl->nseps    = GETOPTION(options, METIS_OPTION_NSEPS,    1);
      ctrl->niter    = GETOPTION(options, METIS_OPTION_NITER,    10);
      ctrl->ufactor  = GETOPTION(options, METIS_OPTION_UFACTOR,  OMETIS_DEFAULT_UFACTOR);
      ctrl->compress = GETOPTION(options, METIS_OPTION_COMPRESS, 1);
      ctrl->ccorder  = GETOPTION(options, METIS_OPTION_CCORDER,  0);
      ctrl->seed     = GETOPTION(options, METIS_OPTION_SEED,     -1);
      ctrl->dbglvl   = GETOPTION(options, METIS_OPTION_DBGLVL,   0);
      ctrl->pfactor  = 0.1*GETOPTION(options, METIS_OPTION_PFACTOR,  0);

      ctrl->CoarsenTo = 100;
      break;

    default:
      gk_errexit(SIGERR, "Unknown optype of %d\n", optype);
  }

  ctrl->numflag  = GETOPTION(options, METIS_OPTION_NUMBERING, 0);
  ctrl->optype   = optype;
  ctrl->ncon     = ncon;
  ctrl->nparts   = nparts;
  ctrl->maxvwgt  = ismalloc(ncon, 0, "SetupCtrl: maxvwgt");


  /* setup the target partition weights */
  if (ctrl->optype != METIS_OP_OMETIS) {
    ctrl->tpwgts = rmalloc(nparts*ncon, "SetupCtrl: ctrl->tpwgts");
    if (tpwgts) {
      rcopy(nparts*ncon, tpwgts, ctrl->tpwgts);
    }
    else {
      for (i=0; i<nparts; i++) {
        for (j=0; j<ncon; j++)
          ctrl->tpwgts[i*ncon+j] = 1.0/nparts;
      }
    }
  }
  else {  /* METIS_OP_OMETIS */
    /* this is required to allow the pijbm to be defined properly for
       the edge-based refinement during initial partitioning */
    ctrl->tpwgts    = rsmalloc(2, .5,  "SetupCtrl: ctrl->tpwgts");
  }


  /* setup the ubfactors */
  ctrl->ubfactors = rsmalloc(ctrl->ncon, I2RUBFACTOR(ctrl->ufactor), "SetupCtrl: ubfactors");
  if (ubvec)
    rcopy(ctrl->ncon, ubvec, ctrl->ubfactors);
  for (i=0; i<ctrl->ncon; i++)
    ctrl->ubfactors[i] += 0.0000499;

  /* Allocate memory for balance multipliers. 
     Note that for the PMETIS/OMETIS routines the memory allocated is more 
     than required, as balance multipliers for 2 parts are sufficient. */
  ctrl->pijbm = rmalloc(nparts*ncon, "SetupCtrl: ctrl->pijbm");

  InitRandom(ctrl->seed);

  IFSET(ctrl->dbglvl, METIS_DBG_INFO, PrintCtrl(ctrl));

  if (!CheckParams(ctrl)) {
    FreeCtrl(&ctrl);
    return NULL;
  }
  else {
    return ctrl;
  }
}
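On the caller side, the options array consumed by the GETOPTION lookups above is typically prepared with METIS_SetDefaultOptions, which leaves every entry unset so that SetupCtrl falls back to the defaults listed here. The sketch below illustrates that pattern; the helper function and the specific option values are assumptions made for the example.

#include <metis.h>

/* Hypothetical helper: partition a CSR graph with a few options overridden.
 * METIS_SetDefaultOptions() marks all entries as "use the default", so only
 * the explicitly assigned entries deviate from the values SetupCtrl picks. */
int partition_with_options(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy,
                           idx_t *nparts, idx_t *objval, idx_t *part)
{
  idx_t options[METIS_NOPTIONS];

  METIS_SetDefaultOptions(options);
  options[METIS_OPTION_SEED]   = 42;              /* fixed RNG seed (default is -1) */
  options[METIS_OPTION_NCUTS]  = 4;               /* compute 4 cuts, keep the best */
  options[METIS_OPTION_DBGLVL] = METIS_DBG_TIME;  /* enable the timing paths guarded by IFSET */

  return METIS_PartGraphKway(nvtxs, ncon, xadj, adjncy, NULL, NULL, NULL,
                             nparts, NULL, NULL, options, objval, part);
}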
Example #11
File: gkmetis.c Project: certik/libmesh
/***********************************************************************************
* This function is the entry point of the parallel kmetis algorithm that uses
* coordinates to compute an initial graph distribution.
************************************************************************************/
int ParMETIS_V3_PartGeomKway(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy,
        idx_t *vwgt, idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ndims, 
	real_t *xyz, idx_t *ncon, idx_t *nparts, real_t *tpwgts, real_t *ubvec, 
	idx_t *options, idx_t *edgecut, idx_t *part, MPI_Comm *comm)
{
  idx_t h, i, j, npes, mype, status, nvtxs, seed, dbglvl;
  idx_t cut, gcut, maxnvtxs;
  idx_t moptions[METIS_NOPTIONS];
  ctrl_t *ctrl;
  graph_t *graph, *mgraph;
  real_t balance;
  size_t curmem;

  /* Check the input parameters and return if an error */
  status = CheckInputsPartGeomKway(vtxdist, xadj, adjncy, vwgt, adjwgt, wgtflag,
                numflag, ndims, xyz, ncon, nparts, tpwgts, ubvec, options, 
                edgecut, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_GKMETIS, options, *ncon, *nparts, tpwgts, ubvec, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  /* Take care of the nparts == 1 case */
  if (*nparts == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], (*numflag == 0 ? 0 : 1), part);
    *edgecut = 0;
    goto DONE;
  }


  /* Take care of npes == 1 case */
  if (npes == 1) {
    nvtxs = vtxdist[1] - vtxdist[0];  /* subtraction is required when numflag==1 */

    METIS_SetDefaultOptions(moptions);
    moptions[METIS_OPTION_NUMBERING] = *numflag;

    status = METIS_PartGraphKway(&nvtxs, ncon, xadj, adjncy, vwgt, NULL, adjwgt, 
                 nparts, tpwgts, ubvec, moptions, edgecut, part);

    goto DONE;
  }


  /* Setup the graph */
  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  graph = SetupGraph(ctrl, *ncon, vtxdist, xadj, vwgt, NULL, adjncy, adjwgt, *wgtflag);
  gk_free((void **)&graph->nvwgt, LTERM); 


  /* Allocate the workspace */
  AllocateWSpace(ctrl, 10*graph->nvtxs);


  /* Compute the initial npes-way geometric partitioning */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  Coordinate_Partition(ctrl, graph, *ndims, xyz, 1);

  STOPTIMER(ctrl, ctrl->TotalTmr);


  /* Move the graph according to the partitioning */
  STARTTIMER(ctrl, ctrl->MoveTmr);

  ctrl->nparts = npes;
  mgraph = MoveGraph(ctrl, graph);
  ctrl->nparts = *nparts;

  SetupGraph_nvwgts(ctrl, mgraph); /* compute nvwgts for the moved graph */

  if (ctrl->dbglvl&DBG_INFO) {
    CommInterfaceData(ctrl, graph, graph->where, graph->where+graph->nvtxs);
    for (cut=0, i=0; i<graph->nvtxs; i++) {
      for (j=graph->xadj[i]; j<graph->xadj[i+1]; j++) {
        if (graph->where[i] != graph->where[graph->adjncy[j]])
          cut += graph->adjwgt[j];
      }
    }
    gcut     = GlobalSESum(ctrl, cut)/2;
    maxnvtxs = GlobalSEMax(ctrl, mgraph->nvtxs);
    balance  = (real_t)(maxnvtxs)/((real_t)(graph->gnvtxs)/(real_t)(npes));
    rprintf(ctrl, "XYZ Cut: %6"PRIDX" \tBalance: %6.3"PRREAL" [%"PRIDX" %"PRIDX" %"PRIDX"]\n",
       gcut, balance, maxnvtxs, graph->gnvtxs, npes);
  }

  STOPTIMER(ctrl, ctrl->MoveTmr);


  /* Compute the partition of the moved graph */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  ctrl->CoarsenTo = gk_min(vtxdist[npes]+1, 25*(*ncon)*gk_max(npes, *nparts));

  if (vtxdist[npes] < SMALLGRAPH 
      || vtxdist[npes] < npes*20 
      || GlobalSESum(ctrl, mgraph->nedges) == 0) { /* serially */
    IFSET(ctrl->dbglvl, DBG_INFO, 
        rprintf(ctrl, "Partitioning a graph of size %"PRIDX" serially\n", vtxdist[npes]));
    PartitionSmallGraph(ctrl, mgraph);
  }
  else { /* in parallel */
    Global_Partition(ctrl, mgraph);
  }

  ParallelReMapGraph(ctrl, mgraph);

  /* Invert the ordering back to the original graph */
  ctrl->nparts = npes;
  ProjectInfoBack(ctrl, graph, part, mgraph->where);
  ctrl->nparts = *nparts;

  *edgecut = mgraph->mincut;

  STOPTIMER(ctrl, ctrl->TotalTmr);


  /* Print some stats */
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));
  IFSET(ctrl->dbglvl, DBG_INFO, PrintPostPartInfo(ctrl, mgraph, 0));

  FreeGraph(mgraph);
  FreeInitialGraphAndRemap(graph);

  if (*numflag > 0)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
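A hypothetical MPI driver for this idx_t/real_t signature might look as follows. Each rank owns NLOCAL vertices of a global ring and supplies 2-D coordinates; the graph layout, the coordinate values, and nparts == npes are illustrative assumptions.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <parmetis.h>

#define NLOCAL 16   /* vertices per rank (arbitrary choice) */

int main(int argc, char *argv[])
{
  int i, npes, mype;
  MPI_Comm comm = MPI_COMM_WORLD;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  idx_t gnvtxs = (idx_t)npes*NLOCAL;
  idx_t *vtxdist = (idx_t *)malloc((npes+1)*sizeof(idx_t));
  for (i = 0; i <= npes; i++)
    vtxdist[i] = (idx_t)i*NLOCAL;

  /* Local piece of a global ring graph plus 2-D coordinates */
  idx_t xadj[NLOCAL+1], adjncy[2*NLOCAL], part[NLOCAL];
  real_t xyz[2*NLOCAL];
  for (i = 0; i < NLOCAL; i++) {
    idx_t gv = vtxdist[mype] + i;
    xadj[i]       = 2*i;
    adjncy[2*i]   = (gv + gnvtxs - 1) % gnvtxs;
    adjncy[2*i+1] = (gv + 1) % gnvtxs;
    xyz[2*i]      = (real_t)gv;        /* x coordinate */
    xyz[2*i+1]    = (real_t)(gv % 3);  /* y coordinate (kept non-degenerate) */
  }
  xadj[NLOCAL] = 2*NLOCAL;

  idx_t wgtflag = 0, numflag = 0, ndims = 2, ncon = 1;
  idx_t nparts = npes, edgecut, options[3] = {0, 0, 0};
  real_t ubvec[1] = {1.05};
  real_t *tpwgts = (real_t *)malloc(nparts*ncon*sizeof(real_t));
  for (i = 0; i < (int)(nparts*ncon); i++)
    tpwgts[i] = 1.0/(real_t)nparts;

  ParMETIS_V3_PartGeomKway(vtxdist, xadj, adjncy, NULL, NULL, &wgtflag, &numflag,
                           &ndims, xyz, &ncon, &nparts, tpwgts, ubvec, options,
                           &edgecut, part, &comm);

  if (mype == 0)
    printf("edgecut = %d\n", (int)edgecut);

  free(vtxdist); free(tpwgts);
  MPI_Finalize();
  return 0;
}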
Example #12
File: gkmetis.c Project: certik/libmesh
/***********************************************************************************
* This function is the entry point of the coordinate-based partitioning
* algorithm. It computes a partitioning based solely on the vertex
* coordinates supplied in xyz.
************************************************************************************/
int ParMETIS_V3_PartGeom(idx_t *vtxdist, idx_t *ndims, real_t *xyz, idx_t *part, 
         MPI_Comm *comm)
{
  idx_t i, nvtxs, firstvtx, npes, mype, status;
  idx_t *xadj, *adjncy;
  ctrl_t *ctrl=NULL;
  graph_t *graph=NULL;
  size_t curmem;


  /* Check the input parameters and return if an error */
  status = CheckInputsPartGeom(vtxdist, ndims, xyz, part, comm);
  if (GlobalSEMinComm(*comm, status) == 0)
    return METIS_ERROR;

  status = METIS_OK;
  gk_malloc_init();
  curmem = gk_GetCurMemoryUsed();

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_GMETIS, NULL, 1, 1, NULL, NULL, *comm);
  /*ctrl->dbglvl=15;*/
  npes = ctrl->npes;
  mype = ctrl->mype;


  /* Trivial case when npes == 1 */
  if (npes == 1) {
    iset(vtxdist[mype+1]-vtxdist[mype], 0, part);
    goto DONE;
  }


  /* Setup a fake graph to allow the rest of the code to work unchanged */
  nvtxs    = vtxdist[mype+1]-vtxdist[mype];
  firstvtx = vtxdist[mype];
  xadj     = imalloc(nvtxs+1, "ParMETIS_PartGeom: xadj");
  adjncy   = imalloc(nvtxs, "ParMETIS_PartGeom: adjncy");
  for (i=0; i<nvtxs; i++) {
    xadj[i] = i;
    adjncy[i] = firstvtx + (i+1)%nvtxs;
  }
  xadj[nvtxs] = nvtxs;

  graph = SetupGraph(ctrl, 1, vtxdist, xadj, NULL, NULL, adjncy, NULL, 0);


  /* Allocate workspace memory */
  AllocateWSpace(ctrl, 5*graph->nvtxs);


  /* Compute the initial geometric partitioning */
  STARTTIMER(ctrl, ctrl->TotalTmr);

  Coordinate_Partition(ctrl, graph, *ndims, xyz, 0);
  icopy(graph->nvtxs, graph->where, part);

  STOPTIMER(ctrl, ctrl->TotalTmr);
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));


  gk_free((void **)&xadj, (void **)&adjncy, LTERM);
  FreeInitialGraphAndRemap(graph);


DONE:
  FreeCtrl(&ctrl);
  if (gk_GetCurMemoryUsed() - curmem > 0) {
    printf("ParMETIS appears to have a memory leak of %zdbytes. Report this.\n",
        (ssize_t)(gk_GetCurMemoryUsed() - curmem));
  }
  gk_malloc_cleanup(0);

  return (int)status;
}
Example #13
/**************************************************************************
* mexFunction: gateway routine for MATLAB interface.
***************************************************************************/
void mexFunction
(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    
    // Argument checking
    if (nrhs != 5)
        mexErrMsgIdAndTxt(FUNC_NAME, "Wrong input.");
    if (nlhs != 3)
        mexErrMsgIdAndTxt(FUNC_NAME, "Wrong output.");
    
    // Input and output variables
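    // Note: nvtxs_in, xadj_in, adjncy_in, vwgt_in, options_in (and the *_out
    // variables used near the end) are presumably macros for the prhs[]/plhs[]
    // entries, defined elsewhere in the original source file.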
    idx_t nvtxs = (idx_t) mxGetScalar(nvtxs_in);
    idx_t *xadj; GetIdxArray(xadj_in,&xadj);
    idx_t *adjncy; GetIdxArray(adjncy_in,&adjncy);
    idx_t *vwgt; GetIdxArray(vwgt_in,&vwgt);
    idx_t options[METIS_NOPTIONS];
    GetOptions(options_in, options);
    idx_t *sepidx;
    idx_t *lgraphidx;
    idx_t *rgraphidx;
    
    // Metis main function
    idx_t i, nnvtxs=0;
    idx_t ptlgraph, ptrgraph, ptsep;
    graph_t *graph=NULL;
    ctrl_t *ctrl;
    idx_t *piperm;
    idx_t snvtxs[3];
    idx_t *where;

    /* set up malloc cleaning code and signal catchers */
    if (!gk_malloc_init()) 
        CheckReturn( METIS_ERROR_MEMORY, FUNC_NAME );

    // set up the run time parameters
    ctrl = SetupCtrl(METIS_OP_OMETIS, options, 1, 3, NULL, NULL);

    // prune the dense columns
    if (ctrl->pfactor > 0.0)
    { 
        piperm = imalloc(nvtxs, "OMETIS: piperm");

        graph = PruneGraph(ctrl, nvtxs, xadj, adjncy, vwgt,
                piperm, ctrl->pfactor);
        if (graph == NULL)
        {
            // if there was no pruning, clean up the pfactor
            gk_free((void **)&piperm, LTERM);
            ctrl->pfactor = 0.0;
        }
        else
        {
            nnvtxs = graph->nvtxs;
            // disable compression if pruning took place
            ctrl->compress = 0;
        }
    }

    // graph compression is not performed in this gateway; disable it
    if (ctrl->compress)
        ctrl->compress = 0; 

    // if no pruning and no compression, set up the graph in the normal way.
    if (ctrl->pfactor == 0.0 && ctrl->compress == 0) 
        graph = SetupGraph(ctrl, nvtxs, 1, xadj, adjncy, vwgt, NULL, NULL);

    ASSERT(CheckGraph(graph, ctrl->numflag, 1));

    /* allocate workspace memory */
    AllocateWorkSpace(ctrl, graph);

    MlevelNodeBisectionMultiple(ctrl, graph);

    snvtxs[0] = 0;
    snvtxs[1] = 0;
    snvtxs[2] = 0;

    if (ctrl->pfactor > 0.0)
        snvtxs[2] += nvtxs-nnvtxs;

    where = graph->where;
    for (i=0; i<graph->nvtxs; i++)
        snvtxs[where[i]]++;

    lgraphidx = (idx_t*) mxCalloc (snvtxs[0], sizeof(idx_t));
    rgraphidx = (idx_t*) mxCalloc (snvtxs[1], sizeof(idx_t));
    sepidx    = (idx_t*) mxCalloc (snvtxs[2], sizeof(idx_t));

    ptlgraph = 0;
    ptrgraph = 0;
    ptsep    = 0;

    if (ctrl->pfactor > 0.0)
    {
        for (i=0; i<graph->nvtxs; i++)
            if (where[i] == 0)
                lgraphidx[ptlgraph++] = piperm[i];
            else if (where[i] == 1)
                rgraphidx[ptrgraph++] = piperm[i];
            else
                sepidx[ptsep++] = piperm[i];

        for (i=nnvtxs; i<nvtxs; i++)
            sepidx[ptsep++] = piperm[i];

        gk_free((void **)&piperm, LTERM);
    }
    else
    {
        for (i=0; i<graph->nvtxs; i++)
            if (where[i] == 0)
                lgraphidx[ptlgraph++] = i;
            else if (where[i] == 1)
                rgraphidx[ptrgraph++] = i;
            else
                sepidx[ptsep++] = i;
    }

    /* clean up */
    FreeCtrl(&ctrl);

    // Output
    lgraphidx_out = mxCreateDoubleMatrix(1,ptlgraph,mxREAL);
    mxSetData(lgraphidx_out,mxMalloc(sizeof(double)*ptlgraph));
    double *lgraphidx_out_pr = mxGetPr(lgraphidx_out);
    for(idx_t i=0; i<ptlgraph; i++)
        lgraphidx_out_pr[i] = (double) lgraphidx[i];
    rgraphidx_out = mxCreateDoubleMatrix(1,ptrgraph,mxREAL);
    mxSetData(rgraphidx_out,mxMalloc(sizeof(double)*ptrgraph));
    double *rgraphidx_out_pr = mxGetPr(rgraphidx_out);
    for(idx_t i=0; i<ptrgraph; i++)
        rgraphidx_out_pr[i] = (double) rgraphidx[i];
    sepidx_out = mxCreateDoubleMatrix(1,ptsep,mxREAL);
    mxSetData(sepidx_out,mxMalloc(sizeof(double)*ptsep));
    double *sepidx_out_pr = mxGetPr(sepidx_out);
    for(idx_t i=0; i<ptsep; i++)
        sepidx_out_pr[i] = (double) sepidx[i];
}
Example #14
File: mesh.c Project: davidheryanto/sc14
/*************************************************************************
* This function converts a mesh into a dual graph
**************************************************************************/
void ParMETIS_V3_Mesh2Dual(idxtype *elmdist, idxtype *eptr, idxtype *eind, 
                 int *numflag, int *ncommonnodes, idxtype **xadj, 
		 idxtype **adjncy, MPI_Comm *comm)
{
  int i, j, jj, k, kk, m;
  int npes, mype, pe, count, mask, pass;
  int nelms, lnns, my_nns, node;
  int firstelm, firstnode, lnode, nrecv, nsend;
  int *scounts, *rcounts, *sdispl, *rdispl;
  idxtype *nodedist, *nmap, *auxarray;
  idxtype *gnptr, *gnind, *nptr, *nind, *myxadj, *myadjncy = NULL;
  idxtype *sbuffer, *rbuffer, *htable;
  KeyValueType *nodelist, *recvbuffer;
  idxtype ind[200], wgt[200];
  int gmaxnode, gminnode;
  CtrlType ctrl;


  SetUpCtrl(&ctrl, -1, 0, *comm);

  npes = ctrl.npes;
  mype = ctrl.mype;

  nelms = elmdist[mype+1]-elmdist[mype];

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, NULL, NULL, NULL, npes, mype, 1);

  mask = (1<<11)-1;

  /*****************************/
  /* Determine number of nodes */
  /*****************************/
  gminnode = GlobalSEMin(&ctrl, eind[idxamin(eptr[nelms], eind)]);
  for (i=0; i<eptr[nelms]; i++)
    eind[i] -= gminnode;

  gmaxnode = GlobalSEMax(&ctrl, eind[idxamax(eptr[nelms], eind)]);


  /**************************/
  /* Check for input errors */
  /**************************/
  ASSERTS(nelms > 0);

  /* construct node distribution array */
  nodedist = idxsmalloc(npes+1, 0, "nodedist");
  for (nodedist[0]=0, i=0,j=gmaxnode+1; i<npes; i++) {
    k = j/(npes-i);
    nodedist[i+1] = nodedist[i]+k;
    j -= k;
  }
  my_nns = nodedist[mype+1]-nodedist[mype];
  firstnode = nodedist[mype];

  nodelist = (KeyValueType *)GKmalloc(eptr[nelms]*sizeof(KeyValueType), "nodelist");
  auxarray = idxmalloc(eptr[nelms], "auxarray");
  htable   = idxsmalloc(amax(my_nns, mask+1), -1, "htable");
  scounts  = imalloc(4*npes+2, "scounts");
  rcounts  = scounts+npes;
  sdispl   = scounts+2*npes;
  rdispl   = scounts+3*npes+1;


  /*********************************************/
  /* first find a local numbering of the nodes */
  /*********************************************/
  for (i=0; i<nelms; i++) {
    for (j=eptr[i]; j<eptr[i+1]; j++) {
      nodelist[j].key = eind[j];
      nodelist[j].val = j;
      auxarray[j]     = i; /* remember the local element ID that uses this node */
    }
  }
  ikeysort(eptr[nelms], nodelist);

  for (count=1, i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key)
      count++;
  }

  lnns = count;
  nmap = idxmalloc(lnns, "nmap");

  /* renumber the nodes of the elements array */
  count = 1;
  nmap[0] = nodelist[0].key;
  eind[nodelist[0].val] = 0;
  nodelist[0].val = auxarray[nodelist[0].val];  /* Store the local element ID */
  for (i=1; i<eptr[nelms]; i++) {
    if (nodelist[i].key > nodelist[i-1].key) {
      nmap[count] = nodelist[i].key;
      count++;
    }
    eind[nodelist[i].val] = count-1;
    nodelist[i].val = auxarray[nodelist[i].val];  /* Store the local element ID */
  }
  MPI_Barrier(*comm);

  /**********************************************************/
  /* perform comms necessary to construct node-element list */
  /**********************************************************/
  iset(npes, 0, scounts);
  for (pe=i=0; i<eptr[nelms]; i++) {
    while (nodelist[i].key >= nodedist[pe+1])
      pe++;
    scounts[pe] += 2;
  }
  ASSERTS(pe < npes);

  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  ASSERTS(sdispl[npes] == eptr[nelms]*2);

  nrecv = rdispl[npes]/2;
  recvbuffer = (KeyValueType *)GKmalloc(amax(1, nrecv)*sizeof(KeyValueType), "recvbuffer");

  MPI_Alltoallv((void *)nodelist, scounts, sdispl, IDX_DATATYPE, (void *)recvbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  /**************************************/
  /* construct global node-element list */
  /**************************************/
  gnptr = idxsmalloc(my_nns+1, 0, "gnptr");

  for (i=0; i<npes; i++) {
    for (j=rdispl[i]/2; j<rdispl[i+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      ASSERTS(lnode >= 0 && lnode < my_nns)

      gnptr[lnode]++;
    }
  }
  MAKECSR(i, my_nns, gnptr);

  gnind = idxmalloc(amax(1, gnptr[my_nns]), "gnind");
  for (pe=0; pe<npes; pe++) {
    firstelm = elmdist[pe];
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      gnind[gnptr[lnode]++] = recvbuffer[j].val+firstelm;
    }
  }
  SHIFTCSR(i, my_nns, gnptr);


  /*********************************************************/
  /* send the node-element info to the relevant processors */
  /*********************************************************/
  iset(npes, 0, scounts);

  /* use a hash table to ensure that each node is sent to a proc only once */
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        scounts[pe] += gnptr[lnode+1]-gnptr[lnode];
        htable[lnode] = 1;
      }
    }

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }


  MPI_Alltoall((void *)scounts, 1, MPI_INT, (void *)rcounts, 1, MPI_INT, *comm);

  icopy(npes, scounts, sdispl);
  MAKECSR(i, npes, sdispl);

  /* create the send buffer */
  nsend = sdispl[npes];
  sbuffer = (idxtype *)realloc(nodelist, sizeof(idxtype)*amax(1, nsend));

  count = 0;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      if (htable[lnode] == -1) {
        for (k=gnptr[lnode]; k<gnptr[lnode+1]; k++) {
          if (k == gnptr[lnode])
            sbuffer[count++] = -1*(gnind[k]+1);
          else
            sbuffer[count++] = gnind[k];
        }
        htable[lnode] = 1;
      }
    }
    ASSERTS(count == sdispl[pe+1]);

    /* now reset the hash table */
    for (j=rdispl[pe]/2; j<rdispl[pe+1]/2; j++) {
      lnode = recvbuffer[j].key-firstnode;
      htable[lnode] = -1;
    }
  }

  icopy(npes, rcounts, rdispl);
  MAKECSR(i, npes, rdispl);

  nrecv = rdispl[npes];
  rbuffer = (idxtype *)realloc(recvbuffer, sizeof(idxtype)*amax(1, nrecv));

  MPI_Alltoallv((void *)sbuffer, scounts, sdispl, IDX_DATATYPE, (void *)rbuffer, 
                rcounts, rdispl, IDX_DATATYPE, *comm);

  k = -1;
  nptr = idxsmalloc(lnns+1, 0, "nptr");
  nind = rbuffer;
  for (pe=0; pe<npes; pe++) {
    for (j=rdispl[pe]; j<rdispl[pe+1]; j++) {
      if (nind[j] < 0) {
        k++;
        nind[j] = (-1*nind[j])-1;
      }
      nptr[k]++;
    }
  }
  MAKECSR(i, lnns, nptr);

  ASSERTS(k+1 == lnns);
  ASSERTS(nptr[lnns] == nrecv)

  myxadj = *xadj = idxsmalloc(nelms+1, 0, "xadj");
  idxset(mask+1, -1, htable);

  firstelm = elmdist[mype];

  /* Two passes -- in first pass, simply find out the memory requirements */
  for (pass=0; pass<2; pass++) {
    for (i=0; i<nelms; i++) {
      for (count=0, j=eptr[i]; j<eptr[i+1]; j++) {
        node = eind[j];

        for (k=nptr[node]; k<nptr[node+1]; k++) {
          if ((kk=nind[k]) == firstelm+i) 
	    continue;
	    
          m = htable[(kk&mask)];

          if (m == -1) {
            ind[count] = kk;
            wgt[count] = 1;
            htable[(kk&mask)] = count++;
          }
          else {
            if (ind[m] == kk) { 
              wgt[m]++;
            }
            else {
              for (jj=0; jj<count; jj++) {
                if (ind[jj] == kk) {
                  wgt[jj]++;
                  break;
	        }
              }
              if (jj == count) {
                ind[count]   = kk;
                wgt[count++] = 1;
              }
	    }
          }
        }
      }

      for (j=0; j<count; j++) {
        htable[(ind[j]&mask)] = -1;
        if (wgt[j] >= *ncommonnodes) {
          if (pass == 0) 
            myxadj[i]++;
          else 
            myadjncy[myxadj[i]++] = ind[j];
	}
      }
    }

    if (pass == 0) {
      MAKECSR(i, nelms, myxadj);
      myadjncy = *adjncy = idxmalloc(myxadj[nelms], "adjncy");
    }
    else {
      SHIFTCSR(i, nelms, myxadj);
    }
  }

  /*****************************************/
  /* correctly renumber the elements array */
  /*****************************************/
  for (i=0; i<eptr[nelms]; i++)
    eind[i] = nmap[eind[i]] + gminnode;

  if (*numflag == 1) 
    ChangeNumberingMesh2(elmdist, eptr, eind, myxadj, myadjncy, NULL, npes, mype, 0);

  /* do not free nodelist, recvbuffer, rbuffer */
  GKfree((void **)&scounts, (void **)&nodedist, (void **)&nmap, (void **)&sbuffer, 
         (void **)&htable, (void **)&nptr, (void **)&nind, (void **)&gnptr, 
	 (void **)&gnind, (void **)&auxarray, LTERM);

  FreeCtrl(&ctrl);

  return;
}
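The following is a minimal, hypothetical MPI driver for the signature above (the ParMETIS 3.x idxtype interface). Each rank owns a single quadrilateral of a strip of quads, so consecutive ranks share two nodes; with ncommonnodes = 2 the resulting dual graph is a path. The mesh layout and the use of free() on the returned arrays are assumptions made for the sketch.

#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#include <parmetis.h>

int main(int argc, char *argv[])
{
  int i, npes, mype, numflag = 0, ncommonnodes = 2;
  MPI_Comm comm = MPI_COMM_WORLD;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(comm, &npes);
  MPI_Comm_rank(comm, &mype);

  /* One quadrilateral element per rank */
  idxtype *elmdist = (idxtype *)malloc((npes+1)*sizeof(idxtype));
  for (i = 0; i <= npes; i++)
    elmdist[i] = i;

  /* Quad p uses global nodes {2p, 2p+1, 2p+3, 2p+2}; consecutive quads share two nodes */
  idxtype eptr[2] = {0, 4};
  idxtype eind[4] = {2*mype, 2*mype+1, 2*mype+3, 2*mype+2};

  idxtype *xadj = NULL, *adjncy = NULL;
  ParMETIS_V3_Mesh2Dual(elmdist, eptr, eind, &numflag, &ncommonnodes,
                        &xadj, &adjncy, &comm);

  for (i = xadj[0]; i < xadj[1]; i++)
    printf("[%d] dual edge: element %d -- element %d\n", mype, mype, (int)adjncy[i]);

  /* the library allocated xadj/adjncy; assuming plain malloc underneath, release with free() */
  free(xadj); free(adjncy); free(elmdist);
  MPI_Finalize();
  return 0;
}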
Example #15
File: pspases.c Project: ADTG-VSSC/SU2
/***********************************************************************************
* This function is the entry point of the serial ordering algorithm.
************************************************************************************/
int ParMETIS_SerialNodeND(idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, 
        idx_t *numflag, idx_t *options, idx_t *order, idx_t *sizes, 
        MPI_Comm *comm)
{
  idx_t i, npes, mype;
  ctrl_t *ctrl=NULL;
  graph_t *agraph=NULL;
  idx_t *perm=NULL, *iperm=NULL;
  idx_t *sendcount, *displs;

  /* Setup the ctrl */
  ctrl = SetupCtrl(PARMETIS_OP_OMETIS, options, 1, 1, NULL, NULL, *comm);
  npes = ctrl->npes;
  mype = ctrl->mype;

  if (!ispow2(npes)) {
    if (mype == 0)
      printf("Error: The number of processors must be a power of 2!\n");
    FreeCtrl(&ctrl);
    return METIS_ERROR;
  }


  if (*numflag > 0) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 1);

  STARTTIMER(ctrl, ctrl->TotalTmr);
  STARTTIMER(ctrl, ctrl->MoveTmr);

  agraph = AssembleEntireGraph(ctrl, vtxdist, xadj, adjncy);

  STOPTIMER(ctrl, ctrl->MoveTmr);

  if (mype == 0) {
    perm  = imalloc(agraph->nvtxs, "PAROMETISS: perm");
    iperm = imalloc(agraph->nvtxs, "PAROMETISS: iperm");

    METIS_NodeNDP(agraph->nvtxs, agraph->xadj, agraph->adjncy, 
        agraph->vwgt, npes, NULL, perm, iperm, sizes);
  }

  STARTTIMER(ctrl, ctrl->MoveTmr);

  /* Broadcast the sizes array */
  gkMPI_Bcast((void *)sizes, 2*npes, IDX_T, 0, ctrl->gcomm);

  /* Scatter the iperm */
  sendcount = imalloc(npes, "PAROMETISS: sendcount");
  displs = imalloc(npes, "PAROMETISS: displs");
  for (i=0; i<npes; i++) {
    sendcount[i] = vtxdist[i+1]-vtxdist[i];
    displs[i] = vtxdist[i];
  }

  gkMPI_Scatterv((void *)iperm, sendcount, displs, IDX_T, (void *)order, 
      vtxdist[mype+1]-vtxdist[mype], IDX_T, 0, ctrl->gcomm);

  STOPTIMER(ctrl, ctrl->MoveTmr);
  STOPTIMER(ctrl, ctrl->TotalTmr);
  IFSET(ctrl->dbglvl, DBG_TIME, PrintTimingInfo(ctrl));
  IFSET(ctrl->dbglvl, DBG_TIME, gkMPI_Barrier(ctrl->gcomm));

  gk_free((void **)&agraph->xadj, &agraph->adjncy, &perm, &iperm, 
      &sendcount, &displs, &agraph, LTERM);

  if (*numflag > 0) 
    ChangeNumbering(vtxdist, xadj, adjncy, order, npes, mype, 0);

  goto DONE;

DONE:
  FreeCtrl(&ctrl);
  return METIS_OK;
}
Example #16
File: rmetis.c Project: KnoooW/gpgpu-sim
/***********************************************************************************
* This function is the entry point of the parallel multilevel local diffusion
* algorithm. It uses parallel undirected diffusion followed by adaptive k-way 
* refinement. This function utilizes local coarsening.
************************************************************************************/
void ParMETIS_V3_RefineKway(idxtype *vtxdist, idxtype *xadj, idxtype *adjncy,
              idxtype *vwgt, idxtype *adjwgt, int *wgtflag, int *numflag, int *ncon, 
	      int *nparts, float *tpwgts, float *ubvec, int *options, int *edgecut, 
	      idxtype *part, MPI_Comm *comm)
{
  int h, i;
  int npes, mype;
  CtrlType ctrl;
  WorkSpaceType wspace;
  GraphType *graph;
  int tewgt, tvsize, nmoved, maxin, maxout;
  float gtewgt, gtvsize, avg, maximb;
  int ps_relation, seed, dbglvl = 0;
  int iwgtflag, inumflag, incon, inparts, ioptions[10];
  float *itpwgts, iubvec[MAXNCON];

  MPI_Comm_size(*comm, &npes);
  MPI_Comm_rank(*comm, &mype);

  /********************************/
  /* Try to take care of bad inputs */
  /********************************/
  if (options != NULL && options[0] == 1)
    dbglvl = options[PMV3_OPTION_DBGLVL];
  CheckInputs(REFINE_PARTITION, npes, dbglvl, wgtflag, &iwgtflag, numflag, &inumflag,
              ncon, &incon, nparts, &inparts, tpwgts, &itpwgts, ubvec, iubvec, 
              NULL, NULL, options, ioptions, part, comm);

  /* ADD: take care of disconnected graph */
  /* ADD: take care of highly unbalanced vtxdist */
  /*********************************/
  /* Take care of the nparts = 1 case */
  /*********************************/
  if (inparts <= 1) {
    idxset(vtxdist[mype+1]-vtxdist[mype], 0, part); 
    *edgecut = 0;
    return;
  }

  /**************************/
  /* Set up data structures */
  /**************************/
  if (inumflag == 1) 
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 1);

  /*****************************/
  /* Set up control structures */
  /*****************************/
  if (ioptions[0] == 1) {
    dbglvl = ioptions[PMV3_OPTION_DBGLVL];
    seed = ioptions[PMV3_OPTION_SEED];
    ps_relation = (npes == inparts) ? ioptions[PMV3_OPTION_PSR] : DISCOUPLED;
  }
  else {
    dbglvl = GLOBAL_DBGLVL;
    seed = GLOBAL_SEED;
    ps_relation = (npes == inparts) ? COUPLED : DISCOUPLED;
  }

  SetUpCtrl(&ctrl, inparts, dbglvl, *comm);
  ctrl.CoarsenTo = amin(vtxdist[npes]+1, 50*incon*amax(npes, inparts));
  ctrl.ipc_factor = 1000.0;
  ctrl.redist_factor = 1.0;
  ctrl.redist_base = 1.0;
  ctrl.seed = (seed == 0) ? mype : seed*mype;
  ctrl.sync = GlobalSEMax(&ctrl, seed);
  ctrl.partType = REFINE_PARTITION;
  ctrl.ps_relation = ps_relation;
  ctrl.tpwgts = itpwgts;

  graph = Moc_SetUpGraph(&ctrl, incon, vtxdist, xadj, vwgt, adjncy, adjwgt, &iwgtflag);
  graph->vsize = idxsmalloc(graph->nvtxs, 1, "vsize");

  graph->home = idxmalloc(graph->nvtxs, "home");
  if (ctrl.ps_relation == COUPLED)
    idxset(graph->nvtxs, mype, graph->home);
  else
    idxcopy(graph->nvtxs, part, graph->home);

  tewgt   = idxsum(graph->nedges, graph->adjwgt);
  tvsize  = idxsum(graph->nvtxs, graph->vsize);
  gtewgt  = (float) GlobalSESum(&ctrl, tewgt) + 1.0/graph->gnvtxs;
  gtvsize = (float) GlobalSESum(&ctrl, tvsize) + 1.0/graph->gnvtxs;
  ctrl.edge_size_ratio = gtewgt/gtvsize;
  scopy(incon, iubvec, ctrl.ubvec);

  PreAllocateMemory(&ctrl, graph, &wspace);

  /***********************/
  /* Partition and Remap */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, InitTimers(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, starttimer(ctrl.TotalTmr));

  Adaptive_Partition(&ctrl, graph, &wspace);
  ParallelReMapGraph(&ctrl, graph, &wspace);

  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));
  IFSET(ctrl.dbglvl, DBG_TIME, stoptimer(ctrl.TotalTmr));

  idxcopy(graph->nvtxs, graph->where, part);
  if (edgecut != NULL)
    *edgecut = graph->mincut;

  /***********************/
  /* Take care of output */
  /***********************/
  IFSET(ctrl.dbglvl, DBG_TIME, PrintTimingInfo(&ctrl));
  IFSET(ctrl.dbglvl, DBG_TIME, MPI_Barrier(ctrl.gcomm));

  if (ctrl.dbglvl&DBG_INFO) {
    Mc_ComputeMoveStatistics(&ctrl, graph, &nmoved, &maxin, &maxout);
    rprintf(&ctrl, "Final %3d-way Cut: %6d \tBalance: ", inparts, graph->mincut);
    avg = 0.0;
    for (h=0; h<incon; h++) {
      maximb = 0.0;
      for (i=0; i<inparts; i++)
        maximb = amax(maximb, graph->gnpwgts[i*incon+h]/itpwgts[i*incon+h]);
      avg += maximb;
      rprintf(&ctrl, "%.3f ", maximb);
    }
    rprintf(&ctrl, "\nNMoved: %d %d %d %d\n", nmoved, maxin, maxout, maxin+maxout);
  }

  /*************************************/
  /* Free memory, renumber, and return */
  /*************************************/
  GKfree((void **)&graph->lnpwgts, (void **)&graph->gnpwgts, (void **)&graph->nvwgt, (void **)(&graph->home), (void **)(&graph->vsize), LTERM);

  GKfree((void **)&itpwgts, LTERM);
  FreeInitialGraphAndRemap(graph, iwgtflag);
  FreeWSpace(&wspace);
  FreeCtrl(&ctrl);

  if (inumflag == 1)
    ChangeNumbering(vtxdist, xadj, adjncy, part, npes, mype, 0);

  return;
}