bool getMachineCoordinate(const part_t rank, pcoord_t *xyz) {
   // Ficticious machine coordinates
   part_t slice = part_t(pow(double(this->numRanks), double(1.0/networkDim))
                             + 0.5);
   part_t m = rank;
   for (int i = 0; i < networkDim; ++i){
     xyz[i] = m / part_t(pow(slice, double(networkDim - i - 1)));
     m = m % part_t(pow(double(slice), double(networkDim - i - 1)));
   }
   return true;
 }
/*! \brief Partition the graph held by the model with ParMETIS and store
 *         the part assignment in the solution.
 *
 *  The function
 *   - reads the vertex list, edge list and their weights from \c model and
 *     converts them to the ParMETIS index type \c pm_idx_t;
 *   - builds the vertex distribution array and drops ranks that own no
 *     vertices from the communicator handed to ParMETIS;
 *   - calls ParMETIS_V3_PartKway, or ParMETIS_V3_RefineKway when the
 *     "partitioning_approach" parameter is "repartition" or
 *     "maximize_overlap";
 *   - passes the resulting part list to solution->setParts().
 *
 *  \param solution  supplies the target number of parts and the part sizes,
 *                   and receives the computed part list.
 */
void AlgParMETIS<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t numGlobalParts = solution->getTargetGlobalNumberOfParts();

  int np = problemComm->getSize();

  // Get vertex info
  ArrayView<const gno_t> vtxgnos;
  ArrayView<StridedData<lno_t, scalar_t> > vwgts;
  int nVwgt = model->getNumWeightsPerVertex();
  size_t nVtx = model->getVertexList(vtxgnos, vwgts);
  // pm_nVtx is not read again in this function.
  // NOTE(review): presumably the assignment acts as a range check of nVtx
  // against pm_idx_t -- confirm against TPL_Traits.
  pm_idx_t pm_nVtx;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nVtx, nVtx);

  // Vertex weights in ParMETIS' integer type; stays NULL when unweighted.
  pm_idx_t *pm_vwgts = NULL;
  if (nVwgt) {
    pm_vwgts = new pm_idx_t[nVtx*nVwgt];
    scale_weights(nVtx, vwgts, pm_vwgts);
  }

  // Get edge info
  ArrayView<const gno_t> adjgnos;
  ArrayView<const lno_t> offsets;
  ArrayView<StridedData<lno_t, scalar_t> > ewgts;
  int nEwgt = model->getNumWeightsPerEdge();
  size_t nEdge = model->getEdgeList(adjgnos, offsets, ewgts);

  // Edge weights in ParMETIS' integer type; stays NULL when unweighted.
  pm_idx_t *pm_ewgts = NULL;
  if (nEwgt) {
    pm_ewgts = new pm_idx_t[nEdge*nEwgt];
    scale_weights(nEdge, ewgts, pm_ewgts);
  }

  // Convert index types for edges, if needed
  pm_idx_t *pm_offsets;
  TPL_Traits<pm_idx_t,const lno_t>::ASSIGN_TPL_T_ARRAY(&pm_offsets, offsets);
  pm_idx_t *pm_adjs;
  pm_idx_t pm_dummy_adj;
  if (nEdge)
    TPL_Traits<pm_idx_t,const gno_t>::ASSIGN_TPL_T_ARRAY(&pm_adjs, adjgnos);
  else
    pm_adjs = &pm_dummy_adj;  // ParMETIS does not like NULL pm_adjs;


  // Build vtxdist
  pm_idx_t *pm_vtxdist;
  ArrayView<size_t> vtxdist;
  model->getVertexDist(vtxdist);
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T_ARRAY(&pm_vtxdist, vtxdist);

  // ParMETIS does not like processors having no vertices.
  // Inspect vtxdist and remove from communicator procs that have no vertices
  RCP<Comm<int> > subcomm;
  MPI_Comm mpicomm;  // Note:  mpicomm is valid only while subcomm is in scope

  if (np > 1) {
    // Compact pm_vtxdist in place so that it only describes the ranks that
    // own at least one vertex; keepRanks records those ranks.
    // NOTE(review): this writes through pm_vtxdist; if ASSIGN_TPL_T_ARRAY
    // aliases the model's array instead of copying it, the model's data is
    // modified here -- confirm.
    int nKeep = 0;
    Array<int> keepRanks(np);
    for (int i = 0; i < np; i++) {
      if ((pm_vtxdist[i+1] - pm_vtxdist[i]) > 0) {
        keepRanks[nKeep] = i;
        pm_vtxdist[nKeep] = pm_vtxdist[i];
        nKeep++;
      }
    }
    pm_vtxdist[nKeep] = pm_vtxdist[np];
    if (nKeep < np) {
      // Some rank is empty: ParMETIS runs on a subcommunicator.  Ranks for
      // which no subcommunicator is returned get MPI_COMM_NULL and skip
      // the ParMETIS call below.
      subcomm = problemComm->createSubcommunicator(keepRanks.view(0,nKeep));
      if (subcomm != Teuchos::null)
        mpicomm = Teuchos::getRawMpiComm(*subcomm);
      else
        mpicomm = MPI_COMM_NULL;
    }
    else {
      mpicomm = Teuchos::getRawMpiComm(*problemComm);
    }
  }
  else {
    mpicomm = Teuchos::getRawMpiComm(*problemComm);
  }

  // Create array for ParMETIS to return results in.
  pm_idx_t *pm_partList = NULL;
  if (nVtx) pm_partList = new pm_idx_t[nVtx];

  if (mpicomm != MPI_COMM_NULL) {
    // If in ParMETIS' communicator (i.e., have vertices), call ParMETIS

    // Get target part sizes
    // One constraint per vertex weight; a single constraint when unweighted.
    // pm_partsizes is laid out part-major: entry [part*pm_nCon + dim].
    pm_idx_t pm_nCon = (nVwgt == 0 ? 1 : pm_idx_t(nVwgt));
    pm_real_t *pm_partsizes = new pm_real_t[numGlobalParts*pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++) {
      if (!solution->criteriaHasUniformPartSizes(dim))
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] =
                       pm_real_t(solution->getCriteriaPartSize(dim,i));
      else
        for (size_t i=0; i<numGlobalParts; i++)
          pm_partsizes[i*pm_nCon+dim] = pm_real_t(1.)/pm_real_t(numGlobalParts);
    }

    // Get imbalance tolerances
    // 1.1 is used unless the "imbalance_tolerance" parameter is present.
    double tolerance = 1.1;
    const Teuchos::ParameterList &pl = env->getParameters();
    const Teuchos::ParameterEntry *pe = pl.getEntryPtr("imbalance_tolerance");
    if (pe) tolerance = pe->getValue<double>(&tolerance);

    // The same tolerance is applied to every constraint.
    pm_real_t *pm_imbTols = new pm_real_t[pm_nCon];
    for (pm_idx_t dim = 0; dim < pm_nCon; dim++)
      pm_imbTols[dim] = pm_real_t(tolerance);

    // Select the ParMETIS routine from the "partitioning_approach" parameter.
    std::string parmetis_method("PARTKWAY");
    pe = pl.getEntryPtr("partitioning_approach");
    if (pe){
      std::string approach;
      approach = pe->getValue<std::string>(&approach);
      if ((approach == "repartition") || (approach == "maximize_overlap"))
        parmetis_method = "REFINE_KWAY";
      // TODO:  AdaptiveRepart
    }

    // Other ParMETIS parameters?
    // wgtflag: 2 if vertex weights are present, plus 1 if edge weights are.
    pm_idx_t pm_wgtflag = 2*(nVwgt > 0) + (nEwgt > 0);
    pm_idx_t pm_numflag = 0;
    pm_idx_t pm_edgecut = -1;
    pm_idx_t pm_options[METIS_NOPTIONS];
    pm_options[0] = 1;   // Use non-default options for some ParMETIS options
    // Start at 1: starting at 0 would reset pm_options[0] and make ParMETIS
    // ignore the seed set below.
    for (int i = 1; i < METIS_NOPTIONS; i++)
      pm_options[i] = 0; // Default options
    pm_options[2] = 15;  // Matches default value used in Zoltan

    pm_idx_t pm_nPart;
    TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(pm_nPart, numGlobalParts);

    if (parmetis_method == "PARTKWAY") {
      ParMETIS_V3_PartKway(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts, pm_ewgts,
                           &pm_wgtflag, &pm_numflag, &pm_nCon, &pm_nPart,
                           pm_partsizes, pm_imbTols, pm_options,
                           &pm_edgecut, pm_partList, &mpicomm);
    }
    else if (parmetis_method == "ADAPTIVE_REPART") {
      // Not reachable with the method selection above; kept for the
      // planned AdaptiveRepart support.
      // Get object sizes:  pm_vsize
      std::cout << "NOT READY FOR ADAPTIVE_REPART YET; NEED VSIZE" << std::endl;
      exit(-1);
      //pm_real_t itr = 100.;  // Same default as in Zoltan
      //ParMETIS_V3_AdaptiveRepart(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts,
      //                           pm_vsize, pm_ewgts, &pm_wgtflag,
      //                           &pm_numflag, &pm_nCon, &pm_nPart,
      //                           pm_partsizes, pm_imbTols,
      //                           &itr, pm_options,
      //                           &pm_edgecut, pm_partList, &mpicomm);
    }
    else if (parmetis_method == "REFINE_KWAY") {
      ParMETIS_V3_RefineKway(pm_vtxdist, pm_offsets, pm_adjs, pm_vwgts,
                             pm_ewgts,
                             &pm_wgtflag, &pm_numflag, &pm_nCon, &pm_nPart,
                             pm_partsizes, pm_imbTols,
                             pm_options, &pm_edgecut, pm_partList, &mpicomm);
    }

    // Clean up
    delete [] pm_partsizes;
    delete [] pm_imbTols;
  }

  // Load answer into the solution.

  ArrayRCP<part_t> partList;
  if (nVtx) {
    if (TPL_Traits<pm_idx_t, part_t>::OK_TO_CAST_TPL_T()) {
      // Same representation: hand ownership of pm_partList to the ArrayRCP.
      partList = ArrayRCP<part_t>((part_t *)pm_partList, 0, nVtx, true);
    }
    else {
      // Different representation: copy element by element, then free the
      // ParMETIS-typed buffer.
      // TODO Probably should have a TPL_Traits function to do the following
      partList = ArrayRCP<part_t>(new part_t[nVtx], 0, nVtx, true);
      for (size_t i = 0; i < nVtx; i++) {
        partList[i] = part_t(pm_partList[i]);
      }
      delete [] pm_partList;
    }
  }

  solution->setParts(partList);

  env->memory("Zoltan2-ParMETIS: After creating solution");

  // Clean up copies made due to differing data sizes.
  TPL_Traits<pm_idx_t,size_t>::DELETE_TPL_T_ARRAY(&pm_vtxdist);
  TPL_Traits<pm_idx_t,const lno_t>::DELETE_TPL_T_ARRAY(&pm_offsets);
  // pm_adjs points at a stack dummy when there are no edges.
  if (nEdge)
    TPL_Traits<pm_idx_t,const gno_t>::DELETE_TPL_T_ARRAY(&pm_adjs);

  if (nVwgt) delete [] pm_vwgts;
  if (nEwgt) delete [] pm_ewgts;
}
/*! \brief Partition the model's graph with ParMETIS_V3_PartKway and hand
 *         the part list to the solution.
 *
 *  Vertex and edge data are read from \c model, converted to the ParMETIS
 *  index type, and the vertex distribution is assembled with an all-gather
 *  of the local vertex counts.  The imbalance tolerance is fixed at 1.05.
 *
 *  \param solution  supplies the target number of parts and the part sizes,
 *                   and receives the computed part list.
 */
void AlgParMETIS<Adapter>::partition(
  const RCP<PartitioningSolution<Adapter> > &solution
)
{
  HELLO;

  size_t targetParts = solution->getTargetGlobalNumberOfParts();
  int nProcs = problemComm->getSize();

  // ---- Vertices: ids, coordinates and weights ----
  ArrayView<const gno_t> vertexIds;
  ArrayView<StridedData<lno_t, scalar_t> > coords;
  ArrayView<StridedData<lno_t, scalar_t> > vertexWgts;
  int wgtsPerVtx = model->getNumWeightsPerVertex();
  size_t numLocalVtx = model->getVertexList(vertexIds, coords, vertexWgts);
  pm_idx_t tplNumVtx;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(tplNumVtx, numLocalVtx, env);

  // Weights in the ParMETIS integer type; NULL when there are none.
  pm_idx_t *tplVtxWgts = NULL;
  if (wgtsPerVtx != 0) {
    tplVtxWgts = new pm_idx_t[numLocalVtx*wgtsPerVtx];
    scale_weights(numLocalVtx, vertexWgts, tplVtxWgts);
  }

  // ---- Edges: neighbor ids, owning procs, offsets and weights ----
  ArrayView<const gno_t> neighborIds;
  ArrayView<const int>   neighborProcs;
  ArrayView<const lno_t> edgeOffsets;
  ArrayView<StridedData<lno_t, scalar_t> > edgeWgts;
  int wgtsPerEdge = model->getNumWeightsPerEdge();
  size_t numLocalEdges =
    model->getEdgeList(neighborIds, neighborProcs, edgeOffsets, edgeWgts);

  pm_idx_t *tplEdgeWgts = NULL;
  if (wgtsPerEdge != 0) {
    tplEdgeWgts = new pm_idx_t[numLocalEdges*wgtsPerEdge];
    scale_weights(numLocalEdges, edgeWgts, tplEdgeWgts);
  }

  // ---- Index arrays in the ParMETIS index type ----
  pm_idx_t *tplOffsets;
  TPL_Traits<pm_idx_t,lno_t>::ASSIGN_TPL_T_ARRAY(&tplOffsets, edgeOffsets, env);
  pm_idx_t *tplAdjs;
  TPL_Traits<pm_idx_t,gno_t>::ASSIGN_TPL_T_ARRAY(&tplAdjs, neighborIds, env);

  // ---- Vertex distribution: gather the per-rank counts into slots
  //      1..nProcs, then turn them into a running sum starting at 0 ----
  pm_idx_t *tplVtxDist = new pm_idx_t[nProcs+1];
  tplVtxDist[0] = 0;
  Teuchos::gatherAll(*problemComm, 1, &tplNumVtx, nProcs, tplVtxDist + 1);
  for (int r = 1; r < nProcs; ++r)
    tplVtxDist[r+1] += tplVtxDist[r];

  // ---- Output buffer for ParMETIS ----
  // One extra entry keeps the pointer non-null even without local
  // vertices; ParMETIS does not like NULL arrays (see Zoltan bug 4299).
  pm_idx_t *tplParts = new pm_idx_t[numLocalVtx+1];

  // ---- Target part sizes, stored as [part*numConstraints + constraint] ----
  pm_idx_t numConstraints = 1;
  if (wgtsPerVtx != 0)
    numConstraints = pm_idx_t(wgtsPerVtx);

  pm_real_t *targetFractions = new pm_real_t[targetParts*numConstraints];
  for (pm_idx_t c = 0; c < numConstraints; ++c) {
    if (solution->criteriaHasUniformPartSizes(c)) {
      for (size_t p = 0; p < targetParts; ++p)
        targetFractions[p*numConstraints+c] =
                     pm_real_t(1.) / pm_real_t(targetParts);
    }
    else {
      for (size_t p = 0; p < targetParts; ++p)
        targetFractions[p*numConstraints+c] =
                     pm_real_t(solution->getCriteriaPartSize(c,p));
    }
  }

  // ---- Imbalance tolerances ----
  pm_real_t *imbalanceTols = new pm_real_t[numConstraints];
  for (pm_idx_t c = 0; c < numConstraints; ++c)
    imbalanceTols[c] = 1.05;  // TODO:  GET THE PARAMETER

  // ---- Remaining ParMETIS arguments ----
  std::string method("PARTKWAY");
  // 2 when vertex weights are present, plus 1 when edge weights are.
  pm_idx_t wgtFlag = (wgtsPerVtx > 0 ? 2 : 0) + (wgtsPerEdge > 0 ? 1 : 0);
  pm_idx_t numFlag = 0;

  pm_idx_t tplNumParts;
  TPL_Traits<pm_idx_t,size_t>::ASSIGN_TPL_T(tplNumParts, targetParts, env);

  if (method == "PARTKWAY") {
    pm_idx_t edgeCut = -1;
    // [0] = 0 selects the default options; [1] (debug level) and
    // [2] (seed) are ignored in that case.
    pm_idx_t options[3] = {0, 0, 0};

    // NOTE(review): mpicomm is not declared in this function; presumably
    // a class member -- confirm.
    ParMETIS_V3_PartKway(tplVtxDist, tplOffsets, tplAdjs,
                         tplVtxWgts, tplEdgeWgts,
                         &wgtFlag, &numFlag, &numConstraints, &tplNumParts,
                         targetFractions, imbalanceTols, options,
                         &edgeCut, tplParts, &mpicomm);
  }
  else if (method == "ADAPTIVE_REPART") {
    // Would additionally need the object sizes.
    std::cout << "NOT READY FOR ADAPTIVE_REPART YET" << std::endl;
    exit(-1);
  }
  else if (method == "PART_GEOM") {
    // Would additionally need the coordinates.
    std::cout << "NOT READY FOR PART_GEOM YET" << std::endl;
    exit(-1);
  }

  // ---- Release the ParMETIS-only inputs ----
  delete [] tplVtxDist;
  delete [] targetFractions;
  delete [] imbalanceTols;

  // ---- Move the result into the solution ----
  ArrayRCP<part_t> partList;
  if (!TPL_Traits<pm_idx_t, part_t>::OK_TO_CAST_TPL_T()) {
    // Types differ: copy element by element and drop the ParMETIS buffer.
    // TODO Probably should have a TPL_Traits function to do the following
    partList = ArrayRCP<part_t>(new part_t[numLocalVtx], 0, numLocalVtx, true);
    for (size_t v = 0; v < numLocalVtx; ++v)
      partList[v] = part_t(tplParts[v]);
    delete [] tplParts;
  }
  else {
    // Types are interchangeable: the ArrayRCP takes over the buffer.
    partList = ArrayRCP<part_t>((part_t *)tplParts, 0, numLocalVtx, true);
  }

  solution->setParts(partList);

  env->memory("Zoltan2-ParMETIS: After creating solution");

  // ---- Release copies made due to differing data sizes ----
  TPL_Traits<pm_idx_t,lno_t>::DELETE_TPL_T_ARRAY(&tplOffsets);
  TPL_Traits<pm_idx_t,gno_t>::DELETE_TPL_T_ARRAY(&tplAdjs);

  if (wgtsPerVtx != 0) delete [] tplVtxWgts;
  if (wgtsPerEdge != 0) delete [] tplEdgeWgts;
}
        /*! \brief Compute the min and max slice (hash) index along every
         *         dimension and the list of grid cells they span.
         *
         *  For each dimension j the positions of lmins[j] and lmaxs[j]
         *  relative to minMaxBoundaries[j] are divided by sliceSizes[j]
         *  (floor for the minimum, ceil for the maximum) and capped at
         *  numSlicePerDim - 1.  The results are stored in minHashIndices
         *  and maxHashIndices.  Every combination of per-dimension indices
         *  in [min, max] is then linearized, with dimension j weighted by
         *  numSlicePerDim^(dim - j - 1), into a newly allocated vector that
         *  replaces gridIndices; the previous gridIndices vector is deleted.
         *
         *  \param minMaxBoundaries  per-dimension origin the distances are
         *                           measured from.
         *  \param sliceSizes        per-dimension slice width.
         *  \param numSlicePerDim    number of slices along each dimension.
         */
        void setMinMaxHashIndices (
                scalar_t *minMaxBoundaries,
                scalar_t *sliceSizes,
                part_t numSlicePerDim
                ){
            for (int j = 0; j < dim; ++j){
                // Index stays 0 when the point is at or below the origin or
                // when the slice width is degenerate.
                scalar_t distance = (lmins[j] - minMaxBoundaries[j]);
                part_t minInd = 0;
                if (distance > _EPSILON && sliceSizes[j] > _EPSILON){
                    minInd = static_cast<part_t>(floor((lmins[j] - minMaxBoundaries[j])/ sliceSizes[j]));
                }

                if(minInd >= numSlicePerDim){
                    minInd = numSlicePerDim - 1;
                }
                part_t maxInd = 0;
                distance = (lmaxs[j] - minMaxBoundaries[j]);
                if (distance > _EPSILON && sliceSizes[j] > _EPSILON){
                    maxInd = static_cast<part_t>(ceil((lmaxs[j] - minMaxBoundaries[j])/ sliceSizes[j]));
                }
                if(maxInd >= numSlicePerDim){
                    maxInd = numSlicePerDim - 1;
                }

                //cout << "j:" << j << " lmins:" << lmins[j] << " lmaxs:" << lmaxs[j] << endl;
                //cout << "j:" << j << " min:" << minInd << " max:" << maxInd << endl;
                minHashIndices[j] = minInd;
                maxHashIndices[j] = maxInd;
            }

            // Scratch buffers live on the stack so nothing leaks if a
            // push_back throws.
            std::vector <part_t> current;
            current.push_back(0);
            std::vector <part_t> next;

            for (int j = 0; j < dim; ++j){

                const part_t minInd = minHashIndices[j];
                const part_t maxInd = maxHashIndices[j];

                // pScale = numSlicePerDim^(dim - j - 1), computed with
                // integer arithmetic: a truncated floating-point pow() can
                // yield a wrong stride for large powers.
                part_t pScale = 1;
                for (int e = j + 1; e < dim; ++e){
                    pScale *= numSlicePerDim;
                }

                const part_t inSize = current.size();

                // Extend every partial index by each admissible slice of
                // dimension j.
                next.clear();
                for (part_t k = minInd; k <= maxInd; ++k){
                    for (part_t i = 0; i < inSize; ++i){
                        next.push_back(current[i] + k * pScale);
                    }
                }
                current.swap(next);
            }

            // Publish the result in a fresh heap vector and release the
            // vector gridIndices pointed to before.
            std::vector <part_t> *result = new std::vector <part_t> ();
            result->swap(current);

            std::vector <part_t> *previous = gridIndices;
            gridIndices = result;
            delete previous;
        }