std::tuple<ptrdiff_t, ptrdiff_t> partition(const distributed_matrix<B> &A,
        idx_t npart, std::vector<ptrdiff_t> &perm) const
{
    communicator comm = A.comm();
    idx_t n = A.loc_rows();
    int active = (n > 0);

    std::vector<idx_t> ptr;
    std::vector<idx_t> col;

    symm_graph(A, ptr, col);

    idx_t wgtflag = 0;
    idx_t numflag = 0;
    idx_t options = 0;
    idx_t edgecut = 0;
    idx_t ncon    = 1;

    std::vector<real_t> tpwgts(npart, 1.0 / npart);
    std::vector<real_t> ubvec(ncon, 1.05);
    std::vector<idx_t>  part(n);

    if (!n) part.reserve(1); // So that part.data() is not NULL

    MPI_Comm scomm;
    MPI_Comm_split(comm, active ? 0 : MPI_UNDEFINED, comm.rank, &scomm);

    if (active) {
        communicator sc(scomm);
        std::vector<idx_t> vtxdist = sc.exclusive_sum(n);

        sc.check(
                METIS_OK == ParMETIS_V3_PartKway(&vtxdist[0], &ptr[0], &col[0],
                    NULL, NULL, &wgtflag, &numflag, &ncon, &npart, &tpwgts[0],
                    &ubvec[0], &options, &edgecut, &part[0], &scomm),
                "Error in ParMETIS");

        MPI_Comm_free(&scomm);
    }

    return graph_perm_index(comm, npart, part, perm);
}
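For context, a minimal, self-contained sketch of the same unweighted ParMETIS_V3_PartKway call (wgtflag = 0) on a toy graph: each MPI rank owns exactly one vertex of a global chain 0-1-...-(P-1). Everything outside the MPI and ParMETIS APIs is invented for the example, and idx_t/real_t are assumed to match the installed parmetis.h; this is an illustration, not part of the surrounding library code.

#include <mpi.h>
#include <parmetis.h>
#include <cstdio>
#include <vector>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    MPI_Comm comm = MPI_COMM_WORLD;
    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    // Vertex distribution: rank r owns global vertex r.
    std::vector<idx_t> vtxdist(size + 1);
    for (int i = 0; i <= size; ++i) vtxdist[i] = i;

    // Local CSR graph of the chain: neighbors are r-1 and r+1 when they exist.
    std::vector<idx_t> adjncy;
    if (rank > 0)        adjncy.push_back(rank - 1);
    if (rank < size - 1) adjncy.push_back(rank + 1);
    std::vector<idx_t> xadj = {0, (idx_t)adjncy.size()};
    if (adjncy.empty()) adjncy.resize(1); // keep adjncy.data() non-NULL when size == 1

    idx_t wgtflag = 0, numflag = 0, ncon = 1;
    idx_t options[3] = {0, 0, 0};          // options[0] = 0: use ParMETIS defaults
    idx_t nparts = size, edgecut = 0, part = 0;
    std::vector<real_t> tpwgts(nparts * ncon, 1.0 / nparts);
    std::vector<real_t> ubvec(ncon, 1.05);

    int status = ParMETIS_V3_PartKway(
            vtxdist.data(), xadj.data(), adjncy.data(),
            NULL, NULL, &wgtflag, &numflag, &ncon, &nparts,
            tpwgts.data(), ubvec.data(), options, &edgecut, &part, &comm);

    if (status == METIS_OK)
        std::printf("rank %d -> part %d (edgecut %d)\n", rank, (int)part, (int)edgecut);

    MPI_Finalize();
    return 0;
}

Assuming a standard installation, this would be compiled with an MPI compiler wrapper and linked against the parmetis and metis libraries.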
void ParMETISGraphPartitionerImpl::p_partition(void)
{
  int me(this->processor_rank());

  std::vector<idx_t> vtxdist;
  std::vector<idx_t> xadj;
  std::vector<idx_t> adjncy;

  ParMETISGraphWrapper wrap(p_adjacency_list);
  wrap.get_csr_local(vtxdist, xadj, adjncy);

  int nnodes(vtxdist[me+1] - vtxdist[me]);

#if 0
  for (int p = 0; p < this->processor_size(); ++p) {
    if (this->processor_rank() == p) {
      std::cout << "Processor " << p << ": nodes: ";
      for (Index n = 0; n < nnodes; ++n) {
        std::cout << p_adjacency_list.node_index(n) << ",";
      }
      std::cout << std::endl;
      std::cout << "Processor " << p << ": vtxdist: ";
      std::copy(vtxdist.begin(), vtxdist.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
      std::cout << "Processor " << p << ": xadj: ";
      std::copy(xadj.begin(), xadj.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
      std::cout << "Processor " << p << ": adjncy: ";
      std::copy(adjncy.begin(), adjncy.end(),
                std::ostream_iterator<idx_t>(std::cout, ","));
      std::cout << std::endl;
    }
    this->communicator().barrier();
  }
#endif

  // Call the partitioner (try to use variable names that match the documentation)
  int status;
  idx_t ncon(1);
  idx_t wgtflag(3), numflag(0);
  idx_t nparts(this->processor_size());
  std::vector<idx_t> vwgt(nnodes, 1);
  std::vector<idx_t> adjwgt(adjncy.size(), 2);
  std::vector<real_t> tpwgts(nparts*ncon, 1.0/static_cast<real_t>(nparts));
  real_t ubvec(1.05);
  std::vector<idx_t> options(3);
  options[0] = 1;
  options[1] = 127;
  options[2] = 14;
  MPI_Comm comm(this->communicator());

  idx_t edgecut;
  std::vector<idx_t> part(nnodes);

  status = ParMETIS_V3_PartKway(&vtxdist[0], &xadj[0], &adjncy[0],
                                &vwgt[0], &adjwgt[0],
                                &wgtflag, &numflag, &ncon, &nparts,
                                &tpwgts[0], &ubvec, &options[0],
                                &edgecut, &part[0], &comm);

  // ParMETIS returns METIS_OK (1) on success, so compare against that
  // rather than 0.
  if (status != METIS_OK) {
    // FIXME: throw an exception
  }

  // "part" contains the destination processors; transfer this to the
  // local array
  wrap.set_partition(vtxdist, part);
  wrap.get_partition(p_node_destinations);
}
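For reference, a small hedged sketch of what the numeric flags in the call above mean, taken from the ParMETIS manual; the helper function name is made up and the snippet only mirrors the values already used in the code.

#include <parmetis.h>

void parmetis_flag_reference()
{
    idx_t wgtflag = 3;   // 0: no weights, 1: edge weights only,
                         // 2: vertex weights only, 3: vertex and edge weights
    idx_t numflag = 0;   // 0: C-style (0-based) indexing, 1: Fortran-style (1-based)

    idx_t options[3];
    options[0] = 1;      // 1: honor options[1..2], 0: use ParMETIS defaults
    options[1] = 127;    // dbglvl: bit mask controlling diagnostic output
    options[2] = 14;     // seed for ParMETIS' random number generator

    (void)wgtflag; (void)numflag; (void)options; // silence unused-variable warnings
}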
bool ParMetisPartitioner::partition(map<int, double>& elemWeights,
                                    PartitionMode mode)
{
  FUNCNAME("ParMetisPartitioner::partition()");

  int mpiSize = mpiComm->Get_size();

  // === Create parmetis mesh ===

  if (parMetisMesh)
    delete parMetisMesh;

  TEST_EXIT_DBG(elementInRank.size() != 0)("Should not happen!\n");

  parMetisMesh = new ParMetisMesh(mesh, mpiComm, elementInRank, mapLocalGlobal);

  int nElements = parMetisMesh->getNumElements();

  // === Create weight array ===

  vector<int> wgts(nElements);
  vector<float> floatWgts(nElements);
  unsigned int floatWgtsPos = 0;
  float maxWgt = 0.0;

  TraverseStack stack;
  ElInfo* elInfo = stack.traverseFirst(mesh, 0, Mesh::CALL_EL_LEVEL);
  while (elInfo) {
    int index = elInfo->getElement()->getIndex();

    if (elementInRank[index]) {
      // get weight
      float wgt = static_cast<float>(elemWeights[index]);
      maxWgt = std::max(wgt, maxWgt);

      // write float weight
      TEST_EXIT_DBG(floatWgtsPos < floatWgts.size())("Should not happen!\n");
      floatWgts[floatWgtsPos++] = wgt;
    }
    elInfo = stack.traverseNext(elInfo);
  }

  TEST_EXIT_DBG(floatWgtsPos == floatWgts.size())("Should not happen!\n");

  float tmp;
  mpiComm->Allreduce(&maxWgt, &tmp, 1, MPI_FLOAT, MPI_MAX);
  maxWgt = tmp;

  // === Create dual graph ===

  ParMetisGraph parMetisGraph(parMetisMesh, mpiComm);

  // === Partitioning of dual graph ===

  int wgtflag = 2;      // weights at vertices only!
  int numflag = 0;      // C numbering style!
  int ncon = 1;         // one weight at each vertex!
  int nparts = mpiSize; // number of partitions

  vector<double> tpwgts(mpiSize);
  double ubvec = 1.05;
  int options[4] = {0, 0, 15, PARMETIS_PSR_COUPLED}; // default options
  int edgecut = -1;
  vector<int> part(nElements);

  // set tpwgts
  for (int i = 0; i < mpiSize; i++)
    tpwgts[i] = 1.0 / static_cast<double>(nparts);

  // float scale = 10000.0 / maxWgt;
  for (int i = 0; i < nElements; i++)
    wgts[i] = floatWgts[i];
  //  wgts[i] = static_cast<int>(floatWgts[i] * scale);

  // === Start ParMETIS. ===

  MPI_Comm tmpComm = MPI_Comm(*mpiComm);

  switch (mode) {
  case INITIAL:
    ParMETIS_V3_PartKway(parMetisMesh->getElementDist(),
                         parMetisGraph.getXAdj(),
                         parMetisGraph.getAdjncy(),
                         &(wgts[0]), NULL,
                         &wgtflag, &numflag, &ncon, &nparts,
                         &(tpwgts[0]), &ubvec, options,
                         &edgecut, &(part[0]), &tmpComm);
    break;
  case ADAPTIVE_REPART:
    {
      vector<int> vsize(nElements);
      for (int i = 0; i < nElements; i++)
        vsize[i] = static_cast<int>(floatWgts[i]);

      // "itr" is the ITR ratio expected by ParMETIS_V3_AdaptiveRepart
      // (presumably a class member; it is not declared in this function).
      ParMETIS_V3_AdaptiveRepart(parMetisMesh->getElementDist(),
                                 parMetisGraph.getXAdj(),
                                 parMetisGraph.getAdjncy(),
                                 &(wgts[0]), NULL, &(vsize[0]),
                                 &wgtflag, &numflag, &ncon, &nparts,
                                 &(tpwgts[0]), &ubvec, &itr, options,
                                 &edgecut, &(part[0]), &tmpComm);
    }
    break;
  case REFINE_PART:
    ParMETIS_V3_RefineKway(parMetisMesh->getElementDist(),
                           parMetisGraph.getXAdj(),
                           parMetisGraph.getAdjncy(),
                           &(wgts[0]), NULL,
                           &wgtflag, &numflag, &ncon, &nparts,
                           &(tpwgts[0]), &ubvec, options,
                           &edgecut, &(part[0]), &tmpComm);
    break;
  default:
    ERROR_EXIT("unknown partitioning mode\n");
  }

  // === Distribute new partition data. ===

  return distributePartitioning(&(part[0]));
}