void Cluster::subdivide(unsigned bmin) { const unsigned size(_end - _beg); if (size > bmin) { idx_t n_rows(static_cast<idx_t>(_l_adj_mat->getNRows())); idx_t *xadj(new idx_t[n_rows+1]); unsigned const*const original_row_ptr(_l_adj_mat->getRowPtrArray()); for(idx_t k(0); k<=n_rows; k++) { xadj[k] = original_row_ptr[k]; } unsigned nnz(_l_adj_mat->getNNZ()); idx_t *adjncy(new idx_t[nnz]); unsigned const*const original_adjncy(_l_adj_mat->getColIdxArray()); for(unsigned k(0); k<nnz; k++) { adjncy[k] = original_adjncy[k]; } // unsigned nparts = 2; idx_t options[METIS_NOPTIONS]; // for METIS METIS_SetDefaultOptions(options); // options[METIS OPTION PTYPE] = METIS PTYPE RB; // options[METIS OPTION OBJTYPE] = METIS OBJTYPE CUT; // options[METIS OPTION CTYPE] = METIS CTYPE SHEM; // options[] = ; // options[] = ; // options[] = ; // unsigned sepsize(0); // for METIS idx_t *vwgt(new idx_t[n_rows + 1]); // const unsigned nnz(xadj[n_rows]); // unsigned *adjwgt(new unsigned[nnz]); for (idx_t k(0); k < n_rows + 1; k++) vwgt[k] = 1; // for (unsigned k(0); k < nnz; k++) // adjwgt[k] = 1; // unsigned *part(new unsigned[n_rows + 1]); // subdivide the index set into three parts employing METIS // METIS_ComputeVertexSeparator(&n_rows, xadj, adjncy, vwgt, &options, // &sepsize, part); idx_t *loc_op_perm(new idx_t[n_rows]); idx_t *loc_po_perm(new idx_t[n_rows]); for (idx_t k(0); k<n_rows; k++) { loc_op_perm[k] = _g_op_perm[k]; } for (idx_t k(0); k<n_rows; k++) { loc_po_perm[k] = _g_po_perm[k]; } METIS_NodeND(&n_rows, xadj, adjncy, vwgt, options, loc_op_perm, loc_po_perm); for (idx_t k(0); k<n_rows; k++) { _g_op_perm[k] = loc_op_perm[k]; } for (idx_t k(0); k<n_rows; k++) { _g_po_perm[k] = loc_po_perm[k]; } delete [] loc_op_perm; delete [] loc_po_perm; delete [] vwgt; delete [] adjncy; delete [] xadj; // // create and init local permutations // unsigned *l_op_perm(new unsigned[size]); // unsigned *l_po_perm(new unsigned[size]); // for (unsigned i = 0; i < size; ++i) // l_op_perm[i] = 
l_po_perm[i] = i; // // unsigned isep1, isep2; // updatePerm(part, isep1, isep2, l_op_perm, l_po_perm); // delete[] part; // // // update global permutation // unsigned *t_op_perm = new unsigned[size]; // for (unsigned k = 0; k < size; ++k) // t_op_perm[k] = _g_op_perm[_beg + l_op_perm[k]]; // // for (unsigned k = _beg; k < _end; ++k) { // _g_op_perm[k] = t_op_perm[k - _beg]; // _g_po_perm[_g_op_perm[k]] = k; // } // delete[] t_op_perm; // // // next recursion step // if ((isep1 >= bmin) && (isep2 - isep1 >= bmin)) { // // construct adj matrices for [0, isep1), [isep1,isep2), [isep2, _end) // AdjMat *l_adj0(_l_adj_mat->getMat(0, isep1, l_op_perm, l_po_perm)); // AdjMat *l_adj1(_l_adj_mat->getMat(isep1, isep2, l_op_perm, l_po_perm)); // AdjMat *l_adj2(_l_adj_mat->getMat(isep2, size, l_op_perm, l_po_perm)); // // delete[] l_op_perm; // delete[] l_po_perm; // delete _l_adj_mat; // _l_adj_mat = NULL; // // _n_sons = 3; // _sons = new ClusterBase*[_n_sons]; // // isep1 += _beg; // isep2 += _beg; // // // constructing child nodes for index cluster tree // _sons[0] = new Cluster(this, _beg, isep1, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj0); // _sons[1] = new Cluster(this, isep1, isep2, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj1); // _sons[2] = new Separator(this, isep2, _end, _g_op_perm, _g_po_perm, _g_adj_mat, l_adj2); // // dynamic_cast<Cluster*>(_sons[0])->subdivide(bmin); // dynamic_cast<Cluster*>(_sons[1])->subdivide(bmin); // // } else { // delete _l_adj_mat; // _l_adj_mat = NULL; // } // end if next recursion step } // end if ( connected && size () > bmin ) }
/**
 * Partition the elements in the iterator range [beg, end) of \p mesh into
 * \p n_pieces subdomains and store the result in each element's
 * processor_id().
 *
 * Outline of what the code below does:
 *  - non-serial meshes hit libmesh_not_implemented();
 *  - n_pieces == 1 is delegated to single_partition_range();
 *  - without LIBMESH_HAVE_METIS an error message is printed and an
 *    SFCPartitioner is used instead;
 *  - otherwise the range is mapped to contiguous indices, processor 0
 *    builds a CSR graph (face neighbors, active descendants of refined
 *    neighbors, interior/boundary element links) with per-element vertex
 *    weights, calls METIS, and the resulting part array is broadcast.
 *
 * \param mesh      mesh owning the elements (must be serial)
 * \param beg       first element of the range to partition
 * \param end       one-past-the-last element of the range
 * \param n_pieces  number of subdomains to create; asserted to be > 0
 */
void MetisPartitioner::partition_range(MeshBase & mesh,
                                       MeshBase::element_iterator beg,
                                       MeshBase::element_iterator end,
                                       unsigned int n_pieces)
{
  libmesh_assert_greater (n_pieces, 0);

  // We don't yet support distributed meshes with this Partitioner
  if (!mesh.is_serial())
    libmesh_not_implemented();

  // Check for an easy return
  if (n_pieces == 1)
    {
      this->single_partition_range (beg, end);
      return;
    }

  // What to do if the Metis library IS NOT present
#ifndef LIBMESH_HAVE_METIS

  libmesh_here();
  libMesh::err << "ERROR: The library has been built without" << std::endl
               << "Metis support. Using a space-filling curve" << std::endl
               << "partitioner instead!" << std::endl;

  SFCPartitioner sfcp;
  sfcp.partition_range (mesh, beg, end, n_pieces);

  // What to do if the Metis library IS present
#else

  LOG_SCOPE("partition_range()", "MetisPartitioner");

  const dof_id_type n_range_elem = std::distance(beg, end);

  // Metis will only consider the elements in the range.
  // We need to map the range element ids into a
  // contiguous range. Further, we want the unique range indexing to be
  // independent of the element ordering, otherwise a circular dependency
  // can result in which the partitioning depends on the ordering which
  // depends on the partitioning...
  vectormap<dof_id_type, dof_id_type> global_index_map;
  global_index_map.reserve (n_range_elem);

  {
    std::vector<dof_id_type> global_index;

    MeshCommunication().find_global_indices (mesh.comm(),
                                             MeshTools::create_bounding_box(mesh),
                                             beg, end, global_index);

    libmesh_assert_equal_to (global_index.size(), n_range_elem);

    // global_index[cnt] is the contiguous index of the cnt-th element of
    // the range; key it by element id for later lookups.
    MeshBase::element_iterator it = beg;
    for (std::size_t cnt=0; it != end; ++it)
      {
        const Elem * elem = *it;

        global_index_map.insert (std::make_pair(elem->id(), global_index[cnt++]));
      }
    libmesh_assert_equal_to (global_index_map.size(), n_range_elem);
  }

  // If we have boundary elements in this mesh, we want to account for
  // the connectivity between them and interior elements. We can find
  // interior elements from boundary elements, but we need to build up
  // a lookup map to do the reverse.
  typedef std::unordered_multimap<const Elem *, const Elem *> map_type;
  map_type interior_to_boundary_map;

  {
    MeshBase::element_iterator it = beg;
    for (; it != end; ++it)
      {
        const Elem * elem = *it;

        // If we don't have an interior_parent then there's nothing
        // to look us up.
        if ((elem->dim() >= LIBMESH_DIM) ||
            !elem->interior_parent())
          continue;

        // get all relevant interior elements
        std::set<const Elem *> neighbor_set;
        elem->find_interior_neighbors(neighbor_set);

        // Record (interior neighbor -> this lower-dimensional element).
        std::set<const Elem *>::iterator n_it = neighbor_set.begin();
        for (; n_it != neighbor_set.end(); ++n_it)
          {
            // FIXME - non-const versions of the std::set<const Elem
            // *> returning methods would be nice
            Elem * neighbor = const_cast<Elem *>(*n_it);

#if defined(LIBMESH_HAVE_UNORDERED_MULTIMAP) ||         \
  defined(LIBMESH_HAVE_TR1_UNORDERED_MULTIMAP) ||       \
  defined(LIBMESH_HAVE_HASH_MULTIMAP) ||                \
  defined(LIBMESH_HAVE_EXT_HASH_MULTIMAP)
            interior_to_boundary_map.insert(std::make_pair(neighbor, elem));
#else
            interior_to_boundary_map.insert(interior_to_boundary_map.begin(),
                                            std::make_pair(neighbor, elem));
#endif
          }
      }
  }

  // Data structure that Metis will fill up on processor 0 and broadcast.
  std::vector<Metis::idx_t> part(n_range_elem);

  // Invoke METIS, but only on processor 0.
  // Then broadcast the resulting decomposition
  if (mesh.processor_id() == 0)
    {
      // Data structures and parameters needed only on processor 0 by Metis.
      // std::vector<Metis::idx_t> options(5);
      std::vector<Metis::idx_t> vwgt(n_range_elem);

      Metis::idx_t
        n = static_cast<Metis::idx_t>(n_range_elem),   // number of "nodes" (elements) in the graph
        // wgtflag = 2,                                // weights on vertices only, none on edges
        // numflag = 0,                                // C-style 0-based numbering
        nparts  = static_cast<Metis::idx_t>(n_pieces), // number of subdomains to create
        edgecut = 0;                                   // the numbers of edges cut by the resulting partition

      // Set the options
      // options[0] = 0; // use default options

      // build the graph
      METIS_CSR_Graph<Metis::idx_t> csr_graph;

      csr_graph.offsets.resize(n_range_elem + 1, 0);

      // Local scope for these
      {
        // build the graph in CSR format. Note that
        // the edges in the graph will correspond to
        // face neighbors

#ifdef LIBMESH_ENABLE_AMR
        // Scratch buffer reused for every refined (inactive) neighbor.
        std::vector<const Elem *> neighbors_offspring;
#endif

#ifndef NDEBUG
        // Total number of graph entries counted in pass (1); only used by
        // the consistency assertion after pass (2).
        std::size_t graph_size=0;
#endif

        // (1) first pass - get the row sizes for each element by counting the number
        // of face neighbors. Also populate the vwght array if necessary
        MeshBase::element_iterator it = beg;
        for (; it != end; ++it)
          {
            const Elem * elem = *it;

            const dof_id_type elem_global_index =
              global_index_map[elem->id()];

            libmesh_assert_less (elem_global_index, vwgt.size());

            // maybe there is a better weight?
            // The weight is used to define what a balanced graph is
            if (!_weights)
              vwgt[elem_global_index] = elem->n_nodes();
            else
              vwgt[elem_global_index] = static_cast<Metis::idx_t>((*_weights)[elem->id()]);

            unsigned int num_neighbors = 0;

            // Loop over the element's neighbors. An element
            // adjacency corresponds to a face neighbor
            for (auto neighbor : elem->neighbor_ptr_range())
              {
                if (neighbor != libmesh_nullptr)
                  {
                    // If the neighbor is active, but is not in the
                    // range of elements being partitioned, treat it
                    // as a NULL neighbor.
                    if (neighbor->active() && !global_index_map.count(neighbor->id()))
                      continue;

                    // If the neighbor is active treat it
                    // as a connection
                    if (neighbor->active())
                      num_neighbors++;

#ifdef LIBMESH_ENABLE_AMR

                    // Otherwise we need to find all of the
                    // neighbor's children that are connected to
                    // us and add them
                    else
                      {
                        // The side of the neighbor to which
                        // we are connected
                        const unsigned int ns =
                          neighbor->which_neighbor_am_i (elem);
                        libmesh_assert_less (ns, neighbor->n_neighbors());

                        // Get all the active children (& grandchildren, etc...)
                        // of the neighbor.

                        // FIXME - this is the wrong thing, since we
                        // should be getting the active family tree on
                        // our side only. But adding too many graph
                        // links may cause hanging nodes to tend to be
                        // on partition interiors, which would reduce
                        // communication overhead for constraint
                        // equations, so we'll leave it.
                        neighbor->active_family_tree (neighbors_offspring);

                        // Get all the neighbor's children that
                        // live on that side and are thus connected
                        // to us
                        for (std::size_t nc=0; nc<neighbors_offspring.size(); nc++)
                          {
                            const Elem * child =
                              neighbors_offspring[nc];

                            // Skip neighbor offspring which are not in the range of elements being partitioned.
                            if (!global_index_map.count(child->id()))
                              continue;

                            // This does not assume a level-1 mesh.
                            // Note that since children have sides numbered
                            // coincident with the parent then this is a sufficient test.
                            if (child->neighbor_ptr(ns) == elem)
                              {
                                libmesh_assert (child->active());
                                num_neighbors++;
                              }
                          }
                      }

#endif /* ifdef LIBMESH_ENABLE_AMR */

                  }
              }

            // Check for any interior neighbors
            if ((elem->dim() < LIBMESH_DIM) && elem->interior_parent())
              {
                // get all relevant interior elements
                std::set<const Elem *> neighbor_set;
                elem->find_interior_neighbors(neighbor_set);

                // Every interior neighbor is counted here, whether or not
                // it belongs to `mesh` (compare with pass (2) below).
                num_neighbors += neighbor_set.size();
              }

            // Check for any boundary neighbors
            typedef map_type::iterator map_it_type;
            std::pair<map_it_type, map_it_type>
              bounds = interior_to_boundary_map.equal_range(elem);
            num_neighbors += std::distance(bounds.first, bounds.second);

            csr_graph.prep_n_nonzeros(elem_global_index, num_neighbors);
#ifndef NDEBUG
            graph_size += num_neighbors;
#endif
          }

        csr_graph.prepare_for_use();

        // (2) second pass - fill the compressed adjacency array
        it = beg;

        for (; it != end; ++it)
          {
            const Elem * elem = *it;

            const dof_id_type elem_global_index =
              global_index_map[elem->id()];

            // Next free slot in this element's CSR row.
            unsigned int connection=0;

            // Loop over the element's neighbors. An element
            // adjacency corresponds to a face neighbor
            for (auto neighbor : elem->neighbor_ptr_range())
              {
                if (neighbor != libmesh_nullptr)
                  {
                    // If the neighbor is active, but is not in the
                    // range of elements being partitioned, treat it
                    // as a NULL neighbor.
                    if (neighbor->active() && !global_index_map.count(neighbor->id()))
                      continue;

                    // If the neighbor is active treat it
                    // as a connection
                    if (neighbor->active())
                      csr_graph(elem_global_index, connection++) = global_index_map[neighbor->id()];

#ifdef LIBMESH_ENABLE_AMR

                    // Otherwise we need to find all of the
                    // neighbor's children that are connected to
                    // us and add them
                    else
                      {
                        // The side of the neighbor to which
                        // we are connected
                        const unsigned int ns =
                          neighbor->which_neighbor_am_i (elem);
                        libmesh_assert_less (ns, neighbor->n_neighbors());

                        // Get all the active children (& grandchildren, etc...)
                        // of the neighbor.
                        neighbor->active_family_tree (neighbors_offspring);

                        // Get all the neighbor's children that
                        // live on that side and are thus connected
                        // to us
                        for (std::size_t nc=0; nc<neighbors_offspring.size(); nc++)
                          {
                            const Elem * child =
                              neighbors_offspring[nc];

                            // Skip neighbor offspring which are not in the range of elements being partitioned.
                            if (!global_index_map.count(child->id()))
                              continue;

                            // This does not assume a level-1 mesh.
                            // Note that since children have sides numbered
                            // coincident with the parent then this is a sufficient test.
                            if (child->neighbor_ptr(ns) == elem)
                              {
                                libmesh_assert (child->active());

                                csr_graph(elem_global_index, connection++) = global_index_map[child->id()];
                              }
                          }
                      }

#endif /* ifdef LIBMESH_ENABLE_AMR */

                  }
              }

            if ((elem->dim() < LIBMESH_DIM) && elem->interior_parent())
              {
                // get all relevant interior elements
                std::set<const Elem *> neighbor_set;
                elem->find_interior_neighbors(neighbor_set);

                // NOTE(review): pass (1) reserved one slot per entry of
                // neighbor_set, but only neighbors that `mesh` itself owns
                // are written below -- confirm how csr_graph treats the
                // slots that stay unfilled (e.g. for a BoundaryMesh).
                std::set<const Elem *>::iterator n_it = neighbor_set.begin();
                for (; n_it != neighbor_set.end(); ++n_it)
                  {
                    const Elem * neighbor = *n_it;

                    // Not all interior neighbors are necessarily in
                    // the same Mesh (hence not in the global_index_map).
                    // This will be the case when partitioning a
                    // BoundaryMesh, whose elements all have
                    // interior_parents() that belong to some other
                    // Mesh.
                    const Elem * queried_elem = mesh.query_elem_ptr(neighbor->id());

                    // Compare the neighbor and the queried_elem
                    // pointers, make sure they are the same.
                    if (queried_elem && queried_elem == neighbor)
                      {
                        vectormap<dof_id_type, dof_id_type>::iterator global_index_map_it =
                          global_index_map.find(neighbor->id());

                        // If the interior_neighbor is in the Mesh but
                        // not in the global_index_map, we have other issues.
                        if (global_index_map_it == global_index_map.end())
                          libmesh_error_msg("Interior neighbor with id " << neighbor->id() << " not found in global_index_map.");

                        else
                          csr_graph(elem_global_index, connection++) = global_index_map_it->second;
                      }
                  }
              }

            // Check for any boundary neighbors
            for (const auto & pr : as_range(interior_to_boundary_map.equal_range(elem)))
              {
                const Elem * neighbor = pr.second;
                csr_graph(elem_global_index, connection++) =
                  global_index_map[neighbor->id()];
              }
          }

        // We create a non-empty vals for a disconnected graph, to
        // work around a segfault from METIS.
        libmesh_assert_equal_to (csr_graph.vals.size(),
                                 std::max(graph_size, std::size_t(1)));
      } // done building the graph

      // Number of balancing constraints handed to METIS.
      Metis::idx_t ncon = 1;

      // Select which type of partitioning to create

      // Use recursive if the number of partitions is less than or equal to 8
      if (n_pieces <= 8)
        Metis::METIS_PartGraphRecursive(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0],
                                        libmesh_nullptr, libmesh_nullptr, &nparts, libmesh_nullptr,
                                        libmesh_nullptr, libmesh_nullptr, &edgecut, &part[0]);

      // Otherwise use kway
      else
        Metis::METIS_PartGraphKway(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0],
                                   libmesh_nullptr, libmesh_nullptr, &nparts, libmesh_nullptr,
                                   libmesh_nullptr, libmesh_nullptr, &edgecut, &part[0]);

    } // end processor 0 part

  // Broadcast the resulting partition
  mesh.comm().broadcast(part);

  // Assign the returned processor ids. The part array contains
  // the processor id for each active element, but in terms of
  // the contiguous indexing we defined above
  {
    MeshBase::element_iterator it = beg;
    for (; it!=end; ++it)
      {
        Elem * elem = *it;

        libmesh_assert (global_index_map.count(elem->id()));

        const dof_id_type elem_global_index =
          global_index_map[elem->id()];

        libmesh_assert_less (elem_global_index, part.size());
        const processor_id_type elem_procid =
          static_cast<processor_id_type>(part[elem_global_index]);

        elem->processor_id() = elem_procid;
      }
  }
#endif
}
void ParMETISGraphPartitionerImpl::p_partition(void) { int me(this->processor_rank()); std::vector<idx_t> vtxdist; std::vector<idx_t> xadj; std::vector<idx_t> adjncy; ParMETISGraphWrapper wrap(p_adjacency_list); wrap.get_csr_local(vtxdist, xadj, adjncy); int nnodes(vtxdist[me+1] - vtxdist[me]); #if 0 for (int p = 0; p < this->processor_size(); ++p) { if (this->processor_rank() == p) { std::cout << "Processor " << p << ": nodes: "; for (Index n = 0; n < nnodes; ++n) { std::cout << p_adjacency_list.node_index(n) << ","; } std::cout << std::endl; std::cout << "Processor " << p << ": vtxdist: "; std::copy(vtxdist.begin(), vtxdist.end(), std::ostream_iterator<idx_t>(std::cout, ",")); std::cout << std::endl; std::cout << "Processor " << p << ": xadj: "; std::copy(xadj.begin(), xadj.end(), std::ostream_iterator<idx_t>(std::cout, ",")); std::cout << std::endl; std::cout << "Processor " << p << ": adjncy: "; std::copy(adjncy.begin(), adjncy.end(), std::ostream_iterator<idx_t>(std::cout, ",")); std::cout << std::endl; } this->communicator().barrier(); } #endif // Call the partitioner (try to use variable names that match the documentation) int status; idx_t ncon(1); idx_t wgtflag(3), numflag(0); idx_t nparts(this->processor_size()); std::vector<idx_t> vwgt(nnodes, 1); std::vector<idx_t> adjwgt(adjncy.size(), 2); std::vector<real_t> tpwgts(nparts*ncon, 1.0/static_cast<real_t>(nparts)); real_t ubvec(1.05); std::vector<idx_t> options(3); options[0] = 1; options[1] = 127; options[2] = 14; MPI_Comm comm(this->communicator()); idx_t edgecut; std::vector<idx_t> part(nnodes); status = ParMETIS_V3_PartKway(&vtxdist[0], &xadj[0], &adjncy[0], &vwgt[0], &adjwgt[0], &wgtflag, &numflag, &ncon, &nparts, &tpwgts[0], &ubvec, &options[0], &edgecut, &part[0], &comm); if (status != 0) { // FIXME: throw an exception } // "part" contains the destination processors; transfer this to the // local array wrap.set_partition(vtxdist, part); wrap.get_partition(p_node_destinations); }
// ------------------------------------------------------------ // MetisPartitioner implementation void MetisPartitioner::_do_partition (MeshBase& mesh, const unsigned int n_pieces) { libmesh_assert_greater (n_pieces, 0); libmesh_assert (mesh.is_serial()); // Check for an easy return if (n_pieces == 1) { this->single_partition (mesh); return; } // What to do if the Metis library IS NOT present #ifndef LIBMESH_HAVE_METIS libmesh_here(); libMesh::err << "ERROR: The library has been built without" << std::endl << "Metis support. Using a space-filling curve" << std::endl << "partitioner instead!" << std::endl; SFCPartitioner sfcp; sfcp.partition (mesh, n_pieces); // What to do if the Metis library IS present #else START_LOG("partition()", "MetisPartitioner"); const dof_id_type n_active_elem = mesh.n_active_elem(); // build the graph // std::vector<int> options(5); std::vector<int> vwgt(n_active_elem); std::vector<int> part(n_active_elem); int n = static_cast<int>(n_active_elem), // number of "nodes" (elements) // in the graph // wgtflag = 2, // weights on vertices only, // // none on edges // numflag = 0, // C-style 0-based numbering nparts = static_cast<int>(n_pieces), // number of subdomains to create edgecut = 0; // the numbers of edges cut by the // resulting partition // Set the options // options[0] = 0; // use default options // Metis will only consider the active elements. // We need to map the active element ids into a // contiguous range. Further, we want the unique range indexing to be // independednt of the element ordering, otherwise a circular dependency // can result in which the partitioning depends on the ordering which // depends on the partitioning... 
std::map<const Elem*, dof_id_type> global_index_map; { std::vector<dof_id_type> global_index; MeshBase::element_iterator it = mesh.active_elements_begin(); const MeshBase::element_iterator end = mesh.active_elements_end(); MeshCommunication().find_global_indices (MeshTools::bounding_box(mesh), it, end, global_index); libmesh_assert_equal_to (global_index.size(), n_active_elem); for (std::size_t cnt=0; it != end; ++it) { const Elem *elem = *it; libmesh_assert (!global_index_map.count(elem)); global_index_map[elem] = global_index[cnt++]; } libmesh_assert_equal_to (global_index_map.size(), n_active_elem); } // build the graph in CSR format. Note that // the edges in the graph will correspond to // face neighbors std::vector<int> xadj, adjncy; { std::vector<const Elem*> neighbors_offspring; MeshBase::element_iterator elem_it = mesh.active_elements_begin(); const MeshBase::element_iterator elem_end = mesh.active_elements_end(); // This will be exact when there is no refinement and all the // elements are of the same type. std::size_t graph_size=0; std::vector<std::vector<dof_id_type> > graph(n_active_elem); for (; elem_it != elem_end; ++elem_it) { const Elem* elem = *elem_it; libmesh_assert (global_index_map.count(elem)); const dof_id_type elem_global_index = global_index_map[elem]; libmesh_assert_less (elem_global_index, vwgt.size()); libmesh_assert_less (elem_global_index, graph.size()); // maybe there is a better weight? // The weight is used to define what a balanced graph is if(!_weights) vwgt[elem_global_index] = elem->n_nodes(); else vwgt[elem_global_index] = static_cast<int>((*_weights)[elem->id()]); // Loop over the element's neighbors. 
An element // adjacency corresponds to a face neighbor for (unsigned int ms=0; ms<elem->n_neighbors(); ms++) { const Elem* neighbor = elem->neighbor(ms); if (neighbor != NULL) { // If the neighbor is active treat it // as a connection if (neighbor->active()) { libmesh_assert (global_index_map.count(neighbor)); const dof_id_type neighbor_global_index = global_index_map[neighbor]; graph[elem_global_index].push_back(neighbor_global_index); graph_size++; } #ifdef LIBMESH_ENABLE_AMR // Otherwise we need to find all of the // neighbor's children that are connected to // us and add them else { // The side of the neighbor to which // we are connected const unsigned int ns = neighbor->which_neighbor_am_i (elem); libmesh_assert_less (ns, neighbor->n_neighbors()); // Get all the active children (& grandchildren, etc...) // of the neighbor. neighbor->active_family_tree (neighbors_offspring); // Get all the neighbor's children that // live on that side and are thus connected // to us for (unsigned int nc=0; nc<neighbors_offspring.size(); nc++) { const Elem* child = neighbors_offspring[nc]; // This does not assume a level-1 mesh. // Note that since children have sides numbered // coincident with the parent then this is a sufficient test. 
if (child->neighbor(ns) == elem) { libmesh_assert (child->active()); libmesh_assert (global_index_map.count(child)); const dof_id_type child_global_index = global_index_map[child]; graph[elem_global_index].push_back(child_global_index); graph_size++; } } } #endif /* ifdef LIBMESH_ENABLE_AMR */ } } } // Convert the graph into the format Metis wants xadj.reserve(n_active_elem+1); adjncy.reserve(graph_size); for (std::size_t r=0; r<graph.size(); r++) { xadj.push_back(adjncy.size()); std::vector<dof_id_type> graph_row; // build this emtpy graph_row.swap(graph[r]); // this will deallocate at the end of scope adjncy.insert(adjncy.end(), graph_row.begin(), graph_row.end()); } // The end of the adjacency array for the last elem xadj.push_back(adjncy.size()); libmesh_assert_equal_to (adjncy.size(), graph_size); libmesh_assert_equal_to (xadj.size(), n_active_elem+1); } // done building the graph if (adjncy.empty()) adjncy.push_back(0); int ncon = 1; // Select which type of partitioning to create // Use recursive if the number of partitions is less than or equal to 8 if (n_pieces <= 8) Metis::METIS_PartGraphRecursive(&n, &ncon, &xadj[0], &adjncy[0], &vwgt[0], NULL, NULL, &nparts, NULL, NULL, NULL, &edgecut, &part[0]); // Otherwise use kway else Metis::METIS_PartGraphKway(&n, &ncon, &xadj[0], &adjncy[0], &vwgt[0], NULL, NULL, &nparts, NULL, NULL, NULL, &edgecut, &part[0]); // Assign the returned processor ids. 
The part array contains // the processor id for each active element, but in terms of // the contiguous indexing we defined above { MeshBase::element_iterator it = mesh.active_elements_begin(); const MeshBase::element_iterator end = mesh.active_elements_end(); for (; it!=end; ++it) { Elem* elem = *it; libmesh_assert (global_index_map.count(elem)); const dof_id_type elem_global_index = global_index_map[elem]; libmesh_assert_less (elem_global_index, part.size()); const processor_id_type elem_procid = static_cast<processor_id_type>(part[elem_global_index]); elem->processor_id() = elem_procid; } } STOP_LOG("partition()", "MetisPartitioner"); #endif }
/**
 * Partition the elements of \p mesh into \p np parts with METIS.
 *
 * For np == 1 the work is delegated to singlePartition().  Otherwise the
 * elements are numbered contiguously in iteration order, the master rank
 * builds a CSR graph whose edges are the valid face neighbors of each
 * element (vertex weight: the element's numPoints), METIS computes the
 * partition, and every element is replaced in the mesh container by a copy
 * carrying its new process id.
 *
 * @param mesh mesh whose elements are (re)assigned to processes
 * @param np   number of parts to create
 */
void
PartitionerMetis<MeshType>::partitionImpl ( mesh_ptrtype mesh, rank_type np )
{
    LOG(INFO) << "PartitionerMetis::partitionImpl starts...";

    // Check for an easy return.  This happens before tic() because this
    // path never reaches the matching toc() at the end of the function.
    if (np == 1)
    {
        this->singlePartition (mesh);
        return;
    }

    tic();

    const dof_id_type n_elems = mesh->numElements();

    // build the graph
    // std::vector<Metis::idx_t> options(5);
    std::vector<Metis::idx_t> vwgt(n_elems);
    std::vector<Metis::idx_t> part(n_elems);

    // number of "nodes" (elements) in the graph
    Metis::idx_t n = static_cast<Metis::idx_t>(n_elems);
    // number of subdomains to create
    Metis::idx_t nparts  = static_cast<Metis::idx_t>(np);
    // number of edges cut by the resulting partition
    Metis::idx_t edgecut = 0;

    // element id -> contiguous graph index, in iteration order (0, 1, 2, ...)
    std::map<dof_id_type, dof_id_type> global_index_map;
    {
        dof_id_type next_gid = 0;
        for( auto const& elt : elements(mesh) )
        {
            global_index_map.insert (std::make_pair(elt.id(), next_gid++));
        }
    }

    // Invoke METIS, but only on processor 0.
    // NOTE(review): the original comment promises a broadcast of the
    // resulting decomposition, but no broadcast of `part` is visible in this
    // function; on non-master ranks `part` keeps its zero initialisation --
    // confirm whether that is intended.
    if ( Environment::isMasterRank() )
    {
        CSRGraphMetis<Metis::idx_t> csr_graph;

        csr_graph.offsets.resize(mesh->numElements()+1, 0);

        // Local scope for these
        {
#ifndef NDEBUG
            // total number of graph entries, checked after the second pass
            std::size_t graph_size=0;
#endif
            // build the graph in CSR format.  Note that
            // the edges in the graph will correspond to
            // face neighbors
            for( auto& elt: elements(mesh) )
            {
                // (1) first pass - get the row sizes for each element by counting the number
                // of face neighbors.  Also populate the vwght array if necessary
                const dof_id_type gid = global_index_map[elt.id()];

                CHECK( gid < vwgt.size() ) << "Invalid gid " << gid << " greater or equal than " << vwgt.size();

                // maybe there is a better weight?
                // The weight is used to define what a balanced graph is
                //if(!_weights)
                vwgt[gid] = elt.numPoints;
                //else
                //vwgt[gid] = static_cast<Metis::idx_t>((*_weights)[elem->id()]);

                unsigned int num_neighbors = 0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                        num_neighbors++;
                    }
                }
                std::cout << "element id " << elt.id() << " gid: " << gid << " w: " << vwgt[gid] << " neigh: " << num_neighbors << std::endl;
                csr_graph.prepareNumberNonZeros(gid, num_neighbors);
#ifndef NDEBUG
                graph_size += num_neighbors;
#endif
            }

            csr_graph.prepareForUse();

            // (2) second pass - fill the compressed adjacency array
            for( auto& elt : elements(mesh) )
            {
                dof_id_type gid = global_index_map[elt.id()];

                // next free slot in this element's CSR row
                unsigned int connection=0;

                // Loop over the element's neighbors.  An element
                // adjacency corresponds to a face neighbor
                for ( uint16_type ms=0; ms < elt.nNeighbors(); ms++ )
                {
                    size_type neighbor_id = elt.neighbor( ms ).first;
                    if ( neighbor_id != invalid_size_type_value )
                    {
                        csr_graph(gid, connection++) = global_index_map[neighbor_id];
                    }
                }
            }
#ifndef NDEBUG
            // We create a non-empty vals for a disconnected graph, to
            // work around a segfault from METIS.
            DCHECK( csr_graph.vals.size() == std::max(graph_size,std::size_t(1)))
                << "Invalid graph";
#endif
        } // done building the graph

        Metis::idx_t ncon = 1;

        // Select which type of partitioning to create

        // Use recursive if the number of partitions is less than or equal to 8
        if (np <= 8)
            Metis::METIS_PartGraphRecursive(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0], NULL,
                                            NULL, &nparts, NULL, NULL, NULL,
                                            &edgecut, &part[0]);

        // Otherwise  use kway
        else
            Metis::METIS_PartGraphKway(&n, &ncon, &csr_graph.offsets[0], &csr_graph.vals[0], &vwgt[0], NULL,
                                       NULL, &nparts, NULL, NULL, NULL,
                                       &edgecut, &part[0]);

    } // end processor 0 part

    // Assign the returned processor ids.  The part array contains the processor
    // id for each element, but in terms of the contiguous indexing we defined
    // above
    LOG(INFO) << "PartitionerMetis::partitionImpl nelements : " << nelements(elements(mesh));
    for( auto it = mesh->beginElement(), en = mesh->endElement(); it != en; ++it )
    {
        dof_id_type gid = global_index_map[it->id()];
        CHECK( gid < part.size() ) << "Invalid gid " << gid << " greater or equal than partition size " << part.size();
        rank_type pid = static_cast<rank_type>(part[gid]);
#if 0
        mesh->elements().modify( it, [&pid]( element_type& e )
                                 {
                                     e.setProcessId( pid );
                                     std::cout << "element id " << e.id() << " process id " << e.processId() << "\n";
                                 });
#else
        std::cout << "element id " << it->id() << " process id " << pid << "\n";
        // the container element is swapped for a modified copy
        auto e = *it;
        e.setProcessId( pid );
        mesh->elements().replace( it, e );
#endif
    }
    for( auto& e : allelements(mesh) )
    {
        std::cout << "2. element id " << e.id() << " process id " << e.processId() << "\n";
    }

    auto t = toc("PartitionerMetis::partitionImpl", FLAGS_v > 0 );
    LOG(INFO) << "PartitionerMetis::partitionImpl done in " << t << "s";
}
static int compute_hypergraph_metrics(const Epetra_BlockMap &rowmap, const Epetra_BlockMap &colmap, int numGlobalColumns, Isorropia::Epetra::CostDescriber &costs, double &myGoalWeight, double &balance, double &cutn, double &cutl) // output { const Epetra_Comm &comm = rowmap.Comm(); #ifdef HAVE_MPI const Epetra_MpiComm* mpiComm = dynamic_cast<const Epetra_MpiComm*>(&comm); MPI_Comm mcomm = mpiComm->Comm(); #endif int nProcs = comm.NumProc(); int myProc = comm.MyPID(); double min, avg; std::map<int, float> vertexWeights; std::map<int, std::map<int, float > > graphEdgeWeights; std::map<int, float> hyperEdgeWeights; costs.getCosts(vertexWeights, // vertex global ID -> weight graphEdgeWeights, // vertex global ID -> map from neighbor global ID to edge weight hyperEdgeWeights); // hyperedge global ID -> weight Epetra_Vector vwgt(rowmap); int numVWgts = vertexWeights.size(); if (numVWgts > 0){ double *wvals = new double [numVWgts]; int *gids = new int [numVWgts]; std::map<int, float>::iterator vnext = vertexWeights.begin(); int i=0; while (vnext != vertexWeights.end()){ wvals[i] = vnext->second; gids[i] = vnext->first; vnext++; i++; } vwgt.ReplaceGlobalValues(i, wvals, gids); delete [] wvals; delete [] gids; } else{ vwgt.PutScalar(1.0); // default to unit weights } compute_balance(vwgt, myGoalWeight, min, balance, avg); if (balance < 0){ return 1; } /* Compute cutl and cutn. */ int totalHEWeights = 0; int numHEWeights = hyperEdgeWeights.size(); comm.SumAll(&numHEWeights, &totalHEWeights, 1); if ((totalHEWeights > 0) && (totalHEWeights < numGlobalColumns)){ if (myProc == 0) std::cerr << "Must supply either no h.e. weights or else supply at least one for each column" << std::endl; return -1; } std::map<int, float>::iterator heWgtIter; // Create a set containing all the columns in my rows. We assume all // the rows are in the same partition. 
int numMyCols = colmap.NumMyElements(); std::set<int> colGIDS; std::set<int>::iterator gidIter; for (int j=0; j<numMyCols; j++){ colGIDS.insert(colmap.GID(j)); } /* Divide columns among processes, then each process computes its * assigned columns' cutl and cutn. * TODO - numGlobalColumns can be less than nprocs * Fix this when a process is assigned no columns. TODO */ int ncols = numGlobalColumns / nProcs; int leftover = numGlobalColumns - (nProcs * ncols); std::vector<int> colCount(nProcs, 0); for (int i=0; i<nProcs; i++){ colCount[i] = ncols; if (i < leftover) colCount[i]++; } int *colTotals = NULL; double *colWeights = NULL; if (colCount[myProc] > 0){ colTotals = new int [colCount[myProc]]; if (totalHEWeights > 0){ colWeights = new double [colCount[myProc]]; } } int *colLocal= new int [ncols + 1]; double *localWeights = NULL; if (totalHEWeights > 0){ localWeights = new double [ncols + 1]; } int base = colmap.IndexBase(); int colStart = base; for (int i=0; i<nProcs; i++){ // All processes send info to the process reponsible // for the next group of columns int ncols = colCount[i]; int colEnd = colStart + ncols; for (int j=colStart,k=0; j < colEnd; j++,k++){ gidIter = colGIDS.find(j); if (gidIter != colGIDS.end()){ colLocal[k] = 1; // column j has rows in my partition } else{ colLocal[k] = 0; } if (totalHEWeights > 0){ std::map<int, float>::iterator heWgtIter = hyperEdgeWeights.find(j); if (heWgtIter != hyperEdgeWeights.end()){ // I have the edge weight for column j localWeights[k] = heWgtIter->second; } else{ localWeights[k] = 0.0; } } } #ifdef HAVE_MPI int rc = MPI_Reduce(colLocal, colTotals, ncols, MPI_INT, MPI_SUM, i, mcomm); if (totalHEWeights > 0){ rc = MPI_Reduce(localWeights, colWeights, ncols, MPI_DOUBLE, MPI_SUM, i, mcomm); } // TODO handle possible MPI error #else memcpy(colTotals, colLocal, ncols * sizeof(int)); if (totalHEWeights > 0){ memcpy(colWeights, localWeights, ncols * sizeof(double)); } #endif colStart = colEnd; } delete [] colLocal; if 
(localWeights) delete [] localWeights; double localCutN=0; double localCutL=0; double ewgt = 1.0; for (int j=0; j<colCount[myProc]; j++){ if (totalHEWeights > 0){ ewgt = colWeights[j]; } if (colTotals[j] > 1){ localCutL += (colTotals[j] - 1) * ewgt; // # of cuts in columns/edges localCutN += ewgt; // # of cut columns/edges } } if (colTotals) delete [] colTotals; if (colWeights) delete [] colWeights; comm.SumAll(&localCutN, &cutn, 1); comm.SumAll(&localCutL, &cutl, 1); return 0; }
static int compute_graph_metrics(const Epetra_BlockMap &rowmap, const Epetra_BlockMap &colmap, std::vector<std::vector<int> > &rows, Isorropia::Epetra::CostDescriber &costs, double &myGoalWeight, double &balance, int &numCuts, double &cutWgt, double &cutn, double &cutl) { const Epetra_Comm &comm = rowmap.Comm(); int myProc = comm.MyPID(); int myCols = colmap.NumMyElements(); double min, avg; std::map<int, float> vertexWeights; std::map<int, std::map<int, float > > graphEdgeWeights; std::map<int, float> hyperEdgeWeights; costs.getCosts(vertexWeights, // vertex global ID -> weight graphEdgeWeights, // vertex global ID -> map from neighbor global ID to edge weight hyperEdgeWeights); // hyperedge global ID -> weight // Compute the balance Epetra_Vector vwgt(rowmap); int numVWgts = vertexWeights.size(); if (numVWgts > 0){ double *wvals = new double [numVWgts]; int *gids = new int [numVWgts]; std::map<int, float>::iterator vnext = vertexWeights.begin(); int i=0; while (vnext != vertexWeights.end()){ wvals[i] = vnext->second; gids[i] = vnext->first; vnext++; i++; } vwgt.ReplaceGlobalValues(i, wvals, gids); delete [] wvals; delete [] gids; } else{ vwgt.PutScalar(1.0); // default to unit weights } compute_balance(vwgt, myGoalWeight, min, balance, avg); if (balance < 0){ return 1; } // Compute the measures based on cut edges int *procID = new int [myCols]; int *GID = new int [myCols]; int *tmp = new int [myCols]; for (int i=0; i < myCols; i++){ GID[i] = colmap.GID(i); } rowmap.RemoteIDList(myCols, GID, procID, tmp); // matrix is square delete [] tmp; int haveEdgeWeights = graphEdgeWeights.size(); int localNumCuts = 0; double localCutWgt = 0.0; double localCutn = 0.0; double localCutl = 0.0; for (int i=0; i < rowmap.NumMyElements(); i++){ int vtxGID = rowmap.GID(i); int numEdges = rows[i].size(); if (numEdges > 0){ std::map<int, std::map<int, float> >::iterator wnext; if (haveEdgeWeights){ wnext = graphEdgeWeights.find(vtxGID); if (wnext == graphEdgeWeights.end()){ std::cerr 
<< "Graph edge weights are missing for vertex " << vtxGID; std::cerr << std::endl; return -1; } } double heWeight = 0.0; std::set<int> nbors; for (int j=0; j < numEdges; j++){ int colGID = GID[rows[i][j]]; int nborProc = procID[rows[i][j]]; if (colGID == vtxGID) continue; // skip self edges float wgt = 1.0; if (haveEdgeWeights){ std::map<int, float>::iterator curr = (wnext->second).find(colGID); if (curr == (wnext->second).end()){ std::cerr << "Graph edge weights do not match matrix"; std::cerr << std::endl; return -1; } wgt = curr->second; } if (nborProc != myProc){ localNumCuts++; // number of graph edges that are cut nbors.insert(nborProc); // count number of neighboring processes localCutWgt += wgt; // sum of weights of cut edges } heWeight += wgt; // implied hyperedge weight, sum all edges } int numNbors = nbors.size(); if (numNbors > 0){ // implied hyperedge is vertex and neighbors, if cut, add in its he weight localCutn += heWeight; // sum of (number of partitions - 1) weighted by the // implied hyperedge weight localCutl += (numNbors * heWeight); } } } // next vertex in my partition delete [] GID; delete [] procID; double lval[4], gval[4]; lval[0] = (double)localNumCuts; lval[1] = localCutWgt; lval[2] = localCutn; lval[3] = localCutl; comm.SumAll(lval, gval, 4); numCuts = (int)gval[0]; cutWgt = gval[1]; cutn = gval[2]; cutl = gval[3]; return 0; }