void Partitioner::partition_unpartitioned_elements (MeshBase & mesh, const unsigned int n_subdomains) { MeshBase::element_iterator it = mesh.unpartitioned_elements_begin(); const MeshBase::element_iterator end = mesh.unpartitioned_elements_end(); const dof_id_type n_unpartitioned_elements = MeshTools::n_elem (it, end); // the unpartitioned elements must exist on all processors. If the range is empty on one // it is empty on all, and we can quit right here. if (!n_unpartitioned_elements) return; // find the target subdomain sizes std::vector<dof_id_type> subdomain_bounds(mesh.n_processors()); for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) { dof_id_type tgt_subdomain_size = 0; // watch out for the case that n_subdomains < n_processors if (pid < n_subdomains) { tgt_subdomain_size = n_unpartitioned_elements/n_subdomains; if (pid < n_unpartitioned_elements%n_subdomains) tgt_subdomain_size++; } //libMesh::out << "pid, #= " << pid << ", " << tgt_subdomain_size << std::endl; if (pid == 0) subdomain_bounds[0] = tgt_subdomain_size; else subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size; } libmesh_assert_equal_to (subdomain_bounds.back(), n_unpartitioned_elements); // create the unique mapping for all unpartitioned elements independent of partitioning // determine the global indexing for all the unpartitoned elements std::vector<dof_id_type> global_indices; // Calling this on all processors a unique range in [0,n_unpartitioned_elements) is constructed. // Only the indices for the elements we pass in are returned in the array. MeshCommunication().find_global_indices (mesh.comm(), MeshTools::bounding_box(mesh), it, end, global_indices); for (dof_id_type cnt=0; it != end; ++it) { Elem * elem = *it; libmesh_assert_less (cnt, global_indices.size()); const dof_id_type global_index = global_indices[cnt++]; libmesh_assert_less (global_index, subdomain_bounds.back()); libmesh_assert_less (global_index, n_unpartitioned_elements); const processor_id_type subdomain_id = cast_int<processor_id_type> (std::distance(subdomain_bounds.begin(), std::upper_bound(subdomain_bounds.begin(), subdomain_bounds.end(), global_index))); libmesh_assert_less (subdomain_id, n_subdomains); elem->processor_id() = subdomain_id; //libMesh::out << "assigning " << global_index << " to " << subdomain_id << std::endl; } }
void ParmetisPartitioner::initialize (const MeshBase & mesh, const unsigned int n_sbdmns) { const dof_id_type n_active_local_elem = mesh.n_active_local_elem(); // Set parameters. _pmetis->wgtflag = 2; // weights on vertices only _pmetis->ncon = 1; // one weight per vertex _pmetis->numflag = 0; // C-style 0-based numbering _pmetis->nparts = static_cast<Parmetis::idx_t>(n_sbdmns); // number of subdomains to create _pmetis->edgecut = 0; // the numbers of edges cut by the // partition // Initialize data structures for ParMETIS _pmetis->vtxdist.resize (mesh.n_processors()+1); std::fill (_pmetis->vtxdist.begin(), _pmetis->vtxdist.end(), 0); _pmetis->tpwgts.resize (_pmetis->nparts); std::fill (_pmetis->tpwgts.begin(), _pmetis->tpwgts.end(), 1./_pmetis->nparts); _pmetis->ubvec.resize (_pmetis->ncon); std::fill (_pmetis->ubvec.begin(), _pmetis->ubvec.end(), 1.05); _pmetis->part.resize (n_active_local_elem); std::fill (_pmetis->part.begin(), _pmetis->part.end(), 0); _pmetis->options.resize (5); _pmetis->vwgt.resize (n_active_local_elem); // Set the options _pmetis->options[0] = 1; // don't use default options _pmetis->options[1] = 0; // default (level of timing) _pmetis->options[2] = 15; // random seed (default) _pmetis->options[3] = 2; // processor distribution and subdomain distribution are decoupled // Find the number of active elements on each processor. We cannot use // mesh.n_active_elem_on_proc(pid) since that only returns the number of // elements assigned to pid which are currently stored on the calling // processor. This will not in general be correct for parallel meshes // when (pid!=mesh.processor_id()). _n_active_elem_on_proc.resize(mesh.n_processors()); mesh.comm().allgather(n_active_local_elem, _n_active_elem_on_proc); // count the total number of active elements in the mesh. Note we cannot // use mesh.n_active_elem() in general since this only returns the number // of active elements which are stored on the calling processor. // We should not use n_active_elem for any allocation because that will // be inheritly unscalable, but it can be useful for libmesh_assertions. dof_id_type n_active_elem=0; // Set up the vtxdist array. This will be the same on each processor. // ***** Consult the Parmetis documentation. ***** libmesh_assert_equal_to (_pmetis->vtxdist.size(), cast_int<std::size_t>(mesh.n_processors()+1)); libmesh_assert_equal_to (_pmetis->vtxdist[0], 0); for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) { _pmetis->vtxdist[pid+1] = _pmetis->vtxdist[pid] + _n_active_elem_on_proc[pid]; n_active_elem += _n_active_elem_on_proc[pid]; } libmesh_assert_equal_to (_pmetis->vtxdist.back(), static_cast<Parmetis::idx_t>(n_active_elem)); // ParMetis expects the elements to be numbered in contiguous blocks // by processor, i.e. [0, ne0), [ne0, ne0+ne1), ... // Since we only partition active elements we should have no expectation // that we currently have such a distribution. So we need to create it. // Also, at the same time we are going to map all the active elements into a globally // unique range [0,n_active_elem) which is *independent* of the current partitioning. // This can be fed to ParMetis as the initial partitioning of the subdomains (decoupled // from the partitioning of the objects themselves). This allows us to get the same // resultant partitioning independed of the input partitioning. MeshTools::BoundingBox bbox = MeshTools::bounding_box(mesh); _global_index_by_pid_map.clear(); // Maps active element ids into a contiguous range independent of partitioning. // (only needs local scope) vectormap<dof_id_type, dof_id_type> global_index_map; { std::vector<dof_id_type> global_index; // create the mapping which is contiguous by processor dof_id_type pid_offset=0; for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) { MeshBase::const_element_iterator it = mesh.active_pid_elements_begin(pid); const MeshBase::const_element_iterator end = mesh.active_pid_elements_end(pid); // note that we may not have all (or any!) the active elements which belong on this processor, // but by calling this on all processors a unique range in [0,_n_active_elem_on_proc[pid]) // is constructed. Only the indices for the elements we pass in are returned in the array. MeshCommunication().find_global_indices (mesh.comm(), bbox, it, end, global_index); for (dof_id_type cnt=0; it != end; ++it) { const Elem * elem = *it; libmesh_assert (!_global_index_by_pid_map.count(elem->id())); libmesh_assert_less (cnt, global_index.size()); libmesh_assert_less (global_index[cnt], _n_active_elem_on_proc[pid]); _global_index_by_pid_map.insert(std::make_pair(elem->id(), global_index[cnt++] + pid_offset)); } pid_offset += _n_active_elem_on_proc[pid]; } // create the unique mapping for all active elements independent of partitioning { MeshBase::const_element_iterator it = mesh.active_elements_begin(); const MeshBase::const_element_iterator end = mesh.active_elements_end(); // Calling this on all processors a unique range in [0,n_active_elem) is constructed. // Only the indices for the elements we pass in are returned in the array. MeshCommunication().find_global_indices (mesh.comm(), bbox, it, end, global_index); for (dof_id_type cnt=0; it != end; ++it) { const Elem * elem = *it; libmesh_assert (!global_index_map.count(elem->id())); libmesh_assert_less (cnt, global_index.size()); libmesh_assert_less (global_index[cnt], n_active_elem); global_index_map.insert(std::make_pair(elem->id(), global_index[cnt++])); } } // really, shouldn't be close! libmesh_assert_less_equal (global_index_map.size(), n_active_elem); libmesh_assert_less_equal (_global_index_by_pid_map.size(), n_active_elem); // At this point the two maps should be the same size. If they are not // then the number of active elements is not the same as the sum over all // processors of the number of active elements per processor, which means // there must be some unpartitioned objects out there. if (global_index_map.size() != _global_index_by_pid_map.size()) libmesh_error_msg("ERROR: ParmetisPartitioner cannot handle unpartitioned objects!"); } // Finally, we need to initialize the vertex (partition) weights and the initial subdomain // mapping. The subdomain mapping will be independent of the processor mapping, and is // defined by a simple mapping of the global indices we just found. { std::vector<dof_id_type> subdomain_bounds(mesh.n_processors()); const dof_id_type first_local_elem = _pmetis->vtxdist[mesh.processor_id()]; for (processor_id_type pid=0; pid<mesh.n_processors(); pid++) { dof_id_type tgt_subdomain_size = 0; // watch out for the case that n_subdomains < n_processors if (pid < static_cast<unsigned int>(_pmetis->nparts)) { tgt_subdomain_size = n_active_elem/std::min (cast_int<Parmetis::idx_t>(mesh.n_processors()), _pmetis->nparts); if (pid < n_active_elem%_pmetis->nparts) tgt_subdomain_size++; } if (pid == 0) subdomain_bounds[0] = tgt_subdomain_size; else subdomain_bounds[pid] = subdomain_bounds[pid-1] + tgt_subdomain_size; } libmesh_assert_equal_to (subdomain_bounds.back(), n_active_elem); MeshBase::const_element_iterator elem_it = mesh.active_local_elements_begin(); const MeshBase::const_element_iterator elem_end = mesh.active_local_elements_end(); for (; elem_it != elem_end; ++elem_it) { const Elem * elem = *elem_it; libmesh_assert (_global_index_by_pid_map.count(elem->id())); const dof_id_type global_index_by_pid = _global_index_by_pid_map[elem->id()]; libmesh_assert_less (global_index_by_pid, n_active_elem); const dof_id_type local_index = global_index_by_pid - first_local_elem; libmesh_assert_less (local_index, n_active_local_elem); libmesh_assert_less (local_index, _pmetis->vwgt.size()); // TODO:[BSK] maybe there is a better weight? _pmetis->vwgt[local_index] = elem->n_nodes(); // find the subdomain this element belongs in libmesh_assert (global_index_map.count(elem->id())); const dof_id_type global_index = global_index_map[elem->id()]; libmesh_assert_less (global_index, subdomain_bounds.back()); const unsigned int subdomain_id = std::distance(subdomain_bounds.begin(), std::lower_bound(subdomain_bounds.begin(), subdomain_bounds.end(), global_index)); libmesh_assert_less (subdomain_id, static_cast<unsigned int>(_pmetis->nparts)); libmesh_assert_less (local_index, _pmetis->part.size()); _pmetis->part[local_index] = subdomain_id; } } }