// Shrinks the mesh held by bulkData around the nodes of samplePart: the
// locally owned entities related to those nodes are collected, their closure
// is computed, and every owned-or-shared entity outside that closure is
// handed to an EntityDestructor, one entity rank at a time (highest first).
void performNodalMeshReduction(
    stk::mesh::Part &samplePart,
    stk::mesh::BulkData& bulkData)
{
  typedef std::vector<stk::mesh::Entity *> EntityPtrVector;
  typedef boost::indirect_iterator<EntityPtrVector::const_iterator> EntityIterator;

  const stk::mesh::MetaData &metaData = stk::mesh::MetaData::get(bulkData);
  const stk::mesh::EntityRank nodeRank(0);

  // Seed set: the nodes selected by the sample part.
  EntityPtrVector sampledNodes;
  stk::mesh::get_selected_entities(samplePart, bulkData.buckets(nodeRank), sampledNodes);

  // Collect the locally owned entities that have a relation with a seed node.
  const stk::mesh::Selector ownedSelector = metaData.locally_owned_part();
  EntityPtrVector neighbors;
  for (EntityPtrVector::const_iterator nodeIt = sampledNodes.begin(); nodeIt != sampledNodes.end(); ++nodeIt) {
    const stk::mesh::PairIterRelation nodeRelations = (*nodeIt)->relations();
    typedef stk::mesh::PairIterRelation::first_type RelationIterator;
    const RelationIterator relEnd = nodeRelations.second;
    for (RelationIterator rel = nodeRelations.first; rel != relEnd; ++rel) {
      const Teuchos::Ptr<stk::mesh::Entity> neighbor(rel->entity());
      if (!Teuchos::nonnull(neighbor) || !ownedSelector(*neighbor)) {
        continue;
      }
      neighbors.push_back(neighbor.get());
    }
  }

  // An entity reachable from several seed nodes must appear only once.
  std::sort(neighbors.begin(), neighbors.end(), stk::mesh::EntityLess());
  const EntityPtrVector::iterator uniqueEnd =
      std::unique(neighbors.begin(), neighbors.end(), stk::mesh::EntityEqual());
  neighbors.erase(uniqueEnd, neighbors.end());

  EntityPtrVector sampleClosure;
  stk::mesh::find_closure(bulkData, neighbors, sampleClosure);

  // Walk the ranks from highest to lowest. The tail of the closure starting at
  // the first key of the current rank is the keeper range for that rank; the
  // range is then shortened so lower ranks only see their own entries.
  const stk::mesh::Selector ownedOrShared = metaData.locally_owned_part() | metaData.globally_shared_part();
  const EntityIterator closureBegin(sampleClosure.begin());
  EntityIterator upperEnd(sampleClosure.end());
  stk::mesh::EntityRank ranksLeft = metaData.entity_rank_count();
  while (ranksLeft > 0) {
    --ranksLeft;
    const stk::mesh::EntityRank rank = ranksLeft;

    const EntityIterator rankBegin = std::lower_bound(closureBegin, upperEnd,
                                                      stk::mesh::EntityKey(rank, 0),
                                                      stk::mesh::EntityLess());

    EntityPtrVector candidates;
    stk::mesh::get_selected_entities(ownedOrShared, bulkData.buckets(rank), candidates);

    {
      // The modification object lives only for the removal pass of this rank.
      BulkModification modification(bulkData);
      std::set_difference(candidates.begin(), candidates.end(),
                          rankBegin.base(), upperEnd.base(),
                          EntityDestructor(modification),
                          stk::mesh::EntityLess());
    }

    upperEnd = rankBegin;
  }
}
// Reduces the mesh held by bulkData to the neighborhood of the nodes that
// belong to samplePart.
//
//   samplePart  part whose node-rank entities seed the reduction
//   bulkData    mesh that is queried and then modified
//
// Steps: (1) for every sample node, gather the valid, locally owned entities
// it is connected to at each entity rank; (2) sort and de-duplicate them and
// compute their closure; (3) for each rank, highest first, feed every
// owned-or-shared entity that is not in the closure to an EntityDestructor
// (presumably scheduling its removal -- EntityDestructor is defined elsewhere).
void performNodalMeshReduction(
    stk::mesh::Part &samplePart,
    stk::mesh::BulkData& bulkData)
{
  const stk::mesh::MetaData &metaData = stk::mesh::MetaData::get(bulkData);

  std::vector<stk::mesh::Entity> sampleNodes;
  stk::mesh::get_selected_entities(samplePart, bulkData.buckets(stk::topology::NODE_RANK), sampleNodes);

  const stk::mesh::Selector locallyOwned = metaData.locally_owned_part();

  std::vector<stk::mesh::Entity> relatedEntities;
  typedef std::vector<stk::mesh::Entity>::const_iterator EntityIterator;
  for (EntityIterator it(sampleNodes.begin()), it_end(sampleNodes.end()); it != it_end; ++it) {
    for (stk::mesh::EntityRank r = stk::topology::NODE_RANK; r < metaData.entity_rank_count(); ++r) {
      stk::mesh::Entity const* relations = bulkData.begin(*it, r);
      const int num_rels = bulkData.num_connectivity(*it, r);
      for (int i = 0; i < num_rels; ++i) {
        stk::mesh::Entity relatedEntity = relations[i];
        // Skip invalid connectivity slots and entities owned by another process.
        if (bulkData.is_valid(relatedEntity) && locallyOwned(bulkData.bucket(relatedEntity))) {
          relatedEntities.push_back(relatedEntity);
        }
      }
    }
  }
  // The same entity is usually reached from several sample nodes.
  std::sort(relatedEntities.begin(), relatedEntities.end(), stk::mesh::EntityLess(bulkData));
  relatedEntities.erase(
      std::unique(relatedEntities.begin(), relatedEntities.end()),
      relatedEntities.end());

  std::vector<stk::mesh::Entity> sampleClosure;
  stk::mesh::find_closure(bulkData, relatedEntities, sampleClosure);

  // Keep only the closure, remove the rest, by decreasing entityRanks
  {
    const stk::mesh::Selector ownedOrShared = metaData.locally_owned_part() | metaData.globally_shared_part();
    EntityIterator allKeepersEnd(sampleClosure.end());
    const EntityIterator allKeepersBegin(sampleClosure.begin());
    for (size_t candidateRankCount = metaData.entity_rank_count(); candidateRankCount > 0; --candidateRankCount) {
      const stk::mesh::EntityRank candidateRank = static_cast<stk::mesh::EntityRank>(candidateRankCount - 1);
      // First closure entry whose key is not less than (candidateRank, 0);
      // everything from there up to allKeepersEnd is the keeper range for
      // this rank.
      const EntityIterator keepersBegin = std::lower_bound(allKeepersBegin, allKeepersEnd,
                                                           stk::mesh::EntityKey(candidateRank, 0),
                                                           stk::mesh::EntityLess(bulkData));
      const EntityIterator keepersEnd = allKeepersEnd;
      std::vector<stk::mesh::Entity> candidates;
      stk::mesh::get_selected_entities(ownedOrShared, bulkData.buckets(candidateRank), candidates);
      {
        BulkModification modification(bulkData);
        // Pass the vector iterators directly: std::vector iterators have no
        // standard base() member, so calling it is not portable.
        std::set_difference(candidates.begin(), candidates.end(),
                            keepersBegin, keepersEnd,
                            EntityDestructor(modification),
                            stk::mesh::EntityLess(bulkData));
      }
      // Lower ranks must only see the part of the closure below this rank.
      allKeepersEnd = keepersBegin;
    }
  }
}
/*!
 * \brief Constructor. Stores the centroid of every entity reachable through
 * the iterator and builds a static search tree over those centroids.
 *
 * \param entity_iterator Iterator over the entities to index.
 * \param local_map Local map used to compute each entity centroid.
 * \param parameters Optional "Coarse Local Search Leaf Size" (int) overrides
 * the default leaf size of 20.
 */
CoarseLocalSearch::CoarseLocalSearch(
    const EntityIterator& entity_iterator,
    const Teuchos::RCP<EntityLocalMap>& local_map,
    const Teuchos::ParameterList& parameters )
{
    // Size the interleaved centroid storage. The spatial dimension is taken
    // from the first entity and stays 0 when there are no entities.
    const int num_entity = entity_iterator.size();
    const int space_dim =
        ( num_entity > 0 ) ? entity_iterator.begin()->physicalDimension() : 0;
    d_entity_centroids.resize( space_dim * num_entity );

    // Fill one centroid slot per entity and remember which entity owns each
    // local id.
    EntityIterator last = entity_iterator.end();
    int local_id = 0;
    for ( EntityIterator current = entity_iterator.begin();
          current != last;
          ++current, ++local_id )
    {
        local_map->centroid(
            *current,
            d_entity_centroids(space_dim*local_id,space_dim) );
        d_entity_map.emplace( local_id, *current );
    }

    // Leaf size: user value if given, otherwise 20, never more than the
    // number of entities.
    int leaf_size = parameters.isParameter("Coarse Local Search Leaf Size")
        ? parameters.get<int>("Coarse Local Search Leaf Size")
        : 20;
    if ( num_entity < leaf_size )
    {
        leaf_size = num_entity;
    }

    // Build the static search tree over the centroids.
    d_tree = SearchTreeFactory::createStaticTree(
        space_dim, d_entity_centroids(), leaf_size );
    DTK_ENSURE( Teuchos::nonnull(d_tree) );
}
//---------------------------------------------------------------------------//
// Assemble the local bounding box around an iterator.
void CoarseGlobalSearch::assembleBoundingBox( 
    const EntityIterator& entity_iterator,
    Teuchos::Tuple<double,6>& bounding_box ) const
{
    double max = std::numeric_limits<double>::max();
    bounding_box = Teuchos::tuple( max, max, max, -max, -max, -max );
    Teuchos::Tuple<double,6> entity_bounds;
    EntityIterator entity_begin = entity_iterator.begin();
    EntityIterator entity_end = entity_iterator.end();
    EntityIterator entity_it;
    for ( entity_it = entity_begin; entity_it != entity_end; ++entity_it )
    {
	entity_it->boundingBox( entity_bounds );
	for ( int n = 0; n < 3; ++n )
	{
	    bounding_box[n] = 
		std::min( bounding_box[n], entity_bounds[n] );
	    bounding_box[n+3] = 
		std::max( bounding_box[n+3], entity_bounds[n+3] );
	}
    }
}
//---------------------------------------------------------------------------//
// Constructor. Configures the domain local map, reads the optional
// "Track Missed Range Entities" flag, always builds the coarse global search,
// and builds the coarse and fine local searches only when this process has
// domain entities.
ParallelSearch::ParallelSearch(
    const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
    const int physical_dimension,
    const EntityIterator& domain_iterator,
    const Teuchos::RCP<EntityLocalMap>& domain_local_map,
    const Teuchos::ParameterList& parameters )
    : d_comm( comm )
    , d_physical_dim( physical_dimension )
    , d_track_missed_range_entities( false )
    , d_missed_range_entity_ids( 0 )
{
    // Hand the parameters to the local map.
    domain_local_map->setParameters( parameters );

    // Missed range entity tracking stays off unless it is requested.
    const bool has_tracking_option =
        parameters.isParameter("Track Missed Range Entities");
    if ( has_tracking_option )
    {
        d_track_missed_range_entities =
            parameters.get<bool>("Track Missed Range Entities");
    }

    // The coarse global search is created on every process, including those
    // with no domain entities, because it must be collective across the
    // communicator.
    d_coarse_global_search = Teuchos::rcp(
        new CoarseGlobalSearch(d_comm, physical_dimension,
                               domain_iterator, parameters) );

    // Nothing more to build when there are no local domain entities.
    d_empty_domain = ( domain_iterator.size() == 0 );
    if ( d_empty_domain )
    {
        return;
    }

    d_coarse_local_search = Teuchos::rcp(
        new CoarseLocalSearch(domain_iterator, domain_local_map, parameters) );
    d_fine_local_search = Teuchos::rcp(
        new FineLocalSearch(domain_local_map) );
}
//---------------------------------------------------------------------------//
// Search the domain with the range entity centroids and construct the
// graph. This will update the state of the object.
//
// range_iterator   iterator over the local range entities
// range_local_map  local map of the range; receives the parameters
// parameters       forwarded to the local map and to the coarse/fine searches
//
// The coarse global search redistributes the range centroids to candidate
// domain processes. Each received centroid is then searched locally and the
// (range id, domain id, domain rank) triples are sent back to the range
// owners. When missed-entity tracking is on, the ids that no process found
// are stored in d_missed_range_entity_ids, sorted and without duplicates.
void ParallelSearch::search(
    const EntityIterator& range_iterator,
    const Teuchos::RCP<EntityLocalMap>& range_local_map,
    const Teuchos::ParameterList& parameters )
{
    // Set the parameters with the local map.
    range_local_map->setParameters( parameters );

    // Empty range flag.
    d_empty_range = ( 0 == range_iterator.size() );

    // Reset the state of the object.
    // NOTE(review): d_domain_owner_ranks is filled below but is not cleared
    // here like the other maps -- confirm whether that is intentional.
    d_range_owner_ranks.clear();
    d_domain_to_range_map.clear();
    d_range_to_domain_map.clear();
    d_parametric_coords.clear();

    // Perform a coarse global search to redistribute the range entities.
    Teuchos::Array<EntityId> range_entity_ids;
    Teuchos::Array<int> range_owner_ranks;
    Teuchos::Array<double> range_centroids;
    d_coarse_global_search->search(
        range_iterator, range_local_map, parameters,
        range_entity_ids, range_owner_ranks, range_centroids );

    // If needed, extract the range entities that were missed during the
    // coarse global search. Those are local range entities, so this process
    // is recorded as their owner.
    Teuchos::Array<EntityId> found_range_entity_ids;
    Teuchos::Array<int> found_range_ranks;
    Teuchos::Array<EntityId> missed_range_entity_ids;
    Teuchos::Array<int> missed_range_ranks;
    if ( d_track_missed_range_entities )
    {
        missed_range_entity_ids = Teuchos::Array<EntityId>(
            d_coarse_global_search->getMissedRangeEntityIds() );
        missed_range_ranks.assign( missed_range_entity_ids.size(),
                                   d_comm->getRank() );
    }

    // Only do the local search if there are local domain entities.
    Teuchos::Array<int> export_range_ranks;
    Teuchos::Array<EntityId> export_data;
    if ( !d_empty_domain )
    {
        // For each range centroid, perform a local search.
        int num_range = range_entity_ids.size();
        Teuchos::Array<Entity> domain_neighbors;
        Teuchos::Array<Entity> domain_parents;
        Teuchos::Array<double> reference_coordinates;
        Teuchos::Array<double> local_coords( d_physical_dim );
        int num_parents = 0;
        for ( int n = 0; n < num_range; ++n )
        {
            // Perform a coarse local search to get the nearest domain
            // entities to the point.
            d_coarse_local_search->search(
                range_centroids(d_physical_dim*n,d_physical_dim),
                parameters,
                domain_neighbors );

            // Perform a fine local search to get the entities the point maps
            // to.
            d_fine_local_search->search(
                domain_neighbors,
                range_centroids(d_physical_dim*n,d_physical_dim),
                parameters,
                domain_parents,
                reference_coordinates );

            // Store the potentially multiple parametric realizations of the
            // point.
            std::unordered_map<EntityId,Teuchos::Array<double> > ref_map;
            num_parents = domain_parents.size();
            for ( int p = 0; p < num_parents; ++p )
            {
                // Store the range data in the domain parallel decomposition.
                local_coords().assign(
                    reference_coordinates(d_physical_dim*p,d_physical_dim) );
                d_range_owner_ranks.emplace(
                    range_entity_ids[n], range_owner_ranks[n] );
                d_domain_to_range_map.emplace(
                    domain_parents[p].id(), range_entity_ids[n] );
                ref_map.emplace(
                    domain_parents[p].id(), local_coords );

                // Extract the data to communicate back to the range parallel
                // decomposition. Each record is the triple
                // (range id, domain id, domain owner rank).
                export_range_ranks.push_back( range_owner_ranks[n] );
                export_data.push_back( range_entity_ids[n] );
                export_data.push_back( domain_parents[p].id() );
                export_data.push_back(
                    Teuchos::as<EntityId>(d_comm->getRank()) );
            }

            // If we found parents for the point, store them.
            if ( num_parents > 0 )
            {
                d_parametric_coords.emplace( range_entity_ids[n], ref_map );

                // If we are tracking missed entities, also track those that
                // we found so we can determine if an entity was found after
                // being sent to multiple destinations.
                if ( d_track_missed_range_entities )
                {
                    found_range_entity_ids.push_back( range_entity_ids[n] );
                    found_range_ranks.push_back( range_owner_ranks[n] );
                }
            }

            // Otherwise, if we are tracking missed entities report this.
            else if ( d_track_missed_range_entities )
            {
                missed_range_entity_ids.push_back( range_entity_ids[n] );
                missed_range_ranks.push_back( range_owner_ranks[n] );
            }
        }
    }

    // Back-communicate the domain entities in which we found each range
    // entity to complete the mapping.
    Tpetra::Distributor domain_to_range_dist( d_comm );
    int num_import =
        domain_to_range_dist.createFromSends( export_range_ranks() );
    Teuchos::Array<EntityId> domain_data( 3*num_import );
    Teuchos::ArrayView<const EntityId> export_data_view = export_data();
    domain_to_range_dist.doPostsAndWaits( export_data_view, 3, domain_data() );

    // Store the domain data in the range parallel decomposition.
    for ( int i = 0; i < num_import; ++i )
    {
        d_domain_owner_ranks.emplace(
            domain_data[3*i+1], domain_data[3*i+2] );
        d_range_to_domain_map.emplace(
            domain_data[3*i], domain_data[3*i+1] );
    }

    // If we are tracking missed entities, back-communicate the missing entities
    // and found entities to determine which entities are actually missing.
    if ( d_track_missed_range_entities )
    {
        // Back-communicate the missing entities.
        Tpetra::Distributor missed_range_dist( d_comm );
        int num_import_missed =
            missed_range_dist.createFromSends( missed_range_ranks() );
        Teuchos::Array<EntityId> import_missed( num_import_missed );
        Teuchos::ArrayView<const EntityId> missed_view =
            missed_range_entity_ids();
        missed_range_dist.doPostsAndWaits( missed_view, 1, import_missed() );

        // Back-communicate the found entities.
        Tpetra::Distributor found_range_dist( d_comm );
        int num_import_found =
            found_range_dist.createFromSends( found_range_ranks() );
        Teuchos::Array<EntityId> import_found( num_import_found );
        Teuchos::ArrayView<const EntityId> found_view =
            found_range_entity_ids();
        found_range_dist.doPostsAndWaits( found_view, 1, import_found() );

        // Sort both lists and drop duplicate ids. A range entity sent to
        // several domain processes is reported once per destination, and the
        // std set algorithms count duplicates (multiset semantics): an id
        // missed on two processes but found on one would otherwise survive
        // the difference below and be reported as missed.
        std::sort( import_missed.begin(), import_missed.end() );
        auto import_missed_unique_end =
            std::unique( import_missed.begin(), import_missed.end() );
        import_missed.resize(
            std::distance(import_missed.begin(), import_missed_unique_end) );

        std::sort( import_found.begin(), import_found.end() );
        auto import_found_unique_end =
            std::unique( import_found.begin(), import_found.end() );
        import_found.resize(
            std::distance(import_found.begin(), import_found_unique_end) );

        // The truly missed entities are those reported missed by at least one
        // process and found by none. Both inputs are sorted and unique, so
        // the result is sorted and unique as well.
        d_missed_range_entity_ids.resize( import_missed.size() );
        auto missed_range_end = std::set_difference(
            import_missed.begin(), import_missed.end(),
            import_found.begin(), import_found.end(),
            d_missed_range_entity_ids.begin() );
        d_missed_range_entity_ids.resize(
            std::distance(d_missed_range_entity_ids.begin(),
                          missed_range_end) );
    }
}
//---------------------------------------------------------------------------//
// Redistribute a set of range entity centroid coordinates with their owner
// ranks to the owning domain process.
//
// range_iterator     iterator over the local range entities
// range_local_map    local map used to compute each range entity centroid
// parameters         not read in this function
// range_entity_ids   [out] ids of the range entities received by this process
// range_owner_ranks  [out] rank that sent each received range entity
// range_centroids    [out] received centroids, d_space_dim values per entity
//
// A range entity is sent to every domain process whose box contains its
// centroid (within d_inclusion_tol). When missed-entity tracking is enabled,
// the ids of local range entities that fall in no box are recorded in
// d_missed_range_entity_ids for this call.
void CoarseGlobalSearch::search( const EntityIterator& range_iterator,
                                 const Teuchos::RCP<EntityLocalMap>& range_local_map,
                                 const Teuchos::ParameterList& parameters,
                                 Teuchos::Array<EntityId>& range_entity_ids,
                                 Teuchos::Array<int>& range_owner_ranks,
                                 Teuchos::Array<double>& range_centroids ) const
{
    // Start each search with an empty missed list. It is only appended to
    // below, so without this reset ids from earlier searches would be
    // reported again.
    d_missed_range_entity_ids.clear();

    // Assemble the local range bounding box.
    Teuchos::Tuple<double,6> range_box;
    assembleBoundingBox( range_iterator, range_box );

    // Find the domain boxes it intersects with. The index into
    // d_domain_boxes is used as the destination rank.
    Teuchos::Array<int> neighbor_ranks;
    Teuchos::Array<Teuchos::Tuple<double,6> > neighbor_boxes;
    int num_domains = d_domain_boxes.size();
    for ( int n = 0; n < num_domains; ++n )
    {
        if ( boxesIntersect(range_box,d_domain_boxes[n],d_inclusion_tol) )
        {
            neighbor_ranks.push_back(n);
            neighbor_boxes.push_back( d_domain_boxes[n] );
        }
    }

    // For each local range entity, find the neighbors we should send it to.
    int num_neighbors = neighbor_boxes.size();
    EntityIterator range_begin = range_iterator.begin();
    EntityIterator range_end = range_iterator.end();
    EntityIterator range_it;
    Teuchos::Array<EntityId> send_ids;
    Teuchos::Array<int> send_ranks;
    Teuchos::Array<double> send_centroids;
    Teuchos::Array<double> centroid(d_space_dim);
    bool found_entity = false;
    for ( range_it = range_begin; range_it != range_end; ++range_it )
    {
        // Get the centroid.
        range_local_map->centroid( *range_it, centroid() );

        // Check the neighbors. An entity may be sent to more than one.
        found_entity = false;
        for ( int n = 0; n < num_neighbors; ++n )
        {
            // If the centroid is in the box, add it to the send list.
            if ( pointInBox(centroid(),neighbor_boxes[n],d_inclusion_tol) )
            {
                found_entity = true;
                send_ids.push_back( range_it->id() );
                send_ranks.push_back( neighbor_ranks[n] );
                for ( int d = 0; d < d_space_dim; ++d )
                {
                    send_centroids.push_back( centroid[d] );
                }
            }
        }

        // If we are tracking missed range entities, add the entity to the
        // list.
        if ( d_track_missed_range_entities && !found_entity )
        {
            d_missed_range_entity_ids.push_back( range_it->id() );
        }
    }

    // Every sent entity is tagged with this process as its owner.
    int num_send = send_ranks.size();
    Teuchos::Array<int> range_ranks( num_send, d_comm->getRank() );

    // Create a distributor.
    Tpetra::Distributor distributor(d_comm);
    int num_range_import = distributor.createFromSends( send_ranks() );

    // Redistribute the range entity ids.
    Teuchos::ArrayView<const EntityId> send_ids_view = send_ids();
    range_entity_ids.resize( num_range_import );
    distributor.doPostsAndWaits( send_ids_view, 1, range_entity_ids() );

    // Redistribute the range entity owner ranks.
    Teuchos::ArrayView<const int> range_ranks_view = range_ranks();
    range_owner_ranks.resize( num_range_import );
    distributor.doPostsAndWaits( range_ranks_view, 1, range_owner_ranks() );

    // Redistribute the range entity centroids.
    range_centroids.resize( d_space_dim*num_range_import );
    Teuchos::ArrayView<const double> send_centroids_view = send_centroids();
    distributor.doPostsAndWaits(
        send_centroids_view, d_space_dim, range_centroids() );
}