//-----------------------------------------------------------------------------
void BoundaryComputation::compute_boundary(const Mesh& mesh,
                                           const std::string type,
                                           BoundaryMesh& boundary)
{
  // We iterate over all facets in the mesh and check if they are on
  // the boundary. A facet is on the boundary if it is connected to
  // exactly one cell.

  log(TRACE, "Computing boundary mesh.");

  bool exterior = true;
  bool interior = true;
  if (type == "exterior")
    interior = false;
  else if (type == "interior")
    exterior = false;
  else if (type != "local")
  {
    dolfin_error("BoundaryComputation.cpp",
                 "determine boundary mesh type",
                 "Unknown boundary type (%d)", type.c_str());
  }

  // Get my MPI process rank and number of MPI processes
  const std::size_t my_rank = MPI::rank(mesh.mpi_comm());
  const std::size_t num_processes = MPI::size(mesh.mpi_comm());

  // Open boundary mesh for editing
  const std::size_t D = mesh.topology().dim();
  MeshEditor editor;
  editor.open(boundary, mesh.type().facet_type(), D - 1, mesh.geometry().dim());

  // Generate facet-to-cell connectivity if not already generated
  mesh.init(D - 1, D);
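  // (This connectivity is what num_entities(D) and num_global_entities(D)
  // rely on when classifying facets below.)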

  // Temporary map for assigning indices to vertices on the boundary
  std::map<std::size_t, std::size_t> boundary_vertices;

  // Map of index "owners" (process responsible for assigning global index)
  std::map<std::size_t, std::size_t> global_index_owner;

  // Shared vertices for full mesh
  // FIXME: const_cast
  const std::map<unsigned int, std::set<unsigned int>>& shared_vertices
    = const_cast<Mesh&>(mesh).topology().shared_entities(0);

  // Shared vertices for boundary mesh
  std::map<unsigned int, std::set<unsigned int>> shared_boundary_vertices;
  if (exterior)
  {
    // Extract shared vertices if the vertex is part of a globally
    // exterior facet.
    std::vector<std::size_t> boundary_global_indices;
    for (std::map<unsigned int, std::set<unsigned int>>::const_iterator
        sv_it = shared_vertices.begin(); sv_it != shared_vertices.end(); ++sv_it)
    {
      std::size_t local_mesh_index = sv_it->first;
      Vertex v(mesh, local_mesh_index);

      for (FacetIterator f(v); !f.end(); ++f)
      {
        if (f->num_global_entities(D) == 1)
        {
          const std::size_t global_mesh_index
            = mesh.topology().global_indices(0)[local_mesh_index];
          shared_boundary_vertices[local_mesh_index] = sv_it->second;
          boundary_global_indices.push_back(global_mesh_index);
          break;
        }
      }
    }

    // Gather global indices of shared boundary vertices from all processes
    std::vector<std::vector<std::size_t>> boundary_global_indices_all;
    MPI::all_gather(mesh.mpi_comm(), boundary_global_indices,
                     boundary_global_indices_all);
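    // After the gather, boundary_global_indices_all[p] holds the global
    // indices of the shared vertices identified as boundary vertices on
    // process p.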

    // Identify and clean up discrepancies between shared vertices of full mesh
    // and shared vertices of boundary mesh
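    // (A vertex shared with another process may touch a globally exterior
    // facet here without doing so on that process; such a process must be
    // dropped from the vertex's sharing set, and the vertex dropped entirely
    // if no sharing process remains.)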
    for (auto sbv_it = shared_boundary_vertices.begin();
         sbv_it != shared_boundary_vertices.end(); )
    {
      std::size_t local_mesh_index = sbv_it->first;
      const std::size_t global_mesh_index
        = mesh.topology().global_indices(0)[local_mesh_index];

      // Check if this vertex is identified as boundary vertex on
      // other processes sharing this vertex
      std::set<unsigned int> &other_processes = sbv_it->second;
      for (auto op_it = other_processes.begin();
           op_it != other_processes.end(); )
      {
        // Check if vertex is identified as boundary vertex on process *op_it
        bool is_boundary_vertex
          = (std::find(boundary_global_indices_all[*op_it].begin(),
                      boundary_global_indices_all[*op_it].end(),
                      global_mesh_index)
             != boundary_global_indices_all[*op_it].end());

        // Erase item if this is not identified as a boundary vertex
        // on process *op_it, and increment iterator
        if (!is_boundary_vertex)
        {
          // Erase item while carefully avoiding invalidating the
          // iterator: First increment it to get the next, valid
          // iterator, and then erase what it pointed to from
          // other_processes
          other_processes.erase(op_it++);
        }
        else
          ++op_it;
      }

      // Erase item from map if no other processes identify this
      // vertex as a boundary vertex, and increment iterator
      if (other_processes.size() == 0)
      {
        // Erase carefully as above
        shared_boundary_vertices.erase(sbv_it++);
      }
      else
        ++sbv_it;
    }
  }
  else
  {
    // If interior boundary, shared vertices are the same
    shared_boundary_vertices = shared_vertices;
  }

  // Determine boundary facets, count boundary vertices and facets, and
  // assign vertex indices
  std::size_t num_boundary_vertices = 0;
  std::size_t num_owned_vertices = 0;
  std::size_t num_boundary_cells = 0;

  MeshFunction<bool> boundary_facet(reference_to_no_delete_pointer(mesh),
                                    D - 1, false);
  for (FacetIterator f(mesh); !f.end(); ++f)
  {
    // Boundary facets are connected to exactly one cell
    if (f->num_entities(D) == 1)
    {
      const bool global_exterior_facet = (f->num_global_entities(D) == 1);
      if (global_exterior_facet && exterior)
        boundary_facet[*f] = true;
      else if (!global_exterior_facet && interior)
        boundary_facet[*f] = true;

      if (boundary_facet[*f])
      {
        // Count boundary vertices and assign indices
        for (VertexIterator v(*f); !v.end(); ++v)
        {
          const std::size_t local_mesh_index = v->index();

          if (boundary_vertices.find(local_mesh_index)
              == boundary_vertices.end())
          {
            const std::size_t local_boundary_index = num_boundary_vertices;
            boundary_vertices[local_mesh_index] = local_boundary_index;

            // Determine "owner" of global_mesh_index
            std::size_t owner = my_rank;

            std::map<unsigned int, std::set<unsigned int>>::const_iterator
              other_processes_it
              = shared_boundary_vertices.find(local_mesh_index);
            if (other_processes_it != shared_boundary_vertices.end() && D > 1)
            {
              const std::set<unsigned int>& other_processes
                = other_processes_it->second;
              const std::size_t min_process
                = *std::min_element(other_processes.begin(),
                                    other_processes.end());
              boundary.topology().shared_entities(0)[local_boundary_index]
                = other_processes;

              // FIXME: More sophisticated ownership determination
              if (min_process < owner)
                owner = min_process;
            }
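            // Record the rank responsible for assigning this vertex's global
            // boundary index: this process, unless a lower-ranked process
            // also shares the vertex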
            const std::size_t global_mesh_index
              = mesh.topology().global_indices(0)[local_mesh_index];
            global_index_owner[global_mesh_index] = owner;

            // Update counts
            if (owner == my_rank)
              num_owned_vertices++;
            num_boundary_vertices++;
          }
        }

        // Count boundary cells (facets of the mesh)
        num_boundary_cells++;
      }
    }
  }

  // Initiate boundary topology
  /*
  boundary.topology().init(0, num_boundary_vertices,
                           MPI::sum(mesh.mpi_comm(), num_owned_vertices));
  boundary.topology().init(D - 1, num_boundary_cells,
                           MPI::sum(mesh.mpi_comm(), num_boundary_cells));
  */

  // Specify number of vertices and cells
  editor.init_vertices_global(num_boundary_vertices,
                              MPI::sum(mesh.mpi_comm(), num_owned_vertices));
  editor.init_cells_global(num_boundary_cells, MPI::sum(mesh.mpi_comm(),
                                                        num_boundary_cells));

  // Write vertex map
  MeshFunction<std::size_t>& vertex_map = boundary.entity_map(0);
  if (num_boundary_vertices > 0)
  {
    vertex_map.init(reference_to_no_delete_pointer(boundary), 0,
                    num_boundary_vertices);
  }
  std::map<std::size_t, std::size_t>::const_iterator it;
  for (it = boundary_vertices.begin(); it != boundary_vertices.end(); ++it)
    vertex_map[it->second] = it->first;

  // Get vertex ownership distribution, and find index to start global
  // numbering from
  std::vector<std::size_t> ownership_distribution(num_processes);
  MPI::all_gather(mesh.mpi_comm(), num_owned_vertices, ownership_distribution);
  std::size_t start_index = 0;
  for (std::size_t j = 0; j < my_rank; j++)
    start_index += ownership_distribution[j];
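  // start_index is the exclusive prefix sum of the owned-vertex counts of
  // lower ranks, so each process numbers its owned boundary vertices in a
  // contiguous block. For example (assuming counts {3, 5, 2} on three ranks),
  // rank 0 starts at 0, rank 1 at 3 and rank 2 at 8.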

  // Set global indices of owned vertices, request global indices for
  // vertices owned elsewhere
  std::map<std::size_t, std::size_t> global_indices;
  std::vector<std::vector<std::size_t>> request_global_indices(num_processes);

  std::size_t current_index = start_index;
  for (std::size_t local_boundary_index = 0;
       local_boundary_index<num_boundary_vertices; local_boundary_index++)
  {
    const std::size_t local_mesh_index = vertex_map[local_boundary_index];
    const std::size_t global_mesh_index
      = mesh.topology().global_indices(0)[local_mesh_index];

    const std::size_t owner = global_index_owner[global_mesh_index];
    if (owner != my_rank)
      request_global_indices[owner].push_back(global_mesh_index);
    else
      global_indices[global_mesh_index] = current_index++;
  }

  // Send and receive requests from other processes
  std::vector<std::vector<std::size_t>> global_index_requests(num_processes);
  MPI::all_to_all(mesh.mpi_comm(), request_global_indices,
                  global_index_requests);
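  // After the exchange, global_index_requests[p] holds the global mesh
  // indices for which process p needs this process to supply the global
  // boundary index.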

  // Build responses to the requests for global indices
  std::vector<std::vector<std::size_t>> respond_global_indices(num_processes);
  for (std::size_t i = 0; i < num_processes; i++)
  {
    const std::size_t N = global_index_requests[i].size();
    respond_global_indices[i].resize(N);

    for (std::size_t j = 0; j < N; j++)
      respond_global_indices[i][j]
        = global_indices[global_index_requests[i][j]];
  }

  // Scatter responses back to requesting processes
  std::vector<std::vector<std::size_t>> global_index_responses(num_processes);
  MPI::all_to_all(mesh.mpi_comm(), respond_global_indices,
                  global_index_responses);

  // Update global_indices
  for (std::size_t i = 0; i < num_processes; i++)
  {
    const std::size_t N = global_index_responses[i].size();
    // Check that responses are the same size as the requests made
    dolfin_assert(global_index_responses[i].size()
                  == request_global_indices[i].size());
    for (std::size_t j = 0; j < N; j++)
    {
      const std::size_t global_mesh_index = request_global_indices[i][j];
      const std::size_t global_boundary_index = global_index_responses[i][j];
      global_indices[global_mesh_index] = global_boundary_index;
    }
  }

  // Create vertices
  for (std::size_t local_boundary_index = 0;
       local_boundary_index < num_boundary_vertices; local_boundary_index++)
  {
    const std::size_t local_mesh_index = vertex_map[local_boundary_index];
    const std::size_t global_mesh_index
      = mesh.topology().global_indices(0)[local_mesh_index];
    const std::size_t global_boundary_index = global_indices[global_mesh_index];

    Vertex v(mesh, local_mesh_index);

    editor.add_vertex_global(local_boundary_index, global_boundary_index,
                             v.point());
  }

  // Find global index to start cell numbering from for current process
  std::vector<std::size_t> cell_distribution(num_processes);
  MPI::all_gather(mesh.mpi_comm(), num_boundary_cells, cell_distribution);
  std::size_t start_cell_index = 0;
  for (std::size_t i = 0; i < my_rank; i++)
    start_cell_index += cell_distribution[i];
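  // Global boundary cell indices follow the same prefix-sum scheme as the
  // vertex numbering above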

  // Create cells (facets) and a map from boundary mesh cells to their
  // parent mesh facets
  MeshFunction<std::size_t>& cell_map = boundary.entity_map(D - 1);
  if (num_boundary_cells > 0)
  {
    cell_map.init(reference_to_no_delete_pointer(boundary), D - 1,
                  num_boundary_cells);
  }
  std::vector<std::size_t>
    cell(boundary.type().num_vertices(boundary.topology().dim()));
  std::size_t current_cell = 0;
  for (FacetIterator f(mesh); !f.end(); ++f)
  {
    if (boundary_facet[*f])
    {
      // Compute new vertex numbers for cell
      const unsigned int* vertices = f->entities(0);
      for (std::size_t i = 0; i < cell.size(); i++)
        cell[i] = boundary_vertices[vertices[i]];

      // Reorder vertices so facet is right-oriented w.r.t. facet
      // normal
      reorder(cell, *f);

      // Create mapping from boundary cell to mesh facet if requested
      if (!cell_map.empty())
        cell_map[current_cell] = f->index();

      // Add cell
      editor.add_cell(current_cell, start_cell_index+current_cell, cell);
      current_cell++;
    }
  }

  // Close mesh editor. Note the argument order=false to prevent
  // ordering from destroying the orientation of facets accomplished
  // by calling reorder() above.
  editor.close(false);
}
//-----------------------------------------------------------------------------
void MeshPartitioning::build_mesh(Mesh& mesh,
              const std::vector<std::size_t>& global_cell_indices,
              const boost::multi_array<std::size_t, 2>& cell_global_vertices,
              const std::vector<std::size_t>& vertex_indices,
              const boost::multi_array<double, 2>& vertex_coordinates,
              const std::map<std::size_t, std::size_t>& vertex_global_to_local,
              std::size_t tdim, std::size_t gdim, std::size_t num_global_cells,
              std::size_t num_global_vertices)
{
  Timer timer("PARALLEL 3: Build mesh (from local mesh data)");

  // Get number of processes and process number
  const std::size_t num_processes = MPI::num_processes();
  const std::size_t process_number = MPI::process_number();

  // Open mesh for editing
  mesh.clear();
  MeshEditor editor;
  editor.open(mesh, tdim, gdim);

  // Add vertices
  editor.init_vertices(vertex_coordinates.size());
  Point point(gdim);
  dolfin_assert(vertex_indices.size() == vertex_coordinates.size());
  for (std::size_t i = 0; i < vertex_coordinates.size(); ++i)
  {
    for (std::size_t j = 0; j < gdim; ++j)
      point[j] = vertex_coordinates[i][j];
    editor.add_vertex_global(i, vertex_indices[i], point);
  }

  // Add cells
  editor.init_cells(cell_global_vertices.size());
  const std::size_t num_cell_vertices = tdim + 1;
  std::vector<std::size_t> cell(num_cell_vertices);
  for (std::size_t i = 0; i < cell_global_vertices.size(); ++i)
  {
    for (std::size_t j = 0; j < num_cell_vertices; ++j)
    {
      // Get local cell vertex
      std::map<std::size_t, std::size_t>::const_iterator iter
          = vertex_global_to_local.find(cell_global_vertices[i][j]);
      dolfin_assert(iter != vertex_global_to_local.end());
      cell[j] = iter->second;
    }
    editor.add_cell(i, global_cell_indices[i], cell);
  }

  // Close mesh: this must be done after creating the global vertex
  // map, otherwise the ordering in editor.close() will be wrong
  // (based on local numbers).
  editor.close();

  // Set global number of cells and vertices
  mesh.topology().init_global(0, num_global_vertices);
  mesh.topology().init_global(tdim, num_global_cells);

  // Construct boundary mesh
  BoundaryMesh bmesh(mesh, "exterior");

  const MeshFunction<std::size_t>& boundary_vertex_map = bmesh.entity_map(0);
  const std::size_t boundary_size = boundary_vertex_map.size();

  // Build sorted array of global boundary vertex indices (global
  // numbering)
  std::vector<std::size_t> global_vertex_send(boundary_size);
  for (std::size_t i = 0; i < boundary_size; ++i)
    global_vertex_send[i] = vertex_indices[boundary_vertex_map[i]];
  std::sort(global_vertex_send.begin(), global_vertex_send.end());
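  // The list is sorted so that std::set_intersection can be applied directly
  // against the (equally sorted) lists received from other processes below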

  // Receive buffer
  std::vector<std::size_t> global_vertex_recv;

  // Create shared_vertices data structure: mapping from shared vertices
  // to list of neighboring processes
  std::map<unsigned int, std::set<unsigned int>>& shared_vertices
        = mesh.topology().shared_entities(0);
  shared_vertices.clear();

  // FIXME: Remove computation from inside communication loop

  // Build shared vertex to sharing processes map
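  // The exchange runs as a ring: in round i each process sends its sorted
  // boundary vertex list to the rank i steps below and receives the list of
  // the rank i steps above, so after num_processes - 1 rounds each process
  // has received, and intersected against, every other process's list
  // exactly once.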
  for (std::size_t i = 1; i < num_processes; ++i)
  {
    // We send data to process (process_number - i), i steps to the left
    const int p = (process_number - i + num_processes) % num_processes;

    // We receive data from process (process_number + i), i steps to the right
    const int q = (process_number + i) % num_processes;

    // Send and receive
    MPI::send_recv(global_vertex_send, p, global_vertex_recv, q);

    // Compute intersection of global indices
    std::vector<std::size_t> intersection(std::min(global_vertex_send.size(),
                                                   global_vertex_recv.size()));
    std::vector<std::size_t>::iterator intersection_end
      = std::set_intersection(global_vertex_send.begin(),
                              global_vertex_send.end(),
                              global_vertex_recv.begin(),
                              global_vertex_recv.end(),
                              intersection.begin());

    // Fill shared vertices information
    std::vector<std::size_t>::const_iterator global_index;
    for (global_index = intersection.begin(); global_index != intersection_end;
         ++global_index)
    {
      // Get local index
      std::map<std::size_t, std::size_t>::const_iterator local_index;
      local_index = vertex_global_to_local.find(*global_index);
      dolfin_assert(local_index != vertex_global_to_local.end());

      // Insert (local index, [proc])
      shared_vertices[local_index->second].insert(q);
    }
  }
}