Beispiel #1
0
void Partitioner::partition (MeshBase & mesh,
                             const unsigned int n)
{
  // Collective call: all ranks of the mesh's communicator enter together.
  libmesh_parallel_only(mesh.comm());

  // BSK - temporary fix while redistribution is integrated 6/26/2008
  // Uncomment this to not repartition in parallel
  //   if (!mesh.is_serial())
  //     return;

  // Asking for more pieces than there are active elements makes no
  // sense, so the request is capped at the active element count.
  const dof_id_type requested_pieces = static_cast<dof_id_type>(n);
  const unsigned int piece_count =
    static_cast<unsigned int>(std::min(mesh.n_active_elem(), requested_pieces));

  // Record the partition count on the mesh itself.
  mesh.set_n_partitions() = piece_count;

  // A single piece needs no partitioning algorithm at all.
  if (piece_count == 1)
    {
      this->single_partition (mesh);
      return;
    }

  // Step 1: give every still-unpartitioned element a temporary home.
  Partitioner::partition_unpartitioned_elements(mesh, piece_count);

  // Step 2: run the actual (derived-class) partitioning algorithm.
  this->_do_partition(mesh, piece_count);

  // Step 3: derive processor ids for parent elements.
  Partitioner::set_parent_processor_ids(mesh);

  // Step 4: move elements between processors if needed.  This happens
  // before node processor ids are assigned so that those ids end up
  // consistent.
  mesh.redistribute();

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_remote_elems(mesh);

  // Bad element processor_id()s can leave a distributed mesh without
  // the child elements needed to restrict vectors.
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Step 5: assign processor ids to the nodes.
  Partitioner::set_node_processor_ids(mesh);

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Elem>(mesh);
#endif

  // Step 6: let derived Mesh classes refresh any cached data that
  // depends on the partitioning.
  mesh.update_post_partitioning();
}
Beispiel #2
0
 MeshSerializer::MeshSerializer(MeshBase& mesh, bool need_serial) :
   _mesh(mesh),
   reparallelize(false)
 {
   // Collective call: all ranks of the mesh's communicator construct
   // the serializer together.
   libmesh_parallel_only(mesh.comm());

   // Only gather when a serial mesh was asked for and the mesh is not
   // serial already.
   const bool must_gather = need_serial && !_mesh.is_serial();
   if (!must_gather)
     return;

   // Remember that the gather was done here.
   reparallelize = true;
   _mesh.allgather();
 }
Beispiel #3
0
//--------------------------------------------------------------------------
void TopologyMap::init(MeshBase& mesh)
{
  // With a non-serial mesh, every processor has to make this call
  // at the same time.
  if (!mesh.is_serial())
    {
      libmesh_parallel_only(mesh.comm());
    }

  START_LOG("init()", "TopologyMap");

  // Drop whatever the map held before, then rebuild it from the mesh.
  _map.clear();
  this->fill(mesh);

  STOP_LOG("init()", "TopologyMap");
}
Beispiel #4
0
void LocationMap<T>::init(MeshBase & mesh)
{
  // With a non-serial mesh, every processor has to make this call
  // at the same time.
  if (!mesh.is_serial())
    {
      libmesh_parallel_only(mesh.comm());
    }

  START_LOG("init()", "LocationMap");

  // Start from an empty map.
  _map.clear();

  // Reset the cached bounding box to an "inverted" box so that the
  // first node visited replaces both bounds.
  _lower_bound.clear();
  _lower_bound.resize(LIBMESH_DIM, std::numeric_limits<Real>::max());
  _upper_bound.clear();
  _upper_bound.resize(LIBMESH_DIM, -std::numeric_limits<Real>::max());

  // Grow the box over every node present on this processor.
  MeshBase::node_iterator       node_it  = mesh.nodes_begin();
  const MeshBase::node_iterator node_end = mesh.nodes_end();

  while (node_it != node_end)
    {
      Node * node = *node_it;

      for (unsigned int dim=0; dim != LIBMESH_DIM; ++dim)
        {
          const Real coord = (*node)(dim);

          if (coord < _lower_bound[dim])
            _lower_bound[dim] = coord;

          if (_upper_bound[dim] < coord)
            _upper_bound[dim] = coord;
        }

      ++node_it;
    }

  // A parallel mesh may only have seen part of the nodes locally, so
  // combine the per-processor boxes.
  if (!mesh.is_serial())
    {
      mesh.comm().min(_lower_bound);
      mesh.comm().max(_upper_bound);
    }

  this->fill(mesh);

  STOP_LOG("init()", "LocationMap");
}
Beispiel #5
0
void LocationMap<T>::init(MeshBase & mesh)
{
  // With a non-serial mesh, every processor has to make this call
  // at the same time.
  if (!mesh.is_serial())
    {
      libmesh_parallel_only(mesh.comm());
    }

  LOG_SCOPE("init()", "LocationMap");

  // Start from an empty map.
  _map.clear();

  // Reset the cached bounding box to an "inverted" box so that the
  // first node visited replaces both bounds.
  _lower_bound.clear();
  _lower_bound.resize(LIBMESH_DIM, std::numeric_limits<Real>::max());
  _upper_bound.clear();
  _upper_bound.resize(LIBMESH_DIM, -std::numeric_limits<Real>::max());

  // Grow the box over every node present on this processor.
  for (auto & node : mesh.node_ptr_range())
    {
      for (unsigned int dim=0; dim != LIBMESH_DIM; ++dim)
        {
          const Real coord = (*node)(dim);

          if (coord < _lower_bound[dim])
            _lower_bound[dim] = coord;

          if (_upper_bound[dim] < coord)
            _upper_bound[dim] = coord;
        }
    }

  // A parallel mesh may only have seen part of the nodes locally, so
  // combine the per-processor boxes.
  if (!mesh.is_serial())
    {
      mesh.comm().min(_lower_bound);
      mesh.comm().max(_upper_bound);
    }

  this->fill(mesh);
}
Beispiel #6
0
// Recompute the processor id of every node in \p mesh from the
// processor ids of the elements it belongs to.
//
// Outline:
//  1. Record, per remote processor, the ids of the nodes that processor
//     currently owns, then invalidate every node's processor id.
//  2. Assign each node the minimum processor id over the active
//     elements touching it; subactive and non-active elements only
//     fill in nodes that are still invalid afterwards.
//  3. For the nodes recorded in step 1, ask the previous owner what
//     processor id it computed and adopt that answer.
//
// Must be called on all processors of mesh.comm() at once, and no
// element may be unpartitioned when it is called.
void Partitioner::set_node_processor_ids(MeshBase & mesh)
{
  LOG_SCOPE("set_node_processor_ids()","Partitioner");

  // This function must be run on all processors at once
  libmesh_parallel_only(mesh.comm());

  // If we have any unpartitioned elements at this
  // stage there is a problem
  libmesh_assert (MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                    mesh.unpartitioned_elements_end()) == 0);


  //   const dof_id_type orig_n_local_nodes = mesh.n_local_nodes();

  //   libMesh::err << "[" << mesh.processor_id() << "]: orig_n_local_nodes="
  //     << orig_n_local_nodes << std::endl;

  // Build up request sets.  Each node is currently owned by a processor because
  // it is connected to an element owned by that processor.  However, during the
  // repartitioning phase that element may have been assigned a new processor id, but
  // it is still resident on the original processor.  We need to know where to look
  // for new ids before assigning new ids, otherwise we may be asking the wrong processors
  // for the wrong information.
  //
  // The only remaining issue is what to do with unpartitioned nodes.  Since they are required
  // to live on all processors we can simply rely on ourselves to number them properly.
  //
  // requested_node_ids[p] holds the ids of the nodes whose new
  // processor id we will ask processor p for.
  std::vector<std::vector<dof_id_type> >
    requested_node_ids(mesh.n_processors());

  // Loop over all the nodes, count the ones on each processor.  We can skip ourself
  std::vector<dof_id_type> ghost_nodes_from_proc(mesh.n_processors(), 0);

  MeshBase::node_iterator       node_it  = mesh.nodes_begin();
  const MeshBase::node_iterator node_end = mesh.nodes_end();

  // First pass: count only, so the request vectors can be sized once.
  for (; node_it != node_end; ++node_it)
    {
      Node * node = *node_it;
      libmesh_assert(node);
      const processor_id_type current_pid = node->processor_id();
      if (current_pid != mesh.processor_id() &&
          current_pid != DofObject::invalid_processor_id)
        {
          libmesh_assert_less (current_pid, ghost_nodes_from_proc.size());
          ghost_nodes_from_proc[current_pid]++;
        }
    }

  // We know how many objects live on each processor, so reserve()
  // space for each.
  for (processor_id_type pid=0; pid != mesh.n_processors(); ++pid)
    requested_node_ids[pid].reserve(ghost_nodes_from_proc[pid]);

  // We need to get the new pid for each node from the processor
  // which *currently* owns the node.  We can safely skip ourself
  for (node_it = mesh.nodes_begin(); node_it != node_end; ++node_it)
    {
      Node * node = *node_it;
      libmesh_assert(node);
      const processor_id_type current_pid = node->processor_id();
      if (current_pid != mesh.processor_id() &&
          current_pid != DofObject::invalid_processor_id)
        {
          libmesh_assert_less (current_pid, requested_node_ids.size());
          // The count from the first pass bounds how many ids we push.
          libmesh_assert_less (requested_node_ids[current_pid].size(),
                               ghost_nodes_from_proc[current_pid]);
          requested_node_ids[current_pid].push_back(node->id());
        }

      // Unset any previously-set node processor ids, now that the old
      // owner has been recorded above.
      node->invalidate_processor_id();
    }

  // Loop over all the active elements
  MeshBase::element_iterator       elem_it  = mesh.active_elements_begin();
  const MeshBase::element_iterator elem_end = mesh.active_elements_end();

  for ( ; elem_it != elem_end; ++elem_it)
    {
      Elem * elem = *elem_it;
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      // For each node, set the processor ID to the min of
      // its current value and this Element's processor id.
      // NOTE(review): this relies on an invalidated node id comparing
      // greater than every valid id -- confirm against DofObject.
      //
      // TODO: we would probably get better parallel partitioning if
      // we did something like "min for even numbered nodes, max for
      // odd numbered".  We'd need to be careful about how that would
      // affect solution ordering for I/O, though.
      for (unsigned int n=0; n<elem->n_nodes(); ++n)
        elem->node_ptr(n)->processor_id() = std::min(elem->node_ptr(n)->processor_id(),
                                                     elem->processor_id());
    }

  // And loop over the subactive elements, but don't reassign
  // nodes that are already active on another processor.
  MeshBase::element_iterator       sub_it  = mesh.subactive_elements_begin();
  const MeshBase::element_iterator sub_end = mesh.subactive_elements_end();

  for ( ; sub_it != sub_end; ++sub_it)
    {
      Elem * elem = *sub_it;
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      // Only nodes that no earlier loop has claimed get this element's id.
      for (unsigned int n=0; n<elem->n_nodes(); ++n)
        if (elem->node_ptr(n)->processor_id() == DofObject::invalid_processor_id)
          elem->node_ptr(n)->processor_id() = elem->processor_id();
    }

  // Same for the inactive elements -- we will have already gotten most of these
  // nodes, *except* for the case of a parent with a subset of children which are
  // ghost elements.  In that case some of the parent nodes will not have been
  // properly handled yet
  MeshBase::element_iterator       not_it  = mesh.not_active_elements_begin();
  const MeshBase::element_iterator not_end = mesh.not_active_elements_end();

  for ( ; not_it != not_end; ++not_it)
    {
      Elem * elem = *not_it;
      libmesh_assert(elem);

      libmesh_assert_not_equal_to (elem->processor_id(), DofObject::invalid_processor_id);

      // Only nodes that no earlier loop has claimed get this element's id.
      for (unsigned int n=0; n<elem->n_nodes(); ++n)
        if (elem->node_ptr(n)->processor_id() == DofObject::invalid_processor_id)
          elem->node_ptr(n)->processor_id() = elem->processor_id();
    }

  // We can't assert that all nodes are connected to elements, because
  // a DistributedMesh with NodeConstraints might have pulled in some
  // remote nodes solely for evaluating those constraints.
  // MeshTools::libmesh_assert_connected_nodes(mesh);

  // For such nodes, we'll do a sanity check later when making sure
  // that we successfully reset their processor ids to something
  // valid.

  // Next set node ids from other processors, excluding self.  In
  // round p we send our requests "up" to rank+p and answer the
  // requests arriving from "down" at rank-p (both modulo the number
  // of processors).
  for (processor_id_type p=1; p != mesh.n_processors(); ++p)
    {
      // Trade my requests with processor procup and procdown
      processor_id_type procup = cast_int<processor_id_type>
        ((mesh.processor_id() + p) % mesh.n_processors());
      processor_id_type procdown = cast_int<processor_id_type>
        ((mesh.n_processors() + mesh.processor_id() - p) %
         mesh.n_processors());
      std::vector<dof_id_type> request_to_fill;
      mesh.comm().send_receive(procup, requested_node_ids[procup],
                               procdown, request_to_fill);

      // Fill those requests in-place
      for (std::size_t i=0; i != request_to_fill.size(); ++i)
        {
          Node & node = mesh.node_ref(request_to_fill[i]);
          const processor_id_type new_pid = node.processor_id();

          // We may have an invalid processor_id() on nodes that have been
          // "detached" from coarsened-away elements but that have not yet
          // themselves been removed, so neither assertion below can be
          // enabled.  The second would otherwise be the correct test,
          // because the number of partitions may not equal the number
          // of processors.
          // libmesh_assert_not_equal_to (new_pid, DofObject::invalid_processor_id);
          // libmesh_assert_less (new_pid, mesh.n_partitions());

          // Overwrite the requested node id with the answer, so the
          // same buffer can be sent back as the reply.
          request_to_fill[i] = new_pid;
        }

      // Trade back the results
      std::vector<dof_id_type> filled_request;
      mesh.comm().send_receive(procdown, request_to_fill,
                               procup,   filled_request);
      libmesh_assert_equal_to (filled_request.size(), requested_node_ids[procup].size());

      // And copy the id changes we've now been informed of.  Answer i
      // corresponds to request i.
      for (std::size_t i=0; i != filled_request.size(); ++i)
        {
          Node & node = mesh.node_ref(requested_node_ids[procup][i]);

          // Comparing against n_partitions() would be the correct test
          // here -- the number of partitions may not equal the number
          // of processors.

          // But: we may have an invalid processor_id() on nodes that
          // have been "detached" from coarsened-away elements but
          // that have not yet themselves been removed.
          // libmesh_assert_less (filled_request[i], mesh.n_partitions());

          node.processor_id(cast_int<processor_id_type>(filled_request[i]));
        }
    }

#ifdef DEBUG
  MeshTools::libmesh_assert_valid_procids<Node>(mesh);
#endif
}
Beispiel #7
0
// Propagate the processor ids of active elements to the rest of the
// refinement hierarchy in \p mesh: every descendant of an active
// element receives that element's processor id, and every ancestor
// receives the minimum processor id found among its descendants.
//
// The whole body is compiled only when LIBMESH_ENABLE_AMR is defined;
// otherwise only the libmesh_ignore() and LOG_SCOPE lines remain.  The
// non-serial branch performs collective communication on mesh.comm().
void Partitioner::set_parent_processor_ids(MeshBase & mesh)
{
  // Ignore the parameter when !LIBMESH_ENABLE_AMR
  libmesh_ignore(mesh);

  LOG_SCOPE("set_parent_processor_ids()", "Partitioner");

#ifdef LIBMESH_ENABLE_AMR

  // If the mesh is serial we have access to all the elements,
  // in particular all the active ones.  We can therefore set
  // the parent processor ids indirectly through their children, and
  // set the subactive processor ids while examining their active
  // ancestors.
  // By convention a parent is assigned to the minimum processor
  // of all its children, and a subactive is assigned to the processor
  // of its active ancestor.
  if (mesh.is_serial())
    {
      // Loop over all the active elements in the mesh
      MeshBase::element_iterator       it  = mesh.active_elements_begin();
      const MeshBase::element_iterator end = mesh.active_elements_end();

      for ( ; it!=end; ++it)
        {
          Elem * child  = *it;

          // First set descendents: everything total_family_tree()
          // returns gets this active element's processor id.

          std::vector<const Elem *> subactive_family;
          child->total_family_tree(subactive_family);
          for (unsigned int i = 0; i != subactive_family.size(); ++i)
            const_cast<Elem *>(subactive_family[i])->processor_id() = child->processor_id();

          // Then set ancestors, walking up one level at a time.

          Elem * parent = child->parent();

          while (parent)
            {
              // invalidate the parent id, otherwise the min below
              // will not work if the current parent id is less
              // than all the children!
              parent->invalidate_processor_id();

              // Note that "child" is reused here as the loop variable
              // over the current parent's children.
              for (unsigned int c=0; c<parent->n_children(); c++)
                {
                  child = parent->child_ptr(c);
                  libmesh_assert(child);
                  libmesh_assert(!child->is_remote());
                  libmesh_assert_not_equal_to (child->processor_id(), DofObject::invalid_processor_id);
                  parent->processor_id() = std::min(parent->processor_id(),
                                                    child->processor_id());
                }
              parent = parent->parent();
            }
        }
    }

  // When the mesh is parallel we cannot guarantee that parents have access to
  // all their children.
  else
    {
      // Setting subactive processor ids is easy: we can guarantee
      // that children have access to all their parents.

      // Loop over all the active elements in the mesh
      MeshBase::element_iterator       it  = mesh.active_elements_begin();
      const MeshBase::element_iterator end = mesh.active_elements_end();

      for ( ; it!=end; ++it)
        {
          Elem * child  = *it;

          std::vector<const Elem *> subactive_family;
          child->total_family_tree(subactive_family);
          for (unsigned int i = 0; i != subactive_family.size(); ++i)
            const_cast<Elem *>(subactive_family[i])->processor_id() = child->processor_id();
        }

      // When the mesh is parallel we cannot guarantee that parents have access to
      // all their children.

      // We will use a brute-force approach here.  Each processor finds its parent
      // elements and sets the parent pid to the minimum of its
      // semilocal descendants.
      // A global reduction is then performed to make sure the true minimum is found.
      // As noted, this is required because we cannot guarantee that a parent has
      // access to all its children on any single processor.
      libmesh_parallel_only(mesh.comm());
      libmesh_assert(MeshTools::n_elem(mesh.unpartitioned_elements_begin(),
                                       mesh.unpartitioned_elements_end()) == 0);

      const dof_id_type max_elem_id = mesh.max_elem_id();

      // One slot per element id of the current block; the buffer is
      // reused for every block.
      std::vector<processor_id_type>
        parent_processor_ids (std::min(communication_blocksize,
                                       max_elem_id));

      // Process the element id range [0, max_elem_id) in blocks of
      // communication_blocksize ids each.
      for (dof_id_type blk=0, last_elem_id=0; last_elem_id<max_elem_id; blk++)
        {
          // The current block covers ids [first_elem_id, last_elem_id).
          last_elem_id =
            std::min(static_cast<dof_id_type>((blk+1)*communication_blocksize),
                     max_elem_id);
          const dof_id_type first_elem_id = blk*communication_blocksize;

          // Reset every slot to the invalid id before accumulating
          // this block's contributions.
          std::fill (parent_processor_ids.begin(),
                     parent_processor_ids.end(),
                     DofObject::invalid_processor_id);

          // first build up local contributions to parent_processor_ids
          MeshBase::element_iterator       not_it  = mesh.ancestor_elements_begin();
          const MeshBase::element_iterator not_end = mesh.ancestor_elements_end();

          bool have_parent_in_block = false;

          for ( ; not_it != not_end; ++not_it)
            {
              Elem * parent = *not_it;

              const dof_id_type parent_idx = parent->id();
              libmesh_assert_less (parent_idx, max_elem_id);

              if ((parent_idx >= first_elem_id) &&
                  (parent_idx <  last_elem_id))
                {
                  have_parent_in_block = true;
                  processor_id_type parent_pid = DofObject::invalid_processor_id;

                  // Local minimum over the active descendants this
                  // processor can see.
                  std::vector<const Elem *> active_family;
                  parent->active_family_tree(active_family);
                  for (unsigned int i = 0; i != active_family.size(); ++i)
                    parent_pid = std::min (parent_pid, active_family[i]->processor_id());

                  // Index of this parent within the current block.
                  const dof_id_type packed_idx = parent_idx - first_elem_id;
                  libmesh_assert_less (packed_idx, parent_processor_ids.size());

                  parent_processor_ids[packed_idx] = parent_pid;
                }
            }

          // then find the global minimum.  This reduction is outside
          // the have_parent_in_block test, so every processor takes
          // part in it for every block.
          mesh.comm().min (parent_processor_ids);

          // and assign the ids, if we have a parent in this block.
          if (have_parent_in_block)
            for (not_it = mesh.ancestor_elements_begin();
                 not_it != not_end; ++not_it)
              {
                Elem * parent = *not_it;

                const dof_id_type parent_idx = parent->id();

                if ((parent_idx >= first_elem_id) &&
                    (parent_idx <  last_elem_id))
                  {
                    const dof_id_type packed_idx = parent_idx - first_elem_id;
                    libmesh_assert_less (packed_idx, parent_processor_ids.size());

                    const processor_id_type parent_pid =
                      parent_processor_ids[packed_idx];

                    libmesh_assert_not_equal_to (parent_pid, DofObject::invalid_processor_id);

                    parent->processor_id() = parent_pid;
                  }
              }
        }
    }

#endif // LIBMESH_ENABLE_AMR
}
Beispiel #8
0
LibMeshInit::~LibMeshInit()
{
  // Deleting, finalizing, etc. a second time without an
  // initialization in between is an error.
  libmesh_exceptionless_assert(!libMesh::closed());

  // Release the reference counted singleton(s).
  Singleton::cleanup();

  // Shut down the thread task manager started by this object.
  task_scheduler.reset();

  // All processors must reach this point of the shutdown together.
  libmesh_parallel_only(this->comm());

  // Make the \p ReferenceCounter print its reference count
  // information right now.  Left alone it only prints once the last
  // created object has been destroyed, which never happens when
  // memory is being leaked -- exactly the case we want to detect.
  ReferenceCounter::print_info ();

  // Complain if any counted objects are still alive.
  const bool objects_still_alive = (ReferenceCounter::n_objects() != 0);
  if (objects_still_alive)
    {
      libMesh::err << "Memory leak detected!" << std::endl;

#if !defined(LIBMESH_ENABLE_REFERENCE_COUNTING) || defined(NDEBUG)
      libMesh::err << "Compile in DEBUG mode with --enable-reference-counting"
                   << std::endl;
      libMesh::err << "for more information"
                   << std::endl;
#endif
    }

  // Write the perflog to each processor's output, then empty it so
  // the PerfLog destructor does not print it a second time.
  libMesh::perflog.print_log();
  libMesh::perflog.clear();

  // The output streams are deliberately NOT reconnected here;
  // doing so would let objects that go out of scope after this
  // destructor emit messages.
  //std::cout.rdbuf(std::cerr.rdbuf());

  // The library no longer counts as initialized.
  libMeshPrivateData::_is_initialized = false;

  // Undo a stdout/stderr redirection to files, if one was requested.
  const bool streams_were_redirected =
    libMesh::on_command_line ("--redirect-stdout");
  if (streams_were_redirected)
    {
      libMesh::out.rdbuf (out_buf);
      libMesh::err.rdbuf (err_buf);
    }

  // Destroy output streams this library built for itself and point
  // the proxies back at the standard streams.
  const bool streams_were_separate =
    libMesh::on_command_line ("--separate-libmeshout");
  if (streams_were_separate)
    {
      delete libMesh::out.get();
      delete libMesh::err.get();

      libMesh::out.reset(std::cout);
      libMesh::err.reset(std::cerr);
    }

#ifdef LIBMESH_ENABLE_EXCEPTIONS
  // Put back the previous terminate handler; user code may want to
  // carry on with C++ work after libMesh has been closed.
  std::set_terminate(old_terminate_handler);
#endif

  // Switch floating point exceptions back off if they were requested.
  if (libMesh::on_command_line("--enable-fpe"))
    {
      libMesh::enableFPE(false);
    }

#if defined(LIBMESH_HAVE_PETSC)
  // The user may bypass PETSc finalization, either directly or (when
  // MPI support is compiled in) by disabling MPI.
  bool finalize_petsc = !libMesh::on_command_line ("--disable-petsc");
#if defined(LIBMESH_HAVE_MPI)
  finalize_petsc = finalize_petsc && !libMesh::on_command_line ("--disable-mpi");
#endif

  if (finalize_petsc)
    {
      // Only finalize what this object initialized itself.
# if defined(LIBMESH_HAVE_SLEPC)
      if (libmesh_initialized_slepc)
        SlepcFinalize();
# else
      if (libmesh_initialized_petsc)
        PetscFinalize();
# endif
    }
#endif


#if defined(LIBMESH_HAVE_MPI)
  // The user may bypass MPI finalization.
  if (libMesh::on_command_line ("--disable-mpi"))
    return;

  this->_comm.clear();
#ifndef LIBMESH_DISABLE_COMMWORLD
  Parallel::Communicator_World.clear();
#endif

  // Only finalize MPI if this object initialized it.
  if (libmesh_initialized_mpi)
    MPI_Finalize();
#endif
}
Beispiel #9
0
// Bring the library up: parse the command line, start the task
// scheduler, build singletons, initialize MPI and PETSc/SLEPc where
// they are compiled in and not disabled by the user, set up the
// libMesh::out / libMesh::err streams, and finally mark the library
// as initialized.
//
// argc, argv    : the program's command line; also handed to the MPI
//                 and PETSc/SLEPc initialization calls.
// COMM_WORLD_IN : communicator adopted as the library's world
//                 communicator.
//
// NOTE(review): the "#endif" right after the signature closes a
// preprocessor conditional whose opening is not visible here --
// presumably one selecting between signatures with and without MPI;
// confirm against the full file.
LibMeshInit::LibMeshInit (int argc, const char* const* argv,
                          MPI_Comm COMM_WORLD_IN)
#endif
{
  // should _not_ be initialized already.
  libmesh_assert (!libMesh::initialized());

  // Build a command-line parser.
  command_line.reset (new GetPot (argc, argv));

  // Disable performance logging upon request
  {
    if (libMesh::on_command_line ("--disable-perflog"))
      libMesh::perflog.disable_logging();
  }

  // Build a task scheduler
  {
    // Get the requested number of threads, defaults to 1 to avoid MPI and
    // multithreading competition.  If you would like to use MPI and multithreading
    // at the same time then (n_mpi_processes_per_node)x(n_threads) should be the
    //  number of processing cores per node.
    // Both spellings of the option are accepted.
    std::vector<std::string> n_threads(2);
    n_threads[0] = "--n_threads";
    n_threads[1] = "--n-threads";
    libMesh::libMeshPrivateData::_n_threads =
      libMesh::command_line_value (n_threads, 1);

    // Set the number of OpenMP threads to the same as the number of threads libMesh is going to use
#ifdef LIBMESH_HAVE_OPENMP
    omp_set_num_threads(libMesh::libMeshPrivateData::_n_threads);
#endif

    task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
  }

  // Construct singletons who may be at risk of the
  // "static initialization order fiasco"
  Singleton::setup();

  // Make sure the construction worked
  libmesh_assert(remote_elem);

#if defined(LIBMESH_HAVE_MPI)

  // Allow the user to bypass MPI initialization
  if (!libMesh::on_command_line ("--disable-mpi"))
    {
      // Check whether the calling program has already initialized
      // MPI, and avoid duplicate Init/Finalize
      int flag;
      MPI_Initialized (&flag);

      if (!flag)
        {
#if MPI_VERSION > 1
          // Request funneled thread support only for threaded runs.
          int mpi_thread_provided;
          const int mpi_thread_requested = libMesh::n_threads() > 1 ?
                                           MPI_THREAD_FUNNELED :
                                           MPI_THREAD_SINGLE;

          MPI_Init_thread (&argc, const_cast<char***>(&argv),
                           mpi_thread_requested, &mpi_thread_provided);

          if ((libMesh::n_threads() > 1) &&
              (mpi_thread_provided < MPI_THREAD_FUNNELED))
            {
              libmesh_warning("Warning: MPI failed to guarantee MPI_THREAD_FUNNELED\n" << 
                              "for a threaded run.\n" <<
                              "Be sure your library is funneled-thread-safe..." <<
                               std::endl);

              // Ideally, if an MPI stack tells us it's unsafe for us
              // to use threads, we shouldn't use threads.
              // In practice, we've encountered one MPI stack (an
              // mvapich2 configuration) that returned
              // MPI_THREAD_SINGLE as a proper warning, two stacks
              // that handle MPI_THREAD_FUNNELED properly, and two
              // current stacks plus a couple old stacks that return
              // MPI_THREAD_SINGLE but support libMesh threaded runs
              // anyway.
              // So the fallback to a single thread stays disabled:

              // libMesh::libMeshPrivateData::_n_threads = 1;
              // task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
            }
#else
          if (libMesh::libMeshPrivateData::_n_threads > 1)
            {
              libmesh_warning("Warning: using MPI1 for threaded code.\n" <<
                              "Be sure your library is funneled-thread-safe..." <<
                              std::endl);
            }

          MPI_Init (&argc, const_cast<char***>(&argv));
#endif
          // Remember that MPI was initialized here rather than by
          // the calling program.
          libmesh_initialized_mpi = true;
        }

      // Duplicate the input communicator for internal use
      // And get a Parallel::Communicator copy too, to use
      // as a default for that API
      this->_comm = COMM_WORLD_IN;

      libMesh::GLOBAL_COMM_WORLD = COMM_WORLD_IN;

#ifndef LIBMESH_DISABLE_COMMWORLD
      libMesh::COMM_WORLD = COMM_WORLD_IN;
      Parallel::Communicator_World = COMM_WORLD_IN;
#endif

      //MPI_Comm_set_name not supported in at least SGI MPT's MPI implementation
      //MPI_Comm_set_name (libMesh::COMM_WORLD, "libMesh::COMM_WORLD");

      // Cache this process's rank and the communicator size.
      libMeshPrivateData::_processor_id =
        libmesh_cast_int<processor_id_type>(this->comm().rank());
      libMeshPrivateData::_n_processors =
        libmesh_cast_int<processor_id_type>(this->comm().size());

      // Set up an MPI error handler if requested.  This helps us get
      // into a debugger with a proper stack when an MPI error occurs.
      if (libMesh::on_command_line ("--handle-mpi-errors"))
        {
#if MPI_VERSION > 1
          MPI_Comm_create_errhandler(libMesh_MPI_Handler, &libmesh_errhandler);
          MPI_Comm_set_errhandler(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler);
          MPI_Comm_set_errhandler(MPI_COMM_WORLD, libmesh_errhandler);
#else
          MPI_Errhandler_create(libMesh_MPI_Handler, &libmesh_errhandler);
          MPI_Errhandler_set(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler);
          MPI_Errhandler_set(MPI_COMM_WORLD, libmesh_errhandler);
#endif // #if MPI_VERSION > 1
        }
    }

  // Could we have gotten bad values from the above calls?
  libmesh_assert_greater (libMeshPrivateData::_n_processors, 0);

  // The libmesh_cast_int already tested _processor_id>=0
  // libmesh_assert_greater_equal (libMeshPrivateData::_processor_id, 0);

  // Let's be sure we properly initialize on every processor at once:
  libmesh_parallel_only(this->comm());

#endif

#if defined(LIBMESH_HAVE_PETSC)

  // Allow the user to bypass PETSc initialization
  if (!libMesh::on_command_line ("--disable-petsc")

#if defined(LIBMESH_HAVE_MPI)
      // If the user bypassed MPI, we'd better be safe and assume that
      // PETSc was built to require it; otherwise PETSc initialization
      // dies.
      && !libMesh::on_command_line ("--disable-mpi")
#endif
      )
    {
      int ierr=0;

      PETSC_COMM_WORLD = libMesh::GLOBAL_COMM_WORLD;

      // Check whether the calling program has already initialized
      // PETSc, and avoid duplicate Initialize/Finalize
      PetscBool petsc_already_initialized;
      ierr = PetscInitialized(&petsc_already_initialized);
      CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
      // This flag is set before PETSc is actually initialized below.
      if (petsc_already_initialized != PETSC_TRUE)
        libmesh_initialized_petsc = true;
# if defined(LIBMESH_HAVE_SLEPC)

      // If SLEPc allows us to check whether the calling program
      // has already initialized it, we do that, and avoid
      // duplicate Initialize/Finalize.
      // We assume that SLEPc will handle PETSc appropriately,
      // which it does in the versions we've checked.
#  if !SLEPC_VERSION_LESS_THAN(2,3,3)
      if (!SlepcInitializeCalled)
#  endif
        {
          ierr = SlepcInitialize  (&argc, const_cast<char***>(&argv), NULL, NULL);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
          libmesh_initialized_slepc = true;
        }
# else
      if (libmesh_initialized_petsc)
        {
          ierr = PetscInitialize (&argc, const_cast<char***>(&argv), NULL, NULL);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
        }
# endif
    }
#endif

  // Re-parse the command-line arguments.  Note that PETSc and MPI
  // initialization above may have removed command line arguments
  // that are not relevant to this application in the above calls.
  // We don't want a false-positive by detecting those arguments.
  command_line->parse_command_line (argc, argv);

  // The following line is an optimization when simultaneous
  // C and C++ style access to output streams is not required.
  // The amount of benefit which occurs is probably implementation
  // defined, and may be nothing.  On the other hand, I have seen
  // some IO tests where IO performance improves by a factor of two.
  if (!libMesh::on_command_line ("--sync-with-stdio"))
    std::ios::sync_with_stdio(false);

  // Honor the --separate-libmeshout command-line option.
  // When this is specified, the library uses an independent ostream
  // for libMesh::out/libMesh::err messages, and
  // std::cout and std::cerr are untouched by any other options
  if (libMesh::on_command_line ("--separate-libmeshout"))
    {
      // Redirect.  We'll share streambufs with cout/cerr for now, but
      // presumably anyone using this option will want to replace the
      // bufs later.
      // These streams are allocated with a raw new and nothing in
      // this function frees them.
      std::ostream* newout = new std::ostream(std::cout.rdbuf());
      libMesh::out = *newout;
      std::ostream* newerr = new std::ostream(std::cerr.rdbuf());
      libMesh::err = *newerr;
    }

  // Honor the --redirect-stdout command-line option.
  // When this is specified each processor sends
  // libMesh::out/libMesh::err messages to
  // stdout.processor.####
  if (libMesh::on_command_line ("--redirect-stdout"))
    {
      std::ostringstream filename;
      filename << "stdout.processor." << libMesh::global_processor_id();
      _ofstream.reset (new std::ofstream (filename.str().c_str()));
      // Redirect, saving the original streambufs!
      out_buf = libMesh::out.rdbuf (_ofstream->rdbuf());
      err_buf = libMesh::err.rdbuf (_ofstream->rdbuf());
    }

  // redirect libMesh::out to nothing on all
  // other processors unless explicitly told
  // not to via the --keep-cout command-line argument.
  if (libMesh::global_processor_id() != 0)
    if (!libMesh::on_command_line ("--keep-cout"))
      libMesh::out.rdbuf (NULL);

  // Check command line to override printing
  // of reference count information.
  if(libMesh::on_command_line("--disable-refcount-printing") )
    ReferenceCounter::disable_print_counter_info();

#ifdef LIBMESH_ENABLE_EXCEPTIONS
  // Set our terminate handler to write stack traces in the event of a
  // crash, keeping the previous handler in old_terminate_handler.
  old_terminate_handler = std::set_terminate(libmesh_terminate_handler);
#endif


  // Turn on floating point exceptions if requested.
  if (libMesh::on_command_line("--enable-fpe"))
    libMesh::enableFPE(true);

  // The library is now ready for use
  libMeshPrivateData::_is_initialized = true;


  // Make sure these work.  Library methods
  // depend on these being implemented properly,
  // so this is a good time to test them!
  libmesh_assert (libMesh::initialized());
  libmesh_assert (!libMesh::closed());
}
Beispiel #10
0
void ParmetisPartitioner::_do_repartition (MeshBase & mesh,
                                           const unsigned int n_sbdmns)
{
  // Repartitions \p mesh into \p n_sbdmns subdomains with ParMETIS'
  // adaptive repartitioner, delegating to MetisPartitioner whenever
  // ParMETIS is unavailable or cannot be used safely.
  libmesh_assert_greater (n_sbdmns, 0);

  // Nothing to compute when only one subdomain is requested.
  if (n_sbdmns == 1)
    {
      this->single_partition(mesh);
      return;
    }

  // Everything past this point has to be executed by every processor
  // of the mesh's communicator together.
  libmesh_parallel_only(mesh.comm());

#ifdef LIBMESH_HAVE_PARMETIS

  // --- ParMETIS is available -------------------------------------------

  // A single-processor run just uses METIS directly.
  if (mesh.n_processors() == 1)
    {
      MetisPartitioner serial_partitioner;
      serial_partitioner.partition (mesh, n_sbdmns);
      return;
    }

  LOG_SCOPE("repartition()", "ParmetisPartitioner");

  // Set up the data structures ParMETIS needs.
  this->initialize (mesh, n_sbdmns);

  // ParMETIS tends to crash when a processor holds only a couple of
  // elements, and it aborts outright when handed a NULL partition
  // array on *any* processor.  Look for a processor below the minimum;
  // the first one found is enough to decide.
  bool found_underloaded_proc = false;
  for (processor_id_type p=0;
       p<_n_active_elem_on_proc.size() && !found_underloaded_proc;
       ++p)
    found_underloaded_proc = (_n_active_elem_on_proc[p] < MIN_ELEM_PER_PROC);

  if (found_underloaded_proc)
    {
      // FIXME: revert to METIS, although this requires a serial mesh
      MeshSerializer serialize(mesh);
      MetisPartitioner fallback_partitioner;
      fallback_partitioner.partition (mesh, n_sbdmns);
      return;
    }

  // Construct the graph that corresponds to the mesh.
  this->build_graph (mesh);

  // Remaining inputs for the ParMETIS call: one unit "size" per
  // weighted vertex, the value handed to ParMETIS as its ITR argument,
  // and the raw MPI communicator.
  std::vector<Parmetis::idx_t> unit_vsize(_pmetis->vwgt.size(), 1);
  Parmetis::real_t itr_value = 1000000.0;
  MPI_Comm raw_comm = mesh.comm().get();

  // Adaptive repartitioning takes the existing partitioning into
  // account when computing the new one, so as to minimize the amount of
  // data that must be redistributed.  Empty arrays are passed as null
  // pointers rather than by indexing into an empty vector.
  Parmetis::ParMETIS_V3_AdaptiveRepart
    (_pmetis->vtxdist.empty() ? libmesh_nullptr : &_pmetis->vtxdist[0],
     _pmetis->xadj.empty()    ? libmesh_nullptr : &_pmetis->xadj[0],
     _pmetis->adjncy.empty()  ? libmesh_nullptr : &_pmetis->adjncy[0],
     _pmetis->vwgt.empty()    ? libmesh_nullptr : &_pmetis->vwgt[0],
     unit_vsize.empty()       ? libmesh_nullptr : &unit_vsize[0],
     libmesh_nullptr,
     &_pmetis->wgtflag,
     &_pmetis->numflag,
     &_pmetis->ncon,
     &_pmetis->nparts,
     _pmetis->tpwgts.empty()  ? libmesh_nullptr : &_pmetis->tpwgts[0],
     _pmetis->ubvec.empty()   ? libmesh_nullptr : &_pmetis->ubvec[0],
     &itr_value,
     &_pmetis->options[0],
     &_pmetis->edgecut,
     _pmetis->part.empty()    ? libmesh_nullptr : &_pmetis->part[0],
     &raw_comm);

  // Transfer the processor ids computed by ParMETIS onto the elements.
  this->assign_partitioning (mesh);

#else

  // --- ParMETIS is NOT available ---------------------------------------

  // Report the missing library, then let METIS do the work instead.
  libmesh_here();
  libMesh::err << "ERROR: The library has been built without" << std::endl;
  libMesh::err << "Parmetis support.  Using a Metis"          << std::endl;
  libMesh::err << "partitioner instead!"                      << std::endl;

  MetisPartitioner metis_partitioner;
  metis_partitioner.partition (mesh, n_sbdmns);

#endif // #ifdef LIBMESH_HAVE_PARMETIS ... else ...

}
Beispiel #11
0
void ParmetisPartitioner::assign_partitioning (MeshBase & mesh)
{
  // Copies the partition computed in _pmetis->part onto the
  // processor_id() of every active element.  Each entry of that array
  // is looked up by the processor that currently owns the element, so
  // the ids are queried from the owners and the answers applied
  // afterwards.

  // Collective: every processor must call this at the same time.
  libmesh_parallel_only(mesh.comm());

  // Offset subtracted from a global index to obtain the matching
  // position in this processor's _pmetis->part array.
  const dof_id_type my_first_index = _pmetis->vtxdist[mesh.processor_id()];

  // queries_out[p] : ids of active elements whose current owner is p;
  //                  overwritten later by p's answers.
  // queries_in[p]  : ids processor p asked us about; overwritten
  //                  in place with our answers before being sent back.
  std::vector<std::vector<dof_id_type> >
    queries_out(mesh.n_processors()),
    queries_in(mesh.n_processors());

  // Pass 1: sort every active element id into the query list of its
  // current owner.  Nothing is assigned yet, because the same elements
  // are walked again below and the current processor ids are still
  // needed to match answers to elements.
  for (MeshBase::element_iterator it     = mesh.active_elements_begin(),
                                  it_end = mesh.active_elements_end();
       it != it_end; ++it)
    {
      Elem * elem = *it;

      libmesh_assert_less (elem->processor_id(), queries_out.size());
      queries_out[elem->processor_id()].push_back(elem->id());
    }

  // Pass 2: exchange queries and answers with every processor,
  // ourselves included (offset 0).
  for (processor_id_type offset=0; offset<mesh.n_processors(); offset++)
    {
      // At this offset we query `ask_pid` and answer `answer_pid`.
      const processor_id_type ask_pid = (mesh.processor_id() + offset) % mesh.n_processors();
      const processor_id_type answer_pid = (mesh.n_processors() +
                                            mesh.processor_id() - offset) % mesh.n_processors();

      std::vector<dof_id_type> & my_queries    = queries_out[ask_pid];
      std::vector<dof_id_type> & their_queries = queries_in[answer_pid];

      mesh.comm().send_receive (ask_pid,    my_queries,
                                answer_pid, their_queries);

      // Replace each queried element id by the processor id that was
      // computed for that element, reusing the same buffer.
      for (std::size_t q=0; q<their_queries.size(); q++)
        {
          const dof_id_type queried_id = their_queries[q];

          libmesh_assert(_global_index_by_pid_map.count(queried_id));

          const dof_id_type global_index_by_pid =
            _global_index_by_pid_map[queried_id];

          const dof_id_type part_index =
            global_index_by_pid - my_first_index;

          libmesh_assert_less (part_index, _pmetis->part.size());
          libmesh_assert_less (part_index, mesh.n_active_local_elem());

          const unsigned int computed_pid =
            static_cast<unsigned int>(_pmetis->part[part_index]);

          libmesh_assert_less (computed_pid, static_cast<unsigned int>(_pmetis->nparts));

          their_queries[q] = computed_pid;
        }

      // Send our answers back and collect the answers to our queries.
      mesh.comm().send_receive (answer_pid, their_queries,
                                ask_pid,    my_queries);
    }

  // Pass 3: apply the answers.  The elements are visited in exactly the
  // order used to build the queries, so for each previous owner the
  // answers are consumed front to back.
  std::vector<unsigned int> next_answer(mesh.n_processors(), 0);

  for (MeshBase::element_iterator it     = mesh.active_elements_begin(),
                                  it_end = mesh.active_elements_end();
       it != it_end; ++it)
    {
      Elem * elem = *it;

      const processor_id_type previous_pid = elem->processor_id();
      unsigned int & cursor = next_answer[previous_pid];

      libmesh_assert_less (cursor, queries_out[previous_pid].size());

      const processor_id_type new_pid = queries_out[previous_pid][cursor];
      ++cursor;

      libmesh_assert_less (new_pid, static_cast<unsigned int>(_pmetis->nparts));
      elem->processor_id() = new_pid;
    }
}
Beispiel #12
0
// Integrates the error in variable \p unknown_name of system \p sys_name
// over the active local elements and stores the result in \p error_vals.
//
// The reference ("exact") quantities at each quadrature point come from
// the functors in _exact_values / _exact_derivs / _exact_hessians when
// one is attached for this system number; otherwise, if
// _equation_systems_fine is set, from a MeshFunction built on the
// solution held in _equation_systems; otherwise they stay zero.
//
// \param sys_name      Name of the system whose solution is examined.
// \param unknown_name  Name of the variable within that system.
// \param error_vals    Output.  Overwritten with 7 entries, indexed as
//                      listed where the vector is zeroed below.  Entries
//                      0, 1, 2, 5 and 6 are sums of squares (no square
//                      root is taken here), entry 3 is a plain sum and
//                      entry 4 a maximum.  All entries are reduced over
//                      the communicator of _equation_systems before
//                      returning.
void ExactSolution::_compute_error(const std::string & sys_name,
                                   const std::string & unknown_name,
                                   std::vector<Real> & error_vals)
{
    // Make sure we aren't "overconfigured": exact-value functors and a
    // fine-grid reference EquationSystems must not both be present.
    libmesh_assert (!(_exact_values.size() && _equation_systems_fine));

    // We need a communicator.
    const Parallel::Communicator & communicator(_equation_systems.comm());

    // This function must be run on all processors at once
    libmesh_parallel_only(communicator);

    // Get a reference to the system whose error is being computed.
    // If we have a fine grid, however, we'll integrate on that instead
    // for more accuracy.
    const System & computed_system = _equation_systems_fine ?
                                     _equation_systems_fine->get_system(sys_name) :
                                     _equation_systems.get_system (sys_name);

    // The evaluation time is always read from the system stored in
    // _equation_systems, even when integrating on the fine system.
    const Real time = _equation_systems.get_system(sys_name).time;

    // sys_num indexes the _exact_* functor vectors below; var_component
    // (plus a per-component offset) selects the functor component.
    const unsigned int sys_num = computed_system.number();
    const unsigned int var = computed_system.variable_number(unknown_name);
    const unsigned int var_component =
        computed_system.variable_scalar_number(var, 0);

    // Prepare a global solution and a MeshFunction of the coarse system if we need one
    // (comparison_soln is built unconditionally but only initialized
    // and filled when a fine-grid reference is in use.)
    UniquePtr<MeshFunction> coarse_values;
    UniquePtr<NumericVector<Number> > comparison_soln = NumericVector<Number>::build(_equation_systems.comm());
    if (_equation_systems_fine)
    {
        const System & comparison_system
            = _equation_systems.get_system(sys_name);

        // Gather the whole solution into a serial vector for the
        // MeshFunction to evaluate from.
        std::vector<Number> global_soln;
        comparison_system.update_global_solution(global_soln);
        comparison_soln->init(comparison_system.solution->size(), true, SERIAL);
        (*comparison_soln) = global_soln;

        coarse_values = UniquePtr<MeshFunction>
                        (new MeshFunction(_equation_systems,
                                          *comparison_soln,
                                          comparison_system.get_dof_map(),
                                          comparison_system.variable_number(unknown_name)));
        coarse_values->init();
    }

    // Initialize any functors we're going to use
    // (null entries in the functor vectors are skipped)
    for (unsigned int i=0; i != _exact_values.size(); ++i)
        if (_exact_values[i])
            _exact_values[i]->init();

    for (unsigned int i=0; i != _exact_derivs.size(); ++i)
        if (_exact_derivs[i])
            _exact_derivs[i]->init();

    for (unsigned int i=0; i != _exact_hessians.size(); ++i)
        if (_exact_hessians[i])
            _exact_hessians[i]->init();

    // Get a reference to the dofmap and mesh for that system
    const DofMap & computed_dof_map = computed_system.get_dof_map();

    const MeshBase & _mesh = computed_system.get_mesh();

    // Grab which element dimensions are present in the mesh
    const std::set<unsigned char> & elem_dims = _mesh.elem_dimensions();

    // Zero the error before summation
    // 0 - sum of square of function error (L2)
    // 1 - sum of square of gradient error (H1 semi)
    // 2 - sum of square of Hessian error (H2 semi)
    // 3 - sum of sqrt(square of function error) (L1)
    // 4 - max of sqrt(square of function error) (Linfty)
    // 5 - sum of square of curl error (HCurl semi)
    // 6 - sum of square of div error (HDiv semi)
    error_vals = std::vector<Real>(7, 0.);

    // Construct Quadrature rule based on default quadrature order
    const FEType & fe_type  = computed_dof_map.variable_type(var);

    // Number of vector components of the variable; used as the bound of
    // the component loops when querying the exact-solution functors.
    unsigned int n_vec_dim = FEInterface::n_vec_dim( _mesh, fe_type );

    // FIXME: MeshFunction needs to be updated to support vector-valued
    //        elements before we can use a reference solution.
    if( (n_vec_dim > 1) && _equation_systems_fine )
    {
        libMesh::err << "Error calculation using reference solution not yet\n"
                     << "supported for vector-valued elements."
                     << std::endl;
        libmesh_not_implemented();
    }


    // Allow space for dims 0-3, even if we don't use them all
    // These hold raw owning pointers (release()d from their smart
    // pointers below) and are deleted by hand after the element loop.
    // NOTE(review): if anything in between leaves via an exception the
    // objects are not freed - confirm whether that matters to callers.
    std::vector<FEGenericBase<OutputShape> *> fe_ptrs(4, libmesh_nullptr);
    std::vector<QBase *> q_rules(4, libmesh_nullptr);

    // Prepare finite elements for each dimension present in the mesh
    for( std::set<unsigned char>::const_iterator d_it = elem_dims.begin();
            d_it != elem_dims.end(); ++d_it )
    {
        // Default quadrature rule for this FE type, with _extra_order
        // passed on to the rule construction.
        q_rules[*d_it] =
            fe_type.default_quadrature_rule (*d_it, _extra_order).release();

        // Construct finite element object

        fe_ptrs[*d_it] = FEGenericBase<OutputShape>::build(*d_it, fe_type).release();

        // Attach quadrature rule to FE object
        fe_ptrs[*d_it]->attach_quadrature_rule (q_rules[*d_it]);
    }

    // The global degree of freedom indices associated
    // with the local degrees of freedom.
    std::vector<dof_id_type> dof_indices;


    //
    // Begin the loop over the elements
    //
    // TODO: this ought to be threaded (and using subordinate
    // MeshFunction objects in each thread rather than a single
    // master)
    MeshBase::const_element_iterator       el     = _mesh.active_local_elements_begin();
    const MeshBase::const_element_iterator end_el = _mesh.active_local_elements_end();

    for ( ; el != end_el; ++el)
    {
        // Store a pointer to the element we are currently
        // working on.  This allows for nicer syntax later.
        const Elem * elem = *el;
        const unsigned int dim = elem->dim();

        const subdomain_id_type elem_subid = elem->subdomain_id();

        // If the variable is not active on this subdomain, don't bother
        if(!computed_system.variable(var).active_on_subdomain(elem_subid))
            continue;

        /* If the variable is active, then we're going to restrict the
           MeshFunction evaluations to the current element subdomain.
           This is for cases such as mixed dimension meshes where we want
           to restrict the calculation to one particular domain. */
        std::set<subdomain_id_type> subdomain_id;
        subdomain_id.insert(elem_subid);

        // Pick the FE object and quadrature rule prepared for this
        // element's dimension.
        FEGenericBase<OutputShape> * fe = fe_ptrs[dim];
        QBase * qrule = q_rules[dim];
        libmesh_assert(fe);
        libmesh_assert(qrule);

        // NOTE(review): all get_*() references below are taken before
        // fe->reinit(elem); presumably the getters also tell the FE
        // object which quantities to compute in reinit() - confirm
        // before reordering any of these statements.

        // The Jacobian*weight at the quadrature points.
        const std::vector<Real> & JxW = fe->get_JxW();

        // The value of the shape functions at the quadrature points
        // i.e. phi(i) = phi_values[i][qp]
        const std::vector<std::vector<OutputShape> > &  phi_values = fe->get_phi();

        // The value of the shape function gradients at the quadrature points
        const std::vector<std::vector<typename FEGenericBase<OutputShape>::OutputGradient> > &
        dphi_values = fe->get_dphi();

        // The value of the shape function curls at the quadrature points
        // Only computed for vector-valued elements
        const std::vector<std::vector<typename FEGenericBase<OutputShape>::OutputShape> > * curl_values = libmesh_nullptr;

        // The value of the shape function divergences at the quadrature points
        // Only computed for vector-valued elements
        const std::vector<std::vector<typename FEGenericBase<OutputShape>::OutputDivergence> > * div_values = libmesh_nullptr;

        // The two pointers above stay null for non-vector field types
        // and are only dereferenced under the same TYPE_VECTOR test.
        if( FEInterface::field_type(fe_type) == TYPE_VECTOR )
        {
            curl_values = &fe->get_curl_phi();
            div_values = &fe->get_div_phi();
        }

#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
        // The value of the shape function second derivatives at the quadrature points
        const std::vector<std::vector<typename FEGenericBase<OutputShape>::OutputTensor> > &
        d2phi_values = fe->get_d2phi();
#endif

        // The XYZ locations (in physical space) of the quadrature points
        const std::vector<Point> & q_point = fe->get_xyz();

        // reinitialize the element-specific data
        // for the current element
        fe->reinit (elem);

        // Get the local to global degree of freedom maps
        computed_dof_map.dof_indices    (elem, dof_indices, var);

        // The number of quadrature points
        const unsigned int n_qp = qrule->n_points();

        // The number of shape functions
        const unsigned int n_sf =
            cast_int<unsigned int>(dof_indices.size());

        //
        // Begin the loop over the Quadrature points.
        //
        for (unsigned int qp=0; qp<n_qp; qp++)
        {
            // Real u_h = 0.;
            // RealGradient grad_u_h;

            // Accumulators for the computed solution and its
            // derivatives at this quadrature point.
            typename FEGenericBase<OutputShape>::OutputNumber u_h(0.);

            typename FEGenericBase<OutputShape>::OutputNumberGradient grad_u_h;
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
            typename FEGenericBase<OutputShape>::OutputNumberTensor grad2_u_h;
#endif
            typename FEGenericBase<OutputShape>::OutputNumber curl_u_h(0.0);
            typename FEGenericBase<OutputShape>::OutputNumberDivergence div_u_h = 0.0;

            // Compute solution values at the current
            // quadrature point.  This requires a sum
            // over all the shape functions evaluated
            // at the quadrature point.
            for (unsigned int i=0; i<n_sf; i++)
            {
                // Values from current solution.
                u_h      += phi_values[i][qp]*computed_system.current_solution  (dof_indices[i]);
                grad_u_h += dphi_values[i][qp]*computed_system.current_solution (dof_indices[i]);
#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
                grad2_u_h += d2phi_values[i][qp]*computed_system.current_solution (dof_indices[i]);
#endif
                if( FEInterface::field_type(fe_type) == TYPE_VECTOR )
                {
                    curl_u_h += (*curl_values)[i][qp]*computed_system.current_solution (dof_indices[i]);
                    div_u_h += (*div_values)[i][qp]*computed_system.current_solution (dof_indices[i]);
                }
            }

            // Compute the value of the error at this quadrature point
            // exact_val stays zero when neither a functor for this
            // system nor a fine-grid reference is available, in which
            // case the "error" below is just u_h itself.
            typename FEGenericBase<OutputShape>::OutputNumber exact_val(0);
            RawAccessor<typename FEGenericBase<OutputShape>::OutputNumber> exact_val_accessor( exact_val, dim );
            if (_exact_values.size() > sys_num && _exact_values[sys_num])
            {
                // Fill the reference value one component at a time
                // from the user-supplied functor.
                for( unsigned int c = 0; c < n_vec_dim; c++)
                    exact_val_accessor(c) =
                        _exact_values[sys_num]->
                        component(var_component+c, q_point[qp], time);
            }
            else if (_equation_systems_fine)
            {
                // FIXME: Needs to be updated for vector-valued elements
                // Evaluate the MeshFunction, restricted to this
                // element's subdomain.
                DenseVector<Number> output(1);
                (*coarse_values)(q_point[qp],time,output,&subdomain_id);
                exact_val = output(0);
            }
            const typename FEGenericBase<OutputShape>::OutputNumber val_error = u_h - exact_val;

            // Add the squares of the error to each contribution
            Real error_sq = TensorTools::norm_sq(val_error);
            error_vals[0] += JxW[qp]*error_sq;

            // Pointwise error magnitude: weighted into the L1 sum and
            // tracked as a running maximum for L-infinity.
            Real norm = sqrt(error_sq);
            error_vals[3] += JxW[qp]*norm;

            if(error_vals[4]<norm) {
                error_vals[4] = norm;
            }

            // Compute the value of the error in the gradient at this
            // quadrature point
            // NOTE(review): this accessor is sized with LIBMESH_DIM
            // while the value and Hessian accessors use the element
            // dimension `dim` - confirm the difference is intended.
            typename FEGenericBase<OutputShape>::OutputNumberGradient exact_grad;
            RawAccessor<typename FEGenericBase<OutputShape>::OutputNumberGradient> exact_grad_accessor( exact_grad, LIBMESH_DIM );
            if (_exact_derivs.size() > sys_num && _exact_derivs[sys_num])
            {
                // Entry (component c, direction d) is stored at the
                // flattened index d + c*LIBMESH_DIM.
                for (unsigned int c = 0; c < n_vec_dim; c++)
                    for (unsigned int d = 0; d < LIBMESH_DIM; d++)
                        exact_grad_accessor(d + c*LIBMESH_DIM) =
                            _exact_derivs[sys_num]->
                            component(var_component+c, q_point[qp], time)(d);
            }
            else if (_equation_systems_fine)
            {
                // FIXME: Needs to be updated for vector-valued elements
                std::vector<Gradient> output(1);
                coarse_values->gradient(q_point[qp],time,output,&subdomain_id);
                exact_grad = output[0];
            }

            const typename FEGenericBase<OutputShape>::OutputNumberGradient grad_error = grad_u_h - exact_grad;

            error_vals[1] += JxW[qp]*grad_error.norm_sq();


            if( FEInterface::field_type(fe_type) == TYPE_VECTOR )
            {
                // Compute the value of the error in the curl at this
                // quadrature point
                // The reference curl is derived from exact_grad when a
                // derivative functor exists; otherwise it stays zero.
                typename FEGenericBase<OutputShape>::OutputNumber exact_curl(0.0);
                if (_exact_derivs.size() > sys_num && _exact_derivs[sys_num])
                {
                    exact_curl = TensorTools::curl_from_grad( exact_grad );
                }
                else if (_equation_systems_fine)
                {
                    // FIXME: Need to implement curl for MeshFunction and support reference
                    //        solution for vector-valued elements
                }

                const typename FEGenericBase<OutputShape>::OutputNumber curl_error = curl_u_h - exact_curl;

                error_vals[5] += JxW[qp]*TensorTools::norm_sq(curl_error);

                // Compute the value of the error in the divergence at this
                // quadrature point
                // Same scheme as for the curl: derived from exact_grad
                // or left at zero.
                typename FEGenericBase<OutputShape>::OutputNumberDivergence exact_div = 0.0;
                if (_exact_derivs.size() > sys_num && _exact_derivs[sys_num])
                {
                    exact_div = TensorTools::div_from_grad( exact_grad );
                }
                else if (_equation_systems_fine)
                {
                    // FIXME: Need to implement div for MeshFunction and support reference
                    //        solution for vector-valued elements
                }

                const typename FEGenericBase<OutputShape>::OutputNumberDivergence div_error = div_u_h - exact_div;

                error_vals[6] += JxW[qp]*TensorTools::norm_sq(div_error);
            }

#ifdef LIBMESH_ENABLE_SECOND_DERIVATIVES
            // Compute the value of the error in the hessian at this
            // quadrature point
            typename FEGenericBase<OutputShape>::OutputNumberTensor exact_hess;
            RawAccessor<typename FEGenericBase<OutputShape>::OutputNumberTensor> exact_hess_accessor( exact_hess, dim );
            if (_exact_hessians.size() > sys_num && _exact_hessians[sys_num])
            {
                //FIXME: This needs to be implemented to support rank 3 tensors
                //       which can't happen until type_n_tensor is fully implemented
                //       and a RawAccessor<TypeNTensor> is fully implemented
                if( FEInterface::field_type(fe_type) == TYPE_VECTOR )
                    libmesh_not_implemented();

                // Entry (component c, row d, column e) is stored at the
                // flattened index d + e*dim + c*dim*dim.
                for( unsigned int c = 0; c < n_vec_dim; c++)
                    for( unsigned int d = 0; d < dim; d++ )
                        for( unsigned int e =0; e < dim; e++ )
                            exact_hess_accessor(d + e*dim + c*dim*dim) =
                                _exact_hessians[sys_num]->
                                component(var_component+c, q_point[qp], time)(d,e);
            }
            else if (_equation_systems_fine)
            {
                // FIXME: Needs to be updated for vector-valued elements
                std::vector<Tensor> output(1);
                coarse_values->hessian(q_point[qp],time,output,&subdomain_id);
                exact_hess = output[0];
            }

            const typename FEGenericBase<OutputShape>::OutputNumberTensor grad2_error = grad2_u_h - exact_hess;

            // FIXME: PB: Is this what we want for rank 3 tensors?
            error_vals[2] += JxW[qp]*grad2_error.norm_sq();
#endif

        } // end qp loop
    } // end element loop

    // Clean up the FE and QBase pointers we created
    // (only the slots for dimensions present in the mesh were filled)
    for( std::set<unsigned char>::const_iterator d_it = elem_dims.begin();
            d_it != elem_dims.end(); ++d_it )
    {
        delete fe_ptrs[*d_it];
        delete q_rules[*d_it];
    }

    // Add up the error values on all processors, except for the L-infty
    // norm, for which the maximum is computed.
    // The sum() call also sums entry 4, so the local maximum is saved
    // and max-reduced first and then written back over the summed value.
    Real l_infty_norm = error_vals[4];
    communicator.max(l_infty_norm);
    communicator.sum(error_vals);
    error_vals[4] = l_infty_norm;
}
Beispiel #13
0
LibMeshInit::LibMeshInit (int argc, const char * const * argv,
                          MPI_Comm COMM_WORLD_IN)
#endif
{
  // should _not_ be initialized already.
  libmesh_assert (!libMesh::initialized());

  // Build a command-line parser.
  command_line.reset (new GetPot (argc, argv));

  // Disable performance logging upon request
  {
    if (libMesh::on_command_line ("--disable-perflog"))
      libMesh::perflog.disable_logging();
  }

  // Build a task scheduler
  {
    // Get the requested number of threads, defaults to 1 to avoid MPI and
    // multithreading competition.  If you would like to use MPI and multithreading
    // at the same time then (n_mpi_processes_per_node)x(n_threads) should be the
    //  number of processing cores per node.
    std::vector<std::string> n_threads(2);
    n_threads[0] = "--n_threads";
    n_threads[1] = "--n-threads";
    libMesh::libMeshPrivateData::_n_threads =
      libMesh::command_line_value (n_threads, 1);

    // If there's no threading model active, force _n_threads==1
#if !LIBMESH_USING_THREADS
    if (libMesh::libMeshPrivateData::_n_threads != 1)
      {
        libMesh::libMeshPrivateData::_n_threads = 1;
        libmesh_warning("Warning: You requested --n-threads>1 but no threading model is active!\n"
                        << "Forcing --n-threads==1 instead!");
      }
#endif

    // Set the number of OpenMP threads to the same as the number of threads libMesh is going to use
#ifdef LIBMESH_HAVE_OPENMP
    omp_set_num_threads(libMesh::libMeshPrivateData::_n_threads);
#endif

    task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
  }

  // Construct singletons who may be at risk of the
  // "static initialization order fiasco"
  Singleton::setup();

  // Make sure the construction worked
  libmesh_assert(remote_elem);

#if defined(LIBMESH_HAVE_MPI)

  // Allow the user to bypass MPI initialization
  if (!libMesh::on_command_line ("--disable-mpi"))
    {
      // Check whether the calling program has already initialized
      // MPI, and avoid duplicate Init/Finalize
      int flag;
      libmesh_call_mpi(MPI_Initialized (&flag));

      if (!flag)
        {
          int mpi_thread_provided;
          const int mpi_thread_requested = libMesh::n_threads() > 1 ?
            MPI_THREAD_FUNNELED :
            MPI_THREAD_SINGLE;

          libmesh_call_mpi
            (MPI_Init_thread (&argc, const_cast<char ***>(&argv),
                              mpi_thread_requested, &mpi_thread_provided));

          if ((libMesh::n_threads() > 1) &&
              (mpi_thread_provided < MPI_THREAD_FUNNELED))
            {
              libmesh_warning("Warning: MPI failed to guarantee MPI_THREAD_FUNNELED\n"
                              << "for a threaded run.\n"
                              << "Be sure your library is funneled-thread-safe..."
                              << std::endl);

              // Ideally, if an MPI stack tells us it's unsafe for us
              // to use threads, we shouldn't use threads.
              // In practice, we've encountered one MPI stack (an
              // mvapich2 configuration) that returned
              // MPI_THREAD_SINGLE as a proper warning, two stacks
              // that handle MPI_THREAD_FUNNELED properly, and two
              // current stacks plus a couple old stacks that return
              // MPI_THREAD_SINGLE but support libMesh threaded runs
              // anyway.

              // libMesh::libMeshPrivateData::_n_threads = 1;
              // task_scheduler.reset (new Threads::task_scheduler_init(libMesh::n_threads()));
            }
          libmesh_initialized_mpi = true;
        }

      // Duplicate the input communicator for internal use
      // And get a Parallel::Communicator copy too, to use
      // as a default for that API
      this->_comm = COMM_WORLD_IN;

      libMesh::GLOBAL_COMM_WORLD = COMM_WORLD_IN;

      //MPI_Comm_set_name not supported in at least SGI MPT's MPI implementation
      //MPI_Comm_set_name (libMesh::COMM_WORLD, "libMesh::COMM_WORLD");

      libMeshPrivateData::_processor_id =
        cast_int<processor_id_type>(this->comm().rank());
      libMeshPrivateData::_n_processors =
        cast_int<processor_id_type>(this->comm().size());

      // Set up an MPI error handler if requested.  This helps us get
      // into a debugger with a proper stack when an MPI error occurs.
      if (libMesh::on_command_line ("--handle-mpi-errors"))
        {
          libmesh_call_mpi
            (MPI_Comm_create_errhandler(libMesh_MPI_Handler, &libmesh_errhandler));
          libmesh_call_mpi
            (MPI_Comm_set_errhandler(libMesh::GLOBAL_COMM_WORLD, libmesh_errhandler));
          libmesh_call_mpi
            (MPI_Comm_set_errhandler(MPI_COMM_WORLD, libmesh_errhandler));
        }
    }

  // Could we have gotten bad values from the above calls?
  libmesh_assert_greater (libMeshPrivateData::_n_processors, 0);

  // The cast_int already tested _processor_id>=0
  // libmesh_assert_greater_equal (libMeshPrivateData::_processor_id, 0);

  // Let's be sure we properly initialize on every processor at once:
  libmesh_parallel_only(this->comm());

#endif

#if defined(LIBMESH_HAVE_PETSC)

  // Allow the user to bypass PETSc initialization
  if (!libMesh::on_command_line ("--disable-petsc")

#if defined(LIBMESH_HAVE_MPI)
      // If the user bypassed MPI, we'd better be safe and assume that
      // PETSc was built to require it; otherwise PETSc initialization
      // dies.
      && !libMesh::on_command_line ("--disable-mpi")
#endif
      )
    {
      int ierr=0;

      PETSC_COMM_WORLD = libMesh::GLOBAL_COMM_WORLD;

      // Check whether the calling program has already initialized
      // PETSc, and avoid duplicate Initialize/Finalize
      PetscBool petsc_already_initialized;
      ierr = PetscInitialized(&petsc_already_initialized);
      CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
      if (petsc_already_initialized != PETSC_TRUE)
        libmesh_initialized_petsc = true;
# if defined(LIBMESH_HAVE_SLEPC)

      // If SLEPc allows us to check whether the calling program
      // has already initialized it, we do that, and avoid
      // duplicate Initialize/Finalize.
      // We assume that SLEPc will handle PETSc appropriately,
      // which it does in the versions we've checked.
      if (!SlepcInitializeCalled)
        {
          ierr = SlepcInitialize  (&argc, const_cast<char ***>(&argv), nullptr, nullptr);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
          libmesh_initialized_slepc = true;
        }
# else
      if (libmesh_initialized_petsc)
        {
          ierr = PetscInitialize (&argc, const_cast<char ***>(&argv), nullptr, nullptr);
          CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
        }
# endif
#if !PETSC_RELEASE_LESS_THAN(3,3,0)
      // Register the reference implementation of DMlibMesh
#if PETSC_RELEASE_LESS_THAN(3,4,0)
      ierr = DMRegister(DMLIBMESH, PETSC_NULL, "DMCreate_libMesh", DMCreate_libMesh); CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
#else
      ierr = DMRegister(DMLIBMESH, DMCreate_libMesh); CHKERRABORT(libMesh::GLOBAL_COMM_WORLD,ierr);
#endif

#endif
    }
#endif

#if defined(LIBMESH_HAVE_MPI) && defined(LIBMESH_HAVE_VTK)
  // Do MPI initialization for VTK.
  _vtk_mpi_controller = vtkMPIController::New();
  _vtk_mpi_controller->Initialize(&argc, const_cast<char ***>(&argv), /*initialized_externally=*/1);
  _vtk_mpi_controller->SetGlobalController(_vtk_mpi_controller);
#endif

  // Re-parse the command-line arguments.  Note that the PETSc and MPI
  // initialization calls above may have removed command line arguments
  // that are not relevant to this application.  We don't want a false
  // positive caused by detecting those arguments.
  //
  // Note: this seems overly paranoid/like it should be unnecessary,
  // plus we were doing it wrong for many years and not clearing the
  // existing GetPot object before re-parsing the command line, so all
  // the command line arguments appeared twice in the GetPot object...
  command_line.reset (new GetPot (argc, argv));

  // The following line is an optimization when simultaneous
  // C and C++ style access to output streams is not required.
  // The amount of benefit which occurs is probably implementation
  // defined, and may be nothing.  On the other hand, I have seen
  // some IO tests where IO performance improves by a factor of two.
  if (!libMesh::on_command_line ("--sync-with-stdio"))
    std::ios::sync_with_stdio(false);

  // Honor the --separate-libmeshout command-line option.
  // When this is specified, the library uses an independent ostream
  // for libMesh::out/libMesh::err messages, and
  // std::cout and std::cerr are untouched by any other options
  if (libMesh::on_command_line ("--separate-libmeshout"))
    {
      // Redirect.  We'll share streambufs with cout/cerr for now, but
      // presumably anyone using this option will want to replace the
      // bufs later.
      std::ostream * newout = new std::ostream(std::cout.rdbuf());
      libMesh::out = *newout;
      std::ostream * newerr = new std::ostream(std::cerr.rdbuf());
      libMesh::err = *newerr;
    }

  // Process command line arguments for redirecting stdout/stderr.
  bool
    cmdline_has_redirect_stdout = libMesh::on_command_line ("--redirect-stdout"),
    cmdline_has_redirect_output = libMesh::on_command_line ("--redirect-output");

  // The --redirect-stdout command-line option has been deprecated in
  // favor of "--redirect-output basename".
  if (cmdline_has_redirect_stdout)
    libmesh_warning("The --redirect-stdout command line option has been deprecated. "
                    "Use '--redirect-output basename' instead.");

  // Honor the "--redirect-stdout" and "--redirect-output basename"
  // command-line options.  When one of these is specified, each
  // processor sends libMesh::out/libMesh::err messages to
  // stdout.processor.#### (default) or basename.processor.####.
  if (cmdline_has_redirect_stdout || cmdline_has_redirect_output)
    {
      std::string basename = "stdout";

      // Look for following argument if using new API
      if (cmdline_has_redirect_output)
        {
          // Set the cursor to the correct location in the list of command line arguments.
          command_line->search(1, "--redirect-output");

          // Get the next option on the command line as a string.
          std::string next_string = "";
          next_string = command_line->next(next_string);

          // If the next string starts with a dash, we assume it's
          // another flag and not a file basename requested by the
          // user.
          if (next_string.size() > 0 && next_string.find_first_of("-") != 0)
            basename = next_string;
        }

      std::ostringstream filename;
      filename << basename << ".processor." << libMesh::global_processor_id();
      _ofstream.reset (new std::ofstream (filename.str().c_str()));

      // Redirect, saving the original streambufs!
      out_buf = libMesh::out.rdbuf (_ofstream->rdbuf());
      err_buf = libMesh::err.rdbuf (_ofstream->rdbuf());
    }

  // Redirect libMesh::out to nothing on all processors other than
  // processor 0, unless explicitly told not to via the --keep-cout
  // command-line argument.
  if (libMesh::global_processor_id() != 0)
    if (!libMesh::on_command_line ("--keep-cout"))
      libMesh::out.rdbuf (nullptr);

  // Similarly, the user can request to drop cerr on all non-0 ranks.
  // By default, errors are printed on all ranks, but this can lead to
  // interleaved/unpredictable outputs when doing parallel regression
  // testing, which this option is designed to support.
  if (libMesh::global_processor_id() != 0)
    if (libMesh::on_command_line ("--drop-cerr"))
      libMesh::err.rdbuf (nullptr);

  // Check command line to override printing
  // of reference count information.
  if (libMesh::on_command_line("--disable-refcount-printing"))
    ReferenceCounter::disable_print_counter_info();

#ifdef LIBMESH_ENABLE_EXCEPTIONS
  // Set our terminate handler to write stack traces in the event of a
  // crash
  old_terminate_handler = std::set_terminate(libmesh_terminate_handler);
#endif


  if (libMesh::on_command_line("--enable-fpe"))
    libMesh::enableFPE(true);

  if (libMesh::on_command_line("--enable-segv"))
    libMesh::enableSEGV(true);

  // The library is now ready for use
  libMeshPrivateData::_is_initialized = true;


  // Make sure these work.  Library methods
  // depend on these being implemented properly,
  // so this is a good time to test them!
  libmesh_assert (libMesh::initialized());
  libmesh_assert (!libMesh::closed());
}