Example 1
BulkData::BulkData( MetaData & mesh_meta_data ,
                    ParallelMachine parallel ,
                    unsigned bucket_max_size ,
                    bool use_memory_pool )
  : m_entities_index( parallel, convert_entity_keys_to_spans(mesh_meta_data) ),
    m_entity_repo(use_memory_pool),
    m_bucket_repository(
        *this, bucket_max_size,
        mesh_meta_data.entity_rank_count(),
        m_entity_repo
        ),
    m_entity_comm(),
    m_ghosting(),

    m_mesh_meta_data( mesh_meta_data ),
    m_parallel_machine( parallel ),
    m_parallel_size( parallel_machine_size( parallel ) ),
    m_parallel_rank( parallel_machine_rank( parallel ) ),
    m_sync_count( 0 ),
    m_sync_state( MODIFIABLE ),
    m_meta_data_verified( false ),
    m_optimize_buckets(false),
    m_mesh_finalized(false)
{
  create_ghosting( "shared" );
  create_ghosting( "shared_aura" );

  m_sync_state = SYNCHRONIZED ;
}
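
Construction wires the entity, bucket, and ghosting repositories together and ends in the SYNCHRONIZED state, so a mesh built this way must enter a modification cycle before entities can be declared. A minimal usage sketch, assuming the usual stk::mesh setup (the rank-name list and bucket size are illustrative):

// Sketch only: illustrative names, mirroring the unit-test usage below.
MetaData meta( entity_rank_names );   // hypothetical rank-name list
BulkData bulk( meta , MPI_COMM_WORLD , 100 /* bucket_max_size */ );

meta.commit();                        // meta data must be committed first

bulk.modification_begin();            // leave the SYNCHRONIZED state
// ... declare entities, relations, part memberships ...
bulk.modification_end();              // resolve sharing, back to SYNCHRONIZED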
Example 2
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm )
{
  const unsigned p_rank = parallel_machine_rank( pm );

  const bool is_root = 0 == p_rank ;

  CommBroadcast comm( pm , 0 );

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.allocate_buffer();

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.communicate();

  int ok[ 2 ];

  ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() );
  ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() );

  all_reduce( pm , ReduceMin<2>( ok ) );

  ThrowRequireMsg(ok[0], "P" << p_rank << ": FAILED for Parts");
  ThrowRequireMsg(ok[1], "P" << p_rank << ": FAILED for Fields");
}
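
The repeated pack calls are deliberate, not a copy-paste error: before allocate_buffer() a CommBroadcast send buffer only measures how many bytes the packs would write; after allocation the identical packs actually store them. A condensed sketch of the idiom (the int payload is illustrative):

CommBroadcast comm( pm , 0 /* root rank */ );

if ( is_root ) { comm.send_buffer().pack<int>( payload ); }  // pass 1: size only
comm.allocate_buffer();
if ( is_root ) { comm.send_buffer().pack<int>( payload ); }  // pass 2: write bytes
comm.communicate();

int received = 0 ;
comm.recv_buffer().unpack<int>( received );  // every rank unpacks the broadcast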
Example 3
void CopySearchCommAll::do_search(const CopyTransferMeshBase & mesha,
                                  const CopyTransferMeshBase & meshb,
                                  KeyToTargetProcessor & key_to_target_processor
                                  )
{
  key_to_target_processor.clear();
  m_remote_keys.clear();
  const CopyTransferMeshBase::MeshIDVector & meshb_ids = meshb.get_mesh_ids();
  const ParallelMachine comm = meshb.comm();
  const int my_proc = parallel_machine_rank(comm);
  const int num_proc = parallel_machine_size(comm);
  stk::CommSparse commSparse(comm);
  for (int phase=0;phase<2;++phase)
  {
    for (size_t id_index=0 ; id_index<meshb_ids.size() ; ++id_index)
    {
      const Mesh_ID key = meshb_ids[id_index];
      if (mesha.is_locally_owned(key))
      {
        key_to_target_processor[key]=my_proc;
        continue;
      }
      m_remote_keys.insert(key);
      for (int send_proc = 0 ; send_proc < num_proc ; ++send_proc)
      {
        if (my_proc == send_proc) { continue; }
        commSparse.send_buffer(send_proc).pack<Mesh_ID>(key);
      }
    }

    if (phase == 0 )
    {
      commSparse.allocate_buffers();
    }
    else
    {
      commSparse.communicate();
    }
  }

  for (int recv_proc=0;recv_proc<num_proc;++recv_proc)
  {
    if ( my_proc != recv_proc )
    {
      while(commSparse.recv_buffer(recv_proc).remaining())
      {
        Mesh_ID key;
        commSparse.recv_buffer(recv_proc).unpack<Mesh_ID>(key);
        if (mesha.is_locally_owned(key))
        {
          key_to_target_processor[key] = recv_proc;
        }
      }
    }
  }
}
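
Note the two-phase structure: the inner loop body runs twice verbatim. In phase 0 the send buffers are unallocated and each pack<Mesh_ID>() only accumulates byte counts, which allocate_buffers() then turns into real storage; in phase 1 the identical packs write the keys and communicate() ships them. The two passes must pack exactly the same data, or the buffers end up mis-sized.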
Example 4
CommSparse::CommSparse( ParallelMachine comm)
  : m_comm( comm ),
    m_size( parallel_machine_size( comm ) ),
    m_rank( parallel_machine_rank( comm ) ),
    m_send(),
    m_recv(),
    m_send_data(),
    m_recv_data(),
    m_send_procs(),
    m_recv_procs()
{
  m_send.resize(m_size);
  m_recv.resize(m_size);
}
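
The constructor only sizes the per-processor buffer tables (m_send/m_recv); a complete exchange then follows the same two-pass idiom shown in the search above. A sketch under the same API assumptions (dest_proc, value, and the double payload are illustrative):

stk::CommSparse sparse( comm );

for ( int phase = 0 ; phase < 2 ; ++phase ) {
  sparse.send_buffer( dest_proc ).pack<double>( value );  // sizes, then fills
  if ( phase == 0 ) { sparse.allocate_buffers(); }
  else              { sparse.communicate(); }
}

for ( int p = 0 ; p < num_procs ; ++p ) {
  while ( sparse.recv_buffer( p ).remaining() ) {
    double v = 0.0 ;
    sparse.recv_buffer( p ).unpack<double>( v );
    // ... consume v ...
  }
}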
Example 5
  void bounding_boxes (std::vector< std::pair<Sphere,EntityProc> > &v) const
  {
    const unsigned dimension = m_bulk_data.mesh_meta_data().spatial_dimension();
    const float r = m_sphere_rad;
    const int proc_id = parallel_machine_rank(m_comm);

    v.clear();

    for (EntityKeySet::const_iterator k=m_entity_keys.begin(); k!=m_entity_keys.end(); ++k) {
      const EntityKey id = *k;
      Point center;
      const double *c = coord(id);
      for (unsigned j=0; j<dimension; ++j) {
        center[j] = c[j];
      }
      v.push_back( std::make_pair( Sphere(center,r), EntityProc(id, proc_id)));
    }
  }
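
Each entity key is reduced here to a (Sphere, EntityProc) pair: a bounding sphere of radius m_sphere_rad centered on the entity's coordinates, tagged with the key and the local processor rank. Vectors built this way are the usual input to a coarse proximity search (for instance stk::search::coarse_search), which matches overlapping volumes across processors.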
Example 6
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm )
{
  static const char method[] = "phdmesh::verify_parallel_consistency(MetaData)" ;

  const unsigned p_rank = parallel_machine_rank( pm );

  const bool is_root = 0 == p_rank ;

  CommBroadcast comm( pm , 0 );

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.allocate_buffer();

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.communicate();

  int ok[ 2 ];

  ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() );
  ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() );

  all_reduce( pm , Min<2>( ok ) );

  if ( ! ok[0] || ! ok[1] ) {
    std::ostringstream msg ;
    msg << "P" << p_rank ;
    msg << ": " << method ;
    msg << " : FAILED for:" ;
    if ( ! ok[0] ) { msg << " Parts" ; }
    if ( ! ok[1] ) { msg << " Fields" ; }
    throw std::logic_error( msg.str() );
  }
}
Example 7
void communicate_field_data(
  ParallelMachine machine,
  const std::vector<EntityProc> & domain ,
  const std::vector<EntityProc> & range ,
  const std::vector<const FieldBase *> & fields)
{
  if ( fields.empty() ) { return; }

  const unsigned parallel_size = parallel_machine_size( machine );
  const unsigned parallel_rank = parallel_machine_rank( machine );
  const bool     asymmetric    = & domain != & range ;

  const std::vector<const FieldBase *>::const_iterator fe = fields.end();
  const std::vector<const FieldBase *>::const_iterator fb = fields.begin();
        std::vector<const FieldBase *>::const_iterator fi ;

  // Sizing for send and receive

  const unsigned zero = 0 ;
  std::vector<unsigned> send_size( parallel_size , zero );
  std::vector<unsigned> recv_size( parallel_size , zero );

  std::vector<EntityProc>::const_iterator i ;

  for ( i = domain.begin() ; i != domain.end() ; ++i ) {
    Entity       & e = * i->first ;
    const unsigned p = i->second ;

    if ( asymmetric || parallel_rank == e.owner_rank() ) {
      unsigned e_size = 0 ;
      for ( fi = fb ; fi != fe ; ++fi ) {
        const FieldBase & f = **fi ;
        e_size += field_data_size( f , e );
      }
      send_size[ p ] += e_size ;
    }
  }

  for ( i = range.begin() ; i != range.end() ; ++i ) {
    Entity       & e = * i->first ;
    const unsigned p = i->second ;

    if ( asymmetric || p == e.owner_rank() ) {
      unsigned e_size = 0 ;
      for ( fi = fb ; fi != fe ; ++fi ) {
        const FieldBase & f = **fi ;
        e_size += field_data_size( f , e );
      }
      recv_size[ p ] += e_size ;
    }
  }

  // Allocate send and receive buffers:

  CommAll sparse ;

  {
    const unsigned * const s_size = & send_size[0] ;
    const unsigned * const r_size = & recv_size[0] ;
    sparse.allocate_buffers( machine, parallel_size / 4 , s_size, r_size);
  }

  // Pack for send:

  for ( i = domain.begin() ; i != domain.end() ; ++i ) {
    Entity       & e = * i->first ;
    const unsigned p = i->second ;

    if ( asymmetric || parallel_rank == e.owner_rank() ) {
      CommBuffer & b = sparse.send_buffer( p );
      for ( fi = fb ; fi != fe ; ++fi ) {
        const FieldBase & f = **fi ;
        const unsigned size = field_data_size( f , e );
        if ( size ) {
          unsigned char * ptr = reinterpret_cast<unsigned char *>(field_data( f , e ));
          b.pack<unsigned char>( ptr , size );
        }
      }
    }
  }

  // Communicate:

  sparse.communicate();

  // Unpack for recv:

  for ( i = range.begin() ; i != range.end() ; ++i ) {
    Entity       & e = * i->first ;
    const unsigned p = i->second ;

    if ( asymmetric || p == e.owner_rank() ) {
      CommBuffer & b = sparse.recv_buffer( p );
      for ( fi = fb ; fi != fe ; ++fi ) {
        const FieldBase & f = **fi ;
        const unsigned size = field_data_size( f , e );
        if ( size ) {
          unsigned char * ptr = reinterpret_cast<unsigned char *>(field_data( f , e ));
          b.unpack<unsigned char>( ptr , size );
        }
      }
    }
  }
}
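
The sizing passes are the crux of this routine: send_size and recv_size are computed independently on the two sides from the same (entity, processor) lists, so every byte a sender counts toward processor p must be counted symmetrically by p's matching range entry, and allocate_buffers can cross-check the totals before any data moves. The parallel_size / 4 argument appears to be the message-count bound used to choose sparse point-to-point traffic over a dense all-to-all (compare the m_sparse logic in the CR_Matrix example below).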
Example 8
CR_Matrix::CR_Matrix(
 ParallelMachine arg_comm ,
 const std::vector<unsigned> & arg_partition ,
       std::vector<unsigned> & arg_prefix ,
       std::vector<unsigned> & arg_coli ,
       std::vector<double>   & arg_coef )
  : m_comm( arg_comm ),
    m_comm_size( parallel_machine_size( arg_comm ) ),
    m_comm_rank( parallel_machine_rank( arg_comm ) ),
    m_sparse( false ),
    m_work_disp(),
    m_send_disp(),
    m_send_map(),
    m_row_size( 0 ),
    m_prefix(),
    m_coli(),
    m_coef()
{
  static const char method[] = "phdmesh::CR_Matrix::CR_Matrix" ;

  if ( arg_prefix.empty() ) { return ; }

  //------------------------------------

  if ( arg_coli.size() != arg_prefix.back() ||
       arg_coef.size() != arg_prefix.back() ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_coli.size() = " << arg_coli.size() ;
    msg << " arg_coef.size() = " << arg_coef.size() ;
    msg << " !=  arg_prefix.back() = " << arg_prefix.back() ;
    throw std::invalid_argument( msg.str() );
  }

  swap( m_prefix , arg_prefix );
  swap( m_coli , arg_coli );
  swap( m_coef , arg_coef );

  m_row_size = m_prefix.size() - 1 ;

  if ( 1 == m_comm_size ) { return ; }

  //------------------------------------

  if ( arg_partition.size() != 1 + m_comm_size ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " comm_size = " << m_comm_size ;
    msg << " + 1  !=  arg_partition.size() = " << arg_partition.size() ;
    throw std::invalid_argument( msg.str() );
  }

  const unsigned row_first = arg_partition[ m_comm_rank ];
  const unsigned row_end   = arg_partition[ m_comm_rank + 1 ] ;

  if ( m_row_size != ( row_end - row_first ) ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_prefix'row_size = " << m_row_size ;
    msg << " !=  arg_partition'row_size = " << ( row_end - row_first );
    throw std::invalid_argument( msg.str() );
  }

  //------------------------------------

  m_send_disp.resize( m_comm_size + 1 );
  m_work_disp.resize( m_comm_size + 1 );

  // Generate a vector of off-processor column identifiers

  std::vector<unsigned> work_col_ident ;

  {
    const std::vector<unsigned>::iterator j = m_coli.end();
          std::vector<unsigned>::iterator b = m_coli.begin();
          std::vector<unsigned>::iterator i ;

    for ( i = b ; j != i ; ++i ) { 
      const unsigned global_col = *i ;
      if ( global_col < row_first || row_end <= global_col ) {
        ordered_insert( work_col_ident , global_col );
      }
    }
  }

  //------------------------------------
  // Map column global identifiers to local work offsets

  {
    const std::vector<unsigned>::iterator b = work_col_ident.begin();
    const std::vector<unsigned>::iterator e = work_col_ident.end();
          std::vector<unsigned>::iterator j ;

    j = std::lower_bound( b , e , row_end );

    const unsigned local_row_end = j - b ;

    for ( std::vector<unsigned>::iterator
          i = m_coli.begin() ; i != m_coli.end() ; ++i ) {
      const unsigned global_col = *i ;

      j = std::lower_bound( b, e, global_col );

      unsigned local_col = j - b ;

      if ( row_end <= global_col ) { local_col += local_row_end ; }

      *i = local_col ;
    }
  }

  //------------------------------------
  // Displacement prefix for work vector

  {
    std::vector<unsigned>::const_iterator i = work_col_ident.begin() ;

    m_work_disp[0] = 0 ;

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      const unsigned p_row_end = arg_partition[p+1] ;
      unsigned count = 0 ;
      for ( ; i != work_col_ident.end() && *i < p_row_end ; ++i ) {
        ++count ;
      }

      m_work_disp[p+1] = m_work_disp[p] + count ;
    }
  }

  //------------------------------------
  // Set up communications to gather work subvector

  {
    std::vector<unsigned> send_col_size( m_comm_size );
    std::vector<unsigned> recv_col_size( m_comm_size );

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      send_col_size[p] = m_work_disp[p+1] - m_work_disp[p] ;
    }

    if ( send_col_size[ m_comm_rank ] ) {
      std::ostringstream msg ;
      msg << method << " ERROR with communication sizing logic" ;
      throw std::logic_error( msg.str() );
    }

    unsigned num_msg_maximum = 0 ;

    comm_sizes( m_comm , m_comm_size / 4 , num_msg_maximum ,
                & send_col_size[0] , & recv_col_size[0] );

    m_sparse = num_msg_maximum < ( m_comm_size / 4 );

    m_send_disp[0] = 0 ;
    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      m_send_disp[p+1] = m_send_disp[p] + recv_col_size[p] ;
    }
  }

  const unsigned send_map_size = m_send_disp[ m_comm_size ];

  m_send_map.resize( send_map_size );

  all_to_all( m_comm , PARALLEL_DATATYPE_UNSIGNED , m_sparse ,
              & work_col_ident[0] , & m_work_disp[0],
              & m_send_map[0] ,     & m_send_disp[0] );

  //------------------------------------
  // Remap the 'm_work_disp' for receiving coefficients into the
  // work vector: [ lower_row_recv , local_row , upper_row_recv ]

  for ( unsigned p = m_comm_rank ; p < m_comm_size ; ++p ) {
    m_work_disp[p+1] += m_row_size ;
  }

  //------------------------------------
  // Map the send_map from global to local indices,
  // also sanity check it.

  for ( unsigned i = 0 ; i < send_map_size ; ++i ) {

    if ( m_send_map[i] < (int) row_first ||
                         (int) row_end <= m_send_map[i] ) {
      std::ostringstream msg ;
      msg << method << " ERROR Received index " ;
      msg << m_send_map[i] ;
      msg << " out of range [ " ;
      msg << row_first ;
      msg << " : " ;
      msg << row_end ;
      msg << " )" ;
      throw std::runtime_error( msg.str() );
    }

    m_send_map[i] -= row_first ;
  }
}
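
A concrete reading of the partition argument: with m_comm_size == 3 and arg_partition == { 0, 4, 9, 12 }, rank 1 owns global rows [4, 9), so its arg_prefix must describe exactly m_row_size == 5 rows, and any column index outside [4, 9) is an off-processor column that gets routed through work_col_ident, m_work_disp, and m_send_map.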
Example 9
void comm_recv_procs_and_msg_sizes(ParallelMachine comm ,
                                   const std::vector<CommBuffer>& send_bufs ,
                                         std::vector<CommBuffer>& recv_bufs,
                                   std::vector<int>& send_procs,
                                   std::vector<int>& recv_procs)
{
  static const char method[] = "stk::comm_recv_procs_and_msg_sizes" ;

  const int p_size = parallel_machine_size( comm );

  int result = MPI_SUCCESS ;

  MPI_Datatype uint_type = MPI_LONG_LONG;
  if (sizeof(int) == sizeof(unsigned))
    uint_type = MPI_INT;
  else if (sizeof(long) == sizeof(unsigned))
    uint_type = MPI_LONG;
  else if (sizeof(long long) == sizeof(unsigned))
    uint_type = MPI_LONG_LONG;
  else {
    std::ostringstream msg ;
    msg << method << " ERROR: No matching MPI type found for unsigned argument";
    throw std::runtime_error(msg.str());
  }

  std::vector<unsigned> buf;
  buf.resize(p_size*2); // resize, not reserve: the elements are indexed below
  int* recvcounts = reinterpret_cast<int*>(&buf[0]);
  unsigned * tmp = &buf[p_size];
  send_procs.clear();
  send_procs.reserve(16);
  for ( int i = 0 ; i < p_size ; ++i ) {
    recvcounts[i] = 1;
    tmp[i] = 0;
    if ( send_bufs[i].size() > 0 ) {
      tmp[i] = 1 ;
      send_procs.push_back(i);
    }
  }

  unsigned num_recv = 0;

  result = MPI_Reduce_scatter(tmp,&num_recv,recvcounts,uint_type,MPI_SUM,comm);

  if ( result != MPI_SUCCESS ) {
    // PARALLEL ERROR
    std::ostringstream msg ;
    msg << method << " ERROR: " << result << " == MPI_Reduce_scatter" ;
    throw std::runtime_error( msg.str() );
  }

  // do point-to-point send/recvs

  const int mpi_tag = STK_COMMSPARSE_MPI_TAG_PROC_SIZING;

  MPI_Request request_null = MPI_REQUEST_NULL ;
  MPI_Status init_status;
  std::vector<MPI_Request> request( num_recv , request_null );
  std::vector<MPI_Status>  status(  num_recv , init_status );

  // Post receives for point-to-point message sizes

  for ( unsigned i = 0 ; i < num_recv ; ++i ) {
    unsigned    * const p_buf     = & buf[i] ;
    MPI_Request * const p_request = & request[i] ;
    result = MPI_Irecv( p_buf , 1 , uint_type,
                        MPI_ANY_SOURCE , mpi_tag , comm , p_request );
    if ( MPI_SUCCESS != result ) {
      // LOCAL ERROR
      std::ostringstream msg ;
      msg << method << " ERROR: " << result << " == MPI_Irecv" ;
      throw std::runtime_error( msg.str() );
    }
  }

  // Send the point-to-point message sizes,

  for ( size_t i = 0 ; i < send_procs.size() ; ++i ) {
    int      dst = send_procs[i];
    unsigned value = send_bufs[dst].size();
    result = MPI_Send( & value , 1 , uint_type, dst , mpi_tag , comm );
    if ( MPI_SUCCESS != result ) {
      // LOCAL ERROR
      std::ostringstream msg ;
      msg << method << " ERROR: " << result << " == MPI_Send" ;
      throw std::runtime_error( msg.str() );
    }
  }

  // Wait for all receives

  {
    MPI_Request * const p_request = (request.empty() ? NULL : & request[0]) ;
    MPI_Status  * const p_status  = (status.empty() ? NULL : & status[0]) ;
    result = MPI_Waitall( num_recv , p_request , p_status );
  }
  if ( MPI_SUCCESS != result ) {
    // LOCAL ERROR ?
    std::ostringstream msg ;
    msg << method << " ERROR: " << result << " == MPI_Waitall" ;
    throw std::runtime_error( msg.str() );
  }

  recv_procs.resize(num_recv);

  // Set the receive message sizes

  for ( unsigned i = 0 ; i < num_recv ; ++i ) {
    MPI_Status * const recv_status = & status[i] ;
    const int recv_proc = recv_status->MPI_SOURCE ;

#ifndef NDEBUG
    //debug-mode-only error check
    const int recv_tag  = recv_status->MPI_TAG ;
    int recv_count  = 0 ;

    MPI_Get_count( recv_status , uint_type , & recv_count );

    if ( recv_tag != mpi_tag || recv_count != 1 ) {
      std::ostringstream msg ;
      const int p_rank = parallel_machine_rank( comm );
      msg << method << " ERROR: Received buffer mismatch " ;
      msg << "P" << p_rank << " <- P" << recv_proc ;
      msg << "  " << 1 << " != " << recv_count ;
      throw std::runtime_error( msg.str() );
    }
#endif

    recv_bufs[ recv_proc ].set_size(buf[i]);
    recv_procs[i] = recv_proc;
  }
}
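The MPI_Reduce_scatter call is the counting trick that makes this routine scale: every rank contributes a 0/1 flag per destination (tmp[i] == 1 when it has bytes for rank i), and with recvcounts all ones the scattered element-wise sums tell each rank exactly how many point-to-point messages to expect (num_recv), without any rank ever assembling the full communication graph.

Example 10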
void UnitTestBulkData::testChangeParts( ParallelMachine pm )
{
  static const char method[] =
    "stk::mesh::UnitTestBulkData::testChangeParts" ;

  std::cout << std::endl << method << std::endl ;

  const unsigned p_size = parallel_machine_size( pm );
  const unsigned p_rank = parallel_machine_rank( pm );

  if ( 1 < p_size ) return ;

  // Single process, no sharing

  // Meta data with entity ranks [0..9]
  std::vector<std::string> entity_names(10);
  for ( size_t i = 0 ; i < 10 ; ++i ) {
    std::ostringstream name ;
    name << "EntityRank_" << i ;
    entity_names[i] = name.str();
  }

  MetaData meta( entity_names );
  BulkData bulk( meta , pm , 100 );

  Part & part_univ = meta.universal_part();
  Part & part_owns = meta.locally_owned_part();

  Part & part_A_0 = meta.declare_part( std::string("A_0") , 0 );
  Part & part_A_1 = meta.declare_part( std::string("A_1") , 1 );
  Part & part_A_2 = meta.declare_part( std::string("A_2") , 2 );
  Part & part_A_3 = meta.declare_part( std::string("A_3") , 3 );

  Part & part_B_0 = meta.declare_part( std::string("B_0") , 0 );
  // Part & part_B_1 = meta.declare_part( std::string("B_1") , 1 );
  Part & part_B_2 = meta.declare_part( std::string("B_2") , 2 );
  // Part & part_B_3 = meta.declare_part( std::string("B_3") , 3 );

  meta.commit();
  bulk.modification_begin();

  PartVector tmp(1);

  tmp[0] = & part_A_0 ;
  Entity & entity_0_1 = bulk.declare_entity(  0 , 1 , tmp );

  tmp[0] = & part_A_1 ;
  Entity & entity_1_1 = bulk.declare_entity(  1 , 1 , tmp );

  tmp[0] = & part_A_2 ;
  Entity & entity_2_1 = bulk.declare_entity(  2 , 1 , tmp );

  tmp[0] = & part_A_3 ;
  Entity & entity_3_1 = bulk.declare_entity( 3 , 1 , tmp );

  entity_0_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_0 );

  entity_1_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_1 );

  entity_2_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_2 );

  entity_3_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_3 );

  {
    tmp.resize(1);
    tmp[0] = & part_A_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  { // Add a new part:
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }

  { // Remove the part just added:
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , PartVector() , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  { // Relationship induced membership:
    bulk.declare_relation( entity_1_1 , entity_0_1 , 0 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_A_1 );
  }

  { // Remove relationship induced membership:
    bulk.destroy_relation( entity_1_1 , entity_0_1 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  { // Add a new part:
    tmp.resize(1);
    tmp[0] = & part_B_2 ;
    bulk.change_entity_parts( entity_2_1 , tmp );
    entity_2_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_2 );
    STKUNIT_ASSERT( tmp[3] == & part_B_2 );
  }

  { // Relationship induced membership:
    bulk.declare_relation( entity_2_1 , entity_0_1 , 0 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(5) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_A_2 );
    STKUNIT_ASSERT( tmp[4] == & part_B_2 );
  }

  { // Remove relationship induced membership:
    bulk.destroy_relation( entity_2_1 , entity_0_1 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  bulk.modification_end();

  //------------------------------
  // Now the parallel fun.  Existing entities should be shared
  // by all processes since they have the same identifiers.
  // They should also have the same parts.
  // (With the early return above, this path is only exercised
  // on a single process in this test.)

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(2) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
  }

  entity_2_1.bucket().supersets( tmp );
  if ( entity_2_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_2 );
    STKUNIT_ASSERT( tmp[3] == & part_B_2 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_2 );
    STKUNIT_ASSERT( tmp[2] == & part_B_2 );
  }

  if (bulk.parallel_size() > 1) {
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_0_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_1_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_2_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_3_1.sharing().size() );
  }

  bulk.modification_begin();

  // Add a new part on the owning process:

  int ok_to_modify = entity_0_1.owner_rank() == p_rank ;

  try {
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    STKUNIT_ASSERT( ok_to_modify );
  }
  catch( const std::exception & x ) {
    STKUNIT_ASSERT( ! ok_to_modify );
  }

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(2) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
  }

  bulk.modification_end();

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
    STKUNIT_ASSERT( tmp[2] == & part_B_0 );
  }
}
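Example 11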
void UnitTestBulkData::testChangeParts_loop( ParallelMachine pm )
{
  enum { nPerProc = 10 };
  const unsigned p_rank = parallel_machine_rank( pm );
  const unsigned p_size = parallel_machine_size( pm );
  const unsigned nLocalNode = nPerProc + ( 1 < p_size ? 1 : 0 );
  const unsigned nLocalEdge = nPerProc ;

  UnitTestRingMeshFixture ring_mesh( pm , nPerProc , true /* generate parts */ );
  ring_mesh.m_meta_data.commit();
  ring_mesh.generate_mesh( false /* no aura */ );

  Part & part_owns = ring_mesh.m_meta_data.locally_owned_part();
  Part & part_univ = ring_mesh.m_meta_data.universal_part();

  Selector select_owned( ring_mesh.m_meta_data.locally_owned_part() );
  Selector select_used = select_owned | ring_mesh.m_meta_data.globally_shared_part();
  Selector select_all(   ring_mesh.m_meta_data.universal_part() );

  std::vector<unsigned> local_count ;

  for ( unsigned i = 0 ; i < nLocalEdge ; ++i ) {
    const unsigned n = i + nPerProc * p_rank ;
    Entity * const edge = ring_mesh.m_bulk_data.get_entity( 1 , ring_mesh.m_edge_ids[n] );
    STKUNIT_ASSERT( edge != NULL );
    STKUNIT_ASSERT( edge->bucket().member( part_univ ) );
    STKUNIT_ASSERT( edge->bucket().member( part_owns ) );
    STKUNIT_ASSERT( edge->bucket().member( * ring_mesh.m_edge_parts[ n % ring_mesh.m_edge_parts.size() ] ) );
  }

  for ( unsigned i = 0 ; i < nLocalNode ; ++i ) {
    const unsigned n = ( i + nPerProc * p_rank ) % ring_mesh.m_node_ids.size();
    const unsigned e0 = n ;
    const unsigned e1 = ( n + ring_mesh.m_edge_ids.size() - 1 ) % ring_mesh.m_edge_ids.size();
    const unsigned ns = ring_mesh.m_edge_parts.size();
    const unsigned n0 = e0 % ns ;
    const unsigned n1 = e1 % ns ;
    Part * const epart_0 = ring_mesh.m_edge_parts[ n0 < n1 ? n0 : n1 ];
    Part * const epart_1 = ring_mesh.m_edge_parts[ n0 < n1 ? n1 : n0 ];

    Entity * const node = ring_mesh.m_bulk_data.get_entity( 0 , ring_mesh.m_node_ids[n] );
    STKUNIT_ASSERT( node != NULL );
    if ( node->owner_rank() == p_rank ) {
      STKUNIT_ASSERT( node->bucket().member( part_univ ) );
      STKUNIT_ASSERT( node->bucket().member( part_owns ) );
      STKUNIT_ASSERT( node->bucket().member( *epart_0 ) );
      STKUNIT_ASSERT( node->bucket().member( *epart_1 ) );
    }
    else {
      STKUNIT_ASSERT( node->bucket().member( part_univ ) );
      STKUNIT_ASSERT( ! node->bucket().member( part_owns ) );
      STKUNIT_ASSERT( node->bucket().member( * epart_0 ) );
      STKUNIT_ASSERT( node->bucket().member( * epart_1 ) );
    }
  }

  ring_mesh.m_bulk_data.modification_begin();

  if ( 0 == p_rank ) {

    for ( unsigned i = 0 ; i < nLocalEdge ; ++i ) {
      const unsigned n = i + nPerProc * p_rank ;

      PartVector add(1); add[0] = & ring_mesh.m_edge_part_extra ;
      PartVector rem(1); rem[0] = ring_mesh.m_edge_parts[ n % ring_mesh.m_edge_parts.size() ];

      Entity * const edge = ring_mesh.m_bulk_data.get_entity( 1 , ring_mesh.m_edge_ids[n] );
      ring_mesh.m_bulk_data.change_entity_parts( *edge , add , rem );
      STKUNIT_ASSERT( edge->bucket().member( part_univ ) );
      STKUNIT_ASSERT( edge->bucket().member( part_owns ) );
      STKUNIT_ASSERT( edge->bucket().member(ring_mesh.m_edge_part_extra ) );
    }
  }

  ring_mesh.m_bulk_data.modification_end();

  for ( unsigned i = 0 ; i < nLocalNode ; ++i ) {
    const unsigned n = ( i + nPerProc * p_rank ) % ring_mesh.m_node_ids.size();
    const unsigned e0 = n ;
    const unsigned e1 = ( n + ring_mesh.m_edge_ids.size() - 1 ) % ring_mesh.m_edge_ids.size();
    const unsigned ns = ring_mesh.m_edge_parts.size();
    const unsigned n0 = e0 % ns ;
    const unsigned n1 = e1 % ns ;
    Part * ep_0 = e0 < nLocalEdge ? & ring_mesh.m_edge_part_extra : ring_mesh.m_edge_parts[n0] ;
    Part * ep_1 = e1 < nLocalEdge ? & ring_mesh.m_edge_part_extra : ring_mesh.m_edge_parts[n1] ;

    Part * epart_0 = ep_0->mesh_meta_data_ordinal() < ep_1->mesh_meta_data_ordinal() ? ep_0 : ep_1 ;
    Part * epart_1 = ep_0->mesh_meta_data_ordinal() < ep_1->mesh_meta_data_ordinal() ? ep_1 : ep_0 ;

    Entity * const node = ring_mesh.m_bulk_data.get_entity( 0 , ring_mesh.m_node_ids[n] );
    STKUNIT_ASSERT( node != NULL );
    if ( node->owner_rank() == p_rank ) {
      STKUNIT_ASSERT( node->bucket().member( part_owns ) );
    }
    else {
      STKUNIT_ASSERT( ! node->bucket().member( part_owns ) );
    }

    STKUNIT_ASSERT( node->bucket().member( part_univ ) );
    STKUNIT_ASSERT( node->bucket().member( *epart_0 ) );
    STKUNIT_ASSERT( node->bucket().member( *epart_1 ) );
  }
}
Example 12
bool use_case_blas_driver(MPI_Comm comm,
                          int num_threads,
                          int num_trials,
                          const std::string &working_directory,
                          const std::string &mesh_filename,
                          const std::string &mesh_type,
                          const std::string &thread_runner,
                          int bucket_size,
                          bool performance_test)
{
  bool output = !performance_test; // If running for performance measurements, turn off output

  if (stk::parallel_machine_rank(comm) == 0) {
    std::cout << " stk_mesh Use Case Blas - fill, axpby, dot, norm , begin" << std::endl ;
    std::cout << "Running '" << mesh_filename << "' case, num_trials = "
              << num_trials << std::endl;
  }


  const AlgorithmRunnerInterface* alg_runner = NULL ;
  if ( thread_runner.empty() ||
       thread_runner == std::string("NonThreaded") ) {
    alg_runner = stk::algorithm_runner_non_thread();
  }
  else if ( thread_runner == std::string("TPI") ) {
    alg_runner = stk::algorithm_runner_tpi(num_threads);
  }
  else if ( thread_runner == std::string("TBB") ) {
    alg_runner = stk::algorithm_runner_tbb(num_threads);
  }

  if (alg_runner != NULL) {
    if (stk::parallel_machine_rank(comm) == 0)
      std::cout << "Using " << thread_runner
                << " algorithm runner, num_threads = " << num_threads
                << std::endl;
  } else {
    std::cout << "ERROR, failed to obtain requested AlgorithmRunner '"
              << thread_runner << "'." << std::endl;
    return false;
  }

  //----------------------------------

  // Timing:
  //   [0] = stk::mesh::MetaData creation
  //   [1] = stk::mesh::BulkData creation
  //   [2] = Initialization
  //   [3] = fill and axpby
  //   [4] = dot and norm2
  //   [8] = Mesh destruction

  double time_min[9] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
  double time_max[9] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
  double wtime = 0 ;

  //--------------------------------------------------------------------

  reset_malloc_stats();

  if ( 0 == stk::parallel_machine_rank( comm ) ) {
    std::cout << "stk_mesh performance use case BLAS" << std::endl
              << "  Number Processes = " << stk::parallel_machine_size( comm )
              << std::endl ;
    std::cout.flush();
  }

  //--------------------------------------------------------------------

  // Initialize IO system.  Registers all element types and storage
  // types and the exodusII default database type.
  Ioss::Init::Initializer init_db;

  {
    wtime = stk::wall_time();

    //------------------------------------------------------------------
    // Declare the mesh meta data: element blocks and associated fields

    stk::mesh::fem::FEMMetaData meta_data(  spatial_dimension );
    stk::io::MeshData mesh_data;
    std::string filename = working_directory + mesh_filename;
    stk::io::create_input_mesh(mesh_type, filename, comm,
                               meta_data, mesh_data);
    stk::io::define_input_fields(mesh_data, meta_data);

    Fields fields;
    use_case_14_declare_fields(fields, meta_data.get_meta_data(meta_data));

    //--------------------------------
    // Commit (finalize) the meta data.  Is now ready to be used
    // in the creation and management of mesh bulk data.

    meta_data.commit();

    //------------------------------------------------------------------

    time_max[0] = stk::wall_dtime( wtime );

    //------------------------------------------------------------------
    // stk::mesh::BulkData bulk data conforming to the meta data.
    stk::mesh::BulkData bulk_data(meta_data.get_meta_data(meta_data) , comm, bucket_size);
    stk::io::populate_bulk_data(bulk_data, mesh_data);

    //------------------------------------------------------------------
    // Create output mesh...  (input filename + ".out14")
    if (output) {
      filename = working_directory + mesh_filename + ".blas";
      stk::io::create_output_mesh(filename, comm, bulk_data, mesh_data);
      stk::io::define_output_fields(mesh_data, meta_data, true);
    }

    stk::app::use_case_14_initialize_nodal_data(bulk_data ,
                                                *fields.model_coordinates ,
                                                *fields.coordinates_field ,
                                                *fields.velocity_field,
                                                1.0 /*dt*/);

    time_max[1] = stk::wall_dtime( wtime );

    //------------------------------------------------------------------
    // Ready to run the algorithms:
    //------------------------------------------------------------------

    //------------------------------------------------------------------
    time_max[2] = stk::wall_dtime( wtime );
    //------------------------------------------------------------------

    wtime = stk::wall_time();

    double dot1 = 0;

    for(int n=0; n<num_trials; ++n) {
      //
      // Call BLAS algs.
      //

      wtime = stk::wall_time();

      fill( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , *fields.velocity_field, 0.2 );

      fill( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , *fields.fint_field, 1.0 );

      axpby( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
             0.01, *fields.model_coordinates , 1.0 , *fields.coordinates_field );

      axpby( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK ,
             0.1, *fields.coordinates_field, 1.0 , *fields.velocity_field );

      time_max[3] += stk::wall_dtime( wtime );

      dot1 = dot( *alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK ,
                  *fields.velocity_field, *fields.coordinates_field );

      double dot2 = dot( *alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK,
                         *fields.velocity_field, *fields.fint_field );

      double norm_1 = norm2(*alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK, *fields.velocity_field );

      double norm_2 = norm2(*alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK, *fields.coordinates_field );

      if ( stk::parallel_machine_rank( comm ) == 0 ) {
        std::cout << "    " << dot1 << "  " << dot2 << "  " << norm_1 << "  " << norm_2 << std::endl;
      }

      time_max[4] += stk::wall_dtime( wtime );

      if (output) {
        stk::io::process_output_request(mesh_data, bulk_data, n);
      }

    }//end for(..num_trials...

    if ( stk::parallel_machine_rank( comm ) == 0 ) {
      //Try to make sure the number gets printed out just the way we want it,
      //so we can use it as a pass/fail check for a regression test...
      std::cout.precision(6);
      std::cout.setf(std::ios_base::scientific, std::ios_base::floatfield);
      std::cout << "Final dot1: " << dot1 << std::endl;
    }
    //------------------------------------------------------------------

#ifdef USE_GNU_MALLOC_HOOKS
    if (parallel_machine_rank(comm) == 0) {
      double net_alloc = alloc_MB() - freed_MB();
      std::cout << "Mesh creation:" << "\n   Total allocated: "
                << alloc_MB()<<"MB in "<<alloc_blks() << " blocks."
                << "\n   Total freed: " << freed_MB() << "MB in "
                << freed_blks() << " blocks."
                << "\n   Net allocated: "<<net_alloc << "MB."<<std::endl;
    }
#endif

    //------------------------------------------------------------------
  }

  time_max[8] = stk::wall_dtime( wtime );

  time_min[0] = time_max[0] ;
  time_min[1] = time_max[1] ;
  time_min[2] = time_max[2] ;
  time_min[3] = time_max[3] ;
  time_min[4] = time_max[4] ;
  time_min[5] = time_max[5] ;
  time_min[6] = time_max[6] ;
  time_min[7] = time_max[7] ;
  time_min[8] = time_max[8] ;

  stk::all_reduce( comm , stk::ReduceMax<9>( time_max ) & stk::ReduceMin<9>( time_min ) );

  time_max[3] /= num_trials ;
  time_max[4] /= num_trials ;
  time_max[5] /= num_trials ;
  time_max[6] /= num_trials ;

  time_min[3] /= num_trials ;
  time_min[4] /= num_trials ;
  time_min[5] /= num_trials ;
  time_min[6] /= num_trials ;

  //   [0] = stk::mesh::MetaData creation
  //   [1] = stk::mesh::BulkData creation
  //   [2] = Initialization
  //   [3] = fill and axpby (per-trial)
  //   [4] = dot and norm2 (per-trial)
  //   [8] = Mesh destruction

  if ( ! stk::parallel_machine_rank( comm ) ) {
    std::cout
      << "stk_mesh performance use case results:" << std::endl
      << "  Number of trials         = " << num_trials << std::endl
      << "  Meta-data setup          = " << time_min[0] << " : "
      << time_max[0] << " sec, min : max"
      << std::endl
      << "  Bulk-data generation     = " << time_min[1] << " : "
      << time_max[1] << " sec, min : max"
      << std::endl
      << "  Initialization           = " << time_min[2] << " : "
      << time_max[2] << " sec, min : max"
      << std::endl
      << "  fill & axpby (per-trial) = " << time_min[3] << " : "
      << time_max[3] << " sec, min : max"
      << std::endl
      << "  dot & norm2 (per-trial)  = " << time_min[4] << " : "
      << time_max[4] << " sec, min : max"
      << std::endl
      << "  Mesh destruction         = " << time_min[8] << " : "
      << time_max[8] << " sec, min : max"
      << std::endl
      << std::endl ;
  }

  return true;
}