/**
 * Construct the bulk data container for a mesh.
 *
 * @param mesh_meta_data   Meta data this bulk data refers to; also used to
 *                         size the entity index and the bucket repository.
 * @param parallel         Parallel machine; its size and rank are cached.
 * @param bucket_max_size  Forwarded to the bucket repository.
 * @param use_memory_pool  Forwarded to the entity repository.
 *
 * The sync state is MODIFIABLE while the two initial ghostings are created
 * and is switched to SYNCHRONIZED once they exist.
 */
BulkData::BulkData( MetaData & mesh_meta_data ,
                    ParallelMachine parallel ,
                    unsigned bucket_max_size ,
                    bool use_memory_pool )
  : m_entities_index( parallel, convert_entity_keys_to_spans(mesh_meta_data) ),
    m_entity_repo( use_memory_pool ),
    m_bucket_repository( *this,
                         bucket_max_size,
                         mesh_meta_data.entity_rank_count(),
                         m_entity_repo ),
    m_entity_comm(),
    m_ghosting(),
    m_mesh_meta_data( mesh_meta_data ),
    m_parallel_machine( parallel ),
    m_parallel_size( parallel_machine_size( parallel ) ),
    m_parallel_rank( parallel_machine_rank( parallel ) ),
    m_sync_count( 0 ),
    m_sync_state( MODIFIABLE ),
    m_meta_data_verified( false ),
    m_optimize_buckets( false ),
    m_mesh_finalized( false )
{
  // The two ghostings every mesh starts with, created in this order.
  create_ghosting( "shared" );
  create_ghosting( "shared_aura" );

  m_sync_state = SYNCHRONIZED ;
}
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm ) { const unsigned p_rank = parallel_machine_rank( pm ); const bool is_root = 0 == p_rank ; CommBroadcast comm( pm , 0 ); if ( is_root ) { pack( comm.send_buffer() , s.get_parts() ); pack( comm.send_buffer() , s.get_fields() ); } comm.allocate_buffer(); if ( is_root ) { pack( comm.send_buffer() , s.get_parts() ); pack( comm.send_buffer() , s.get_fields() ); } comm.communicate(); int ok[ 2 ]; ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() ); ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() ); all_reduce( pm , ReduceMin<2>( ok ) ); ThrowRequireMsg(ok[0], "P" << p_rank << ": FAILED for Parts"); ThrowRequireMsg(ok[1], "P" << p_rank << ": FAILED for Fields"); }
void CopySearchCommAll::do_search(const CopyTransferMeshBase & mesha, const CopyTransferMeshBase & meshb, KeyToTargetProcessor & key_to_target_processor ) { key_to_target_processor.clear(); m_remote_keys.clear(); const CopyTransferMeshBase::MeshIDVector & meshb_ids = meshb.get_mesh_ids(); const ParallelMachine comm = meshb.comm(); const int my_proc = parallel_machine_rank(comm); const int num_proc = parallel_machine_size(comm); stk::CommSparse commSparse(comm); for (int phase=0;phase<2;++phase) { for (size_t id_index=0 ; id_index<meshb_ids.size() ; ++id_index) { const Mesh_ID key = meshb_ids[id_index]; if (mesha.is_locally_owned(key)) { key_to_target_processor[key]=my_proc; continue; } m_remote_keys.insert(key); for (int send_proc = 0 ; send_proc < num_proc ; ++send_proc) { if (my_proc == send_proc) { continue; } commSparse.send_buffer(send_proc).pack<Mesh_ID>(key); } } if (phase == 0 ) { commSparse.allocate_buffers(); } else { commSparse.communicate(); } } for (int recv_proc=0;recv_proc<num_proc;++recv_proc) { if ( my_proc != recv_proc ) { while(commSparse.recv_buffer(recv_proc).remaining()) { Mesh_ID key; commSparse.recv_buffer(recv_proc).unpack<Mesh_ID>(key); if (mesha.is_locally_owned(key)) { key_to_target_processor[key] = recv_proc; } } } } }
/**
 * Construct a sparse communicator on the given parallel machine.
 *
 * Caches the machine's size and rank and sizes the send and receive buffer
 * containers to one entry per process.
 *
 * @param comm  Parallel machine to communicate over.
 */
CommSparse::CommSparse( ParallelMachine comm)
  : m_comm( comm ),
    m_size( parallel_machine_size( comm ) ),
    m_rank( parallel_machine_rank( comm ) ),
    m_send(),
    m_recv(),
    m_send_data(),
    m_recv_data(),
    m_send_procs(),
    m_recv_procs()
{
  // One buffer slot per process in each direction.
  m_recv.resize( m_size );
  m_send.resize( m_size );
}
void bounding_boxes (std::vector< std::pair<Sphere,EntityProc> > &v) const { const unsigned dimension = m_bulk_data.mesh_meta_data().spatial_dimension(); const float r = m_sphere_rad; const int proc_id = parallel_machine_rank(m_comm); v.clear(); for (EntityKeySet::const_iterator k=m_entity_keys.begin(); k!=m_entity_keys.end(); ++k) { const EntityKey id = *k; Point center; const double *c = coord(id); for (unsigned j=0; j<dimension; ++j) { center[j] = c[j]; } v.push_back( std::make_pair( Sphere(center,r), EntityProc(id, proc_id))); } }
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm ) { static const char method[] = "phdmesh::verify_parallel_consistency(MetaData)" ; const unsigned p_rank = parallel_machine_rank( pm ); const bool is_root = 0 == p_rank ; CommBroadcast comm( pm , 0 ); if ( is_root ) { pack( comm.send_buffer() , s.get_parts() ); pack( comm.send_buffer() , s.get_fields() ); } comm.allocate_buffer(); if ( is_root ) { pack( comm.send_buffer() , s.get_parts() ); pack( comm.send_buffer() , s.get_fields() ); } comm.communicate(); int ok[ 2 ]; ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() ); ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() ); all_reduce( pm , Min<2>( ok ) ); if ( ! ok[0] || ! ok[1] ) { std::ostringstream msg ; msg << "P" << p_rank ; msg << ": " << method ; msg << " : FAILED for:" ; if ( ! ok[0] ) { msg << " Parts" ; } if ( ! ok[1] ) { msg << " Fields" ; } throw std::logic_error( msg.str() ); } }
/**
 * Send the raw field data of the `domain` entities and receive it into the
 * `range` entities.
 *
 * Each EntityProc pairs an entity with the process to send to (domain) or to
 * receive from (range).  When `domain` and `range` are the very same vector
 * object the exchange is restricted to owners: an entity is sent only if this
 * process owns it, and received only from the process that owns it.
 *
 * @param machine  Parallel machine to communicate over.
 * @param domain   Entities (and destination processes) to send.
 * @param range    Entities (and source processes) to receive.
 * @param fields   Fields whose data is exchanged; nothing is done when empty.
 */
void communicate_field_data(
  ParallelMachine machine,
  const std::vector<EntityProc> & domain ,
  const std::vector<EntityProc> & range ,
  const std::vector<const FieldBase *> & fields)
{
  if ( fields.empty() ) { return; }

  typedef std::vector<EntityProc>::const_iterator EntityProcIter ;

  const unsigned parallel_size = parallel_machine_size( machine );
  const unsigned parallel_rank = parallel_machine_rank( machine );
  const bool asymmetric = & domain != & range ;
  const std::size_t num_fields = fields.size();

  // Sizing for send and receive

  const unsigned none = 0 ;
  std::vector<unsigned> send_size( parallel_size , none );
  std::vector<unsigned> recv_size( parallel_size , none );

  for ( EntityProcIter ip = domain.begin() ; ip != domain.end() ; ++ip ) {
    Entity & entity = * ip->first ;
    const unsigned proc = ip->second ;

    if ( ! asymmetric && parallel_rank != entity.owner_rank() ) { continue ; }

    unsigned entity_bytes = 0 ;
    for ( std::size_t k = 0 ; k < num_fields ; ++k ) {
      entity_bytes += field_data_size( *fields[k] , entity );
    }
    send_size[ proc ] += entity_bytes ;
  }

  for ( EntityProcIter ip = range.begin() ; ip != range.end() ; ++ip ) {
    Entity & entity = * ip->first ;
    const unsigned proc = ip->second ;

    if ( ! asymmetric && proc != entity.owner_rank() ) { continue ; }

    unsigned entity_bytes = 0 ;
    for ( std::size_t k = 0 ; k < num_fields ; ++k ) {
      entity_bytes += field_data_size( *fields[k] , entity );
    }
    recv_size[ proc ] += entity_bytes ;
  }

  // Allocate send and receive buffers:

  CommAll sparse ;

  sparse.allocate_buffers( machine, parallel_size / 4 ,
                           & send_size[0] , & recv_size[0] );

  // Pack for send:

  for ( EntityProcIter ip = domain.begin() ; ip != domain.end() ; ++ip ) {
    Entity & entity = * ip->first ;
    const unsigned proc = ip->second ;

    if ( ! asymmetric && parallel_rank != entity.owner_rank() ) { continue ; }

    CommBuffer & out = sparse.send_buffer( proc );
    for ( std::size_t k = 0 ; k < num_fields ; ++k ) {
      const FieldBase & field = *fields[k] ;
      const unsigned size = field_data_size( field , entity );
      if ( size ) {
        // Field data is shipped as raw bytes.
        unsigned char * ptr =
          reinterpret_cast<unsigned char *>(field_data( field , entity ));
        out.pack<unsigned char>( ptr , size );
      }
    }
  }

  // Communicate:

  sparse.communicate();

  // Unpack for recv:

  for ( EntityProcIter ip = range.begin() ; ip != range.end() ; ++ip ) {
    Entity & entity = * ip->first ;
    const unsigned proc = ip->second ;

    if ( ! asymmetric && proc != entity.owner_rank() ) { continue ; }

    CommBuffer & in = sparse.recv_buffer( proc );
    for ( std::size_t k = 0 ; k < num_fields ; ++k ) {
      const FieldBase & field = *fields[k] ;
      const unsigned size = field_data_size( field , entity );
      if ( size ) {
        unsigned char * ptr =
          reinterpret_cast<unsigned char *>(field_data( field , entity ));
        in.unpack<unsigned char>( ptr , size );
      }
    }
  }
}
/**
 * Construct a distributed compressed-row matrix.
 *
 * The row prefix, column index and coefficient vectors are taken over from
 * the caller by swapping, so the `arg_*` vectors hold the matrix's previous
 * (empty) contents afterwards.  On more than one process the column indices
 * are rewritten in place to local work-vector offsets and the communication
 * plan (m_work_disp, m_send_disp, m_send_map, m_sparse) is built.
 *
 * @param arg_comm       Parallel machine.
 * @param arg_partition  Row partition, size comm_size + 1; process p owns
 *                       rows [ arg_partition[p] , arg_partition[p+1] ).
 * @param arg_prefix     Row prefix; back() must equal the number of entries.
 *                       An empty prefix yields an empty matrix.
 * @param arg_coli       Global column index per entry.
 * @param arg_coef       Coefficient per entry.
 *
 * @throws std::invalid_argument on inconsistent argument sizes.
 * @throws std::logic_error if the sizing step yields a message to self.
 * @throws std::runtime_error if a received column index is outside this
 *         process's row range.
 */
CR_Matrix::CR_Matrix(
  ParallelMachine arg_comm ,
  const std::vector<unsigned> & arg_partition ,
  std::vector<unsigned> & arg_prefix ,
  std::vector<unsigned> & arg_coli ,
  std::vector<double> & arg_coef )
  : m_comm( arg_comm ),
    m_comm_size( parallel_machine_size( arg_comm ) ),
    m_comm_rank( parallel_machine_rank( arg_comm ) ),
    m_sparse( false ),
    m_work_disp(),
    m_send_disp(),
    m_send_map(),
    m_row_size( 0 ),
    m_prefix(),
    m_coli(),
    m_coef()
{
  static const char method[] = "phdmesh::CR_Matrix::CR_Matrix" ;

  if ( arg_prefix.empty() ) { return ; }

  //------------------------------------

  if ( arg_coli.size() != arg_prefix.back() ||
       arg_coef.size() != arg_prefix.back() ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_coli.size() = " << arg_coli.size() ;
    msg << " arg_coef.size() = " << arg_coef.size() ;
    msg << " != arg_prefix.back() = " << arg_prefix.back() ;
    throw std::invalid_argument( msg.str() );
  }

  swap( m_prefix , arg_prefix );
  swap( m_coli , arg_coli );
  swap( m_coef , arg_coef );

  m_row_size = m_prefix.size() - 1 ;

  // Serial case: nothing to communicate, column indices stay as given.
  if ( 1 == m_comm_size ) { return ; }

  //------------------------------------

  if ( arg_partition.size() != 1 + m_comm_size ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " comm_size = " << m_comm_size ;
    msg << " + 1 != arg_partition.size() = " << arg_partition.size() ;
    throw std::invalid_argument( msg.str() );
  }

  const unsigned row_first = arg_partition[ m_comm_rank ];
  const unsigned row_end   = arg_partition[ m_comm_rank + 1 ] ;

  if ( m_row_size != ( row_end - row_first ) ) {
    std::ostringstream msg ;
    msg << method << " ERROR" ;
    msg << " arg_prefix'row_size = " << m_row_size ;
    msg << " != arg_partition'row_size = " << ( row_end - row_first );
    throw std::invalid_argument( msg.str() );
  }

  //------------------------------------

  m_send_disp.resize( m_comm_size + 1 );
  m_work_disp.resize( m_comm_size + 1 );

  // Generate a vector of off-processor column identifiers

  std::vector<unsigned> work_col_ident ;

  {
    const std::vector<unsigned>::iterator j = m_coli.end();
          std::vector<unsigned>::iterator b = m_coli.begin();
          std::vector<unsigned>::iterator i ;

    for ( i = b ; j != i ; ++i ) {
      const unsigned global_col = *i ;
      if ( global_col < row_first || row_end <= global_col ) {
        ordered_insert( work_col_ident , global_col );
      }
    }
  }

  //------------------------------------
  // Map column global identifiers to local work offsets

  {
    const std::vector<unsigned>::iterator b = work_col_ident.begin();
    const std::vector<unsigned>::iterator e = work_col_ident.end();
          std::vector<unsigned>::iterator j ;

    j = std::lower_bound( b , e , row_end );

    const unsigned local_row_end = j - b ;

    for ( std::vector<unsigned>::iterator
          i = m_coli.begin() ; i != m_coli.end() ; ++i ) {
      const unsigned global_col = *i ;

      j = std::lower_bound( b, e, global_col );

      unsigned local_col = j - b ;

      if ( row_end <= global_col ) { local_col += local_row_end ; }

      *i = local_col ;
    }
  }

  //------------------------------------
  // Displacement prefix for work vector

  {
    std::vector<unsigned>::const_iterator i = work_col_ident.begin() ;

    m_work_disp[0] = 0 ;

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      const unsigned p_row_end = arg_partition[p+1] ;
      unsigned count = 0 ;
      for ( ; i != work_col_ident.end() && *i < p_row_end ; ++i ) {
        ++count ;
      }
      m_work_disp[p+1] = m_work_disp[p] + count ;
    }
  }

  //------------------------------------
  // Set up communications to gather work subvector

  {
    std::vector<unsigned> send_col_size( m_comm_size );
    std::vector<unsigned> recv_col_size( m_comm_size );

    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      send_col_size[p] = m_work_disp[p+1] - m_work_disp[p] ;
    }

    // Off-processor columns can never belong to this process's own range.
    if ( send_col_size[ m_comm_rank ] ) {
      std::ostringstream msg ;
      msg << method << " ERROR with communication sizing logic" ;
      throw std::logic_error( msg.str() );
    }

    unsigned num_msg_maximum = 0 ;

    comm_sizes( m_comm , m_comm_size / 4 , num_msg_maximum ,
                & send_col_size[0] , & recv_col_size[0] );

    m_sparse = num_msg_maximum < ( m_comm_size / 4 );

    m_send_disp[0] = 0 ;
    for ( unsigned p = 0 ; p < m_comm_size ; ++p ) {
      m_send_disp[p+1] = m_send_disp[p] + recv_col_size[p] ;
    }
  }

  const unsigned send_map_size = m_send_disp[ m_comm_size ];

  m_send_map.resize( send_map_size );

  // Either data vector may legitimately be empty (no off-processor columns
  // here, or nobody requests columns from this process).  Indexing element 0
  // of an empty vector is undefined, so pass a null pointer in that case.
  all_to_all( m_comm , PARALLEL_DATATYPE_UNSIGNED , m_sparse ,
              ( work_col_ident.empty() ? NULL : & work_col_ident[0] ) ,
              & m_work_disp[0],
              ( m_send_map.empty() ? NULL : & m_send_map[0] ) ,
              & m_send_disp[0] );

  //------------------------------------
  // Remap the 'm_work_disp' for receiving coefficients into the
  // work vector: [ lower_row_recv , local_row , upper_row_recv ]

  for ( unsigned p = m_comm_rank ; p < m_comm_size ; ++p ) {
    m_work_disp[p+1] += m_row_size ;
  }

  //------------------------------------
  // Map the send_map from global to local indices,
  // also sanity check it.

  for ( unsigned i = 0 ; i < send_map_size ; ++i ) {

    if ( m_send_map[i] < static_cast<int>( row_first ) ||
         static_cast<int>( row_end ) <= m_send_map[i] ) {
      std::ostringstream msg ;
      msg << method << " ERROR Received index " ;
      msg << m_send_map[i] ;
      msg << " out of range [ " ;
      msg << row_first ;
      msg << " : " ;
      msg << row_end ;
      msg << " )" ;
      throw std::runtime_error( msg.str() );
    }

    m_send_map[i] -= row_first ;
  }
}
void comm_recv_procs_and_msg_sizes(ParallelMachine comm , const std::vector<CommBuffer>& send_bufs , std::vector<CommBuffer>& recv_bufs, std::vector<int>& send_procs, std::vector<int>& recv_procs) { static const char method[] = "stk::comm_procs_and_msg_recv_sizes" ; const int p_size = parallel_machine_size( comm ); int result = MPI_SUCCESS ; MPI_Datatype uint_type = MPI_LONG_LONG; if (sizeof(int) == sizeof(unsigned)) uint_type = MPI_INT; else if (sizeof(long) == sizeof(unsigned)) uint_type = MPI_LONG; else if (sizeof(long long) == sizeof(unsigned)) uint_type = MPI_LONG_LONG; else { std::ostringstream msg ; msg << method << " ERROR: No matching MPI type found for size_t argument"; throw std::runtime_error(msg.str()); } std::vector<unsigned> buf; buf.reserve(p_size*2); int* recvcounts = reinterpret_cast<int*>(&buf[0]); unsigned * tmp = &buf[p_size]; send_procs.clear(); send_procs.reserve(16); for ( int i = 0 ; i < p_size ; ++i ) { recvcounts[i] = 1; tmp[i] = 0; if ( send_bufs[i].size() > 0 ) { tmp[i] = 1 ; send_procs.push_back(i); } } unsigned num_recv = 0; result = MPI_Reduce_scatter(tmp,&num_recv,recvcounts,uint_type,MPI_SUM,comm); if ( result != MPI_SUCCESS ) { // PARALLEL ERROR std::ostringstream msg ; msg << method << " ERROR: " << result << " == MPI_Reduce_scatter" ; throw std::runtime_error( msg.str() ); } // do point-to-point send/recvs const int mpi_tag = STK_COMMSPARSE_MPI_TAG_PROC_SIZING; MPI_Request request_null = MPI_REQUEST_NULL ; MPI_Status init_status; std::vector<MPI_Request> request( num_recv , request_null ); std::vector<MPI_Status> status( num_recv , init_status ); // Post receives for point-to-point message sizes for ( unsigned i = 0 ; i < num_recv ; ++i ) { unsigned * const p_buf = & buf[i] ; MPI_Request * const p_request = & request[i] ; result = MPI_Irecv( p_buf , 1 , uint_type, MPI_ANY_SOURCE , mpi_tag , comm , p_request ); if ( MPI_SUCCESS != result ) { // LOCAL ERROR std::ostringstream msg ; msg << method << " ERROR: " << result << " == 
MPI_Irecv" ; throw std::runtime_error( msg.str() ); } } // Send the point-to-point message sizes, for ( size_t i = 0 ; i < send_procs.size() ; ++i ) { int dst = send_procs[i]; unsigned value = send_bufs[dst].size(); result = MPI_Send( & value , 1 , uint_type, dst , mpi_tag , comm ); if ( MPI_SUCCESS != result ) { // LOCAL ERROR std::ostringstream msg ; msg << method << " ERROR: " << result << " == MPI_Send" ; throw std::runtime_error( msg.str() ); } } // Wait for all receives { MPI_Request * const p_request = (request.empty() ? NULL : & request[0]) ; MPI_Status * const p_status = (status.empty() ? NULL : & status[0]) ; result = MPI_Waitall( num_recv , p_request , p_status ); } if ( MPI_SUCCESS != result ) { // LOCAL ERROR ? std::ostringstream msg ; msg << method << " ERROR: " << result << " == MPI_Waitall" ; throw std::runtime_error( msg.str() ); } recv_procs.resize(num_recv); // Set the receive message sizes for ( unsigned i = 0 ; i < num_recv ; ++i ) { MPI_Status * const recv_status = & status[i] ; const int recv_proc = recv_status->MPI_SOURCE ; #ifndef NDEBUG //debug-mode-only error check const int recv_tag = recv_status->MPI_TAG ; int recv_count = 0 ; MPI_Get_count( recv_status , uint_type , & recv_count ); if ( recv_tag != mpi_tag || recv_count != 1 ) { std::ostringstream msg ; const int p_rank = parallel_machine_rank( comm ); msg << method << " ERROR: Received buffer mismatch " ; msg << "P" << p_rank << " <- P" << recv_proc ; msg << " " << 1 << " != " << recv_count ; throw std::runtime_error( msg.str() ); } #endif recv_bufs[ recv_proc ].set_size(buf[i]); recv_procs[i] = recv_proc; } }
/**
 * Unit test for changing the part membership of entities.
 *
 * Declares one entity of each rank 0..3 in parts A_0..A_3, then checks the
 * bucket supersets after adding/removing parts directly and after adding/
 * removing relations (which induce membership).  The test returns immediately
 * when run on more than one process.
 *
 * @param pm  Parallel machine to build the mesh on.
 */
void UnitTestBulkData::testChangeParts( ParallelMachine pm )
{
  static const char method[] =
    "stk::mesh::UnitTestBulkData::testChangeParts" ;

  std::cout << std::endl << method << std::endl ;

  const unsigned p_size = parallel_machine_size( pm );
  const unsigned p_rank = parallel_machine_rank( pm );

  // Single process, no sharing
  if ( 1 < p_size ) return ;

  // Meta data with entity ranks [0..9]
  std::vector<std::string> entity_names(10);
  for ( size_t i = 0 ; i < 10 ; ++i ) {
    std::ostringstream name ;
    name << "EntityRank_" << i ;
    entity_names[i] = name.str();
  }

  MetaData meta( entity_names );
  BulkData bulk( meta , pm , 100 );

  Part & part_univ = meta.universal_part();
  Part & part_owns = meta.locally_owned_part();

  Part & part_A_0 = meta.declare_part( std::string("A_0") , 0 );
  Part & part_A_1 = meta.declare_part( std::string("A_1") , 1 );
  Part & part_A_2 = meta.declare_part( std::string("A_2") , 2 );
  Part & part_A_3 = meta.declare_part( std::string("A_3") , 3 );

  Part & part_B_0 = meta.declare_part( std::string("B_0") , 0 );
  // Part & part_B_1 = meta.declare_part( std::string("B_1") , 1 );
  Part & part_B_2 = meta.declare_part( std::string("B_2") , 2 );
  // Part & part_B_3 = meta.declare_part( std::string("B_3") , 3 );

  meta.commit();
  bulk.modification_begin();

  // One entity per rank 0..3, each with id 1 and its own A_<rank> part.
  PartVector tmp(1);

  tmp[0] = & part_A_0 ;
  Entity & entity_0_1 = bulk.declare_entity( 0 , 1 , tmp );

  tmp[0] = & part_A_1 ;
  Entity & entity_1_1 = bulk.declare_entity( 1 , 1 , tmp );

  tmp[0] = & part_A_2 ;
  Entity & entity_2_1 = bulk.declare_entity( 2 , 1 , tmp );

  tmp[0] = & part_A_3 ;
  Entity & entity_3_1 = bulk.declare_entity( 3 , 1 , tmp );

  // Each entity's bucket: universal, locally-owned, and its A part.
  entity_0_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_0 );

  entity_1_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_1 );

  entity_2_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_2 );

  entity_3_1.bucket().supersets( tmp );
  STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
  STKUNIT_ASSERT( tmp[0] == & part_univ );
  STKUNIT_ASSERT( tmp[1] == & part_owns );
  STKUNIT_ASSERT( tmp[2] == & part_A_3 );

  {
    // Re-adding a part the entity already has changes nothing.
    tmp.resize(1);
    tmp[0] = & part_A_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  {
    // Add a new part:
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }

  {
    // Remove the part just added:
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , PartVector() , tmp );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  {
    // Relationship induced membership:
    bulk.declare_relation( entity_1_1 , entity_0_1 , 0 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_A_1 );
  }

  {
    // Remove relationship induced membership:
    bulk.destroy_relation( entity_1_1 , entity_0_1 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  {
    // Add a new part:
    tmp.resize(1);
    tmp[0] = & part_B_2 ;
    bulk.change_entity_parts( entity_2_1 , tmp );
    entity_2_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_2 );
    STKUNIT_ASSERT( tmp[3] == & part_B_2 );
  }

  {
    // Relationship induced membership:
    bulk.declare_relation( entity_2_1 , entity_0_1 , 0 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(5) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_A_2 );
    STKUNIT_ASSERT( tmp[4] == & part_B_2 );
  }

  {
    // Remove relationship induced membership:
    bulk.destroy_relation( entity_2_1 , entity_0_1 );
    entity_0_1.bucket().supersets( tmp );
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }

  bulk.modification_end();

  //------------------------------
  // Now the parallel fun.  Existing entities should be shared
  // by all processes since they have the same identifiers.
  // They should also have the same parts.

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(2) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
  }

  entity_2_1.bucket().supersets( tmp );
  if ( entity_2_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_2 );
    STKUNIT_ASSERT( tmp[3] == & part_B_2 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_2 );
    STKUNIT_ASSERT( tmp[2] == & part_B_2 );
  }

  if (bulk.parallel_size() > 1) {
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_0_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_1_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_2_1.sharing().size() );
    STKUNIT_ASSERT_EQUAL( size_t(p_size - 1) , entity_3_1.sharing().size() );
  }

  bulk.modification_begin();

  // Add a new part on the owning process:
  // the change is expected to succeed on the owner and to throw elsewhere.

  int ok_to_modify = entity_0_1.owner_rank() == p_rank ;

  try {
    tmp.resize(1);
    tmp[0] = & part_B_0 ;
    bulk.change_entity_parts( entity_0_1 , tmp );
    STKUNIT_ASSERT( ok_to_modify );
  }
  catch( const std::exception & ) {
    STKUNIT_ASSERT( ! ok_to_modify );
  }

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(2) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
  }

  bulk.modification_end();

  entity_0_1.bucket().supersets( tmp );
  if ( entity_0_1.owner_rank() == p_rank ) {
    STKUNIT_ASSERT_EQUAL( size_t(4) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_owns );
    STKUNIT_ASSERT( tmp[2] == & part_A_0 );
    STKUNIT_ASSERT( tmp[3] == & part_B_0 );
  }
  else {
    STKUNIT_ASSERT_EQUAL( size_t(3) , tmp.size() );
    STKUNIT_ASSERT( tmp[0] == & part_univ );
    STKUNIT_ASSERT( tmp[1] == & part_A_0 );
    STKUNIT_ASSERT( tmp[2] == & part_B_0 );
  }
}
/**
 * Unit test for part changes on a ring mesh of edges and nodes.
 *
 * Builds a ring with nPerProc edges per process, checks the initial part
 * membership of the local edges and nodes, then on process 0 moves every
 * local edge from its original edge part to the "extra" edge part and checks
 * that the nodes' membership follows.
 *
 * @param pm  Parallel machine to build the ring mesh on.
 */
void UnitTestBulkData::testChangeParts_loop( ParallelMachine pm )
{
  enum { nPerProc = 10 };

  const unsigned p_rank = parallel_machine_rank( pm );
  const unsigned p_size = parallel_machine_size( pm );

  // With more than one process, one extra node is checked per process.
  const unsigned nLocalNode = nPerProc + ( 1 < p_size ? 1 : 0 );
  const unsigned nLocalEdge = nPerProc ;

  UnitTestRingMeshFixture ring_mesh( pm , nPerProc , true /* generate parts */ );
  ring_mesh.m_meta_data.commit();
  ring_mesh.generate_mesh( false /* no aura */ );

  Part & part_owns = ring_mesh.m_meta_data.locally_owned_part();
  Part & part_univ = ring_mesh.m_meta_data.universal_part();

  Selector select_owned( ring_mesh.m_meta_data.locally_owned_part() );
  Selector select_used = select_owned | ring_mesh.m_meta_data.globally_shared_part();
  Selector select_all( ring_mesh.m_meta_data.universal_part() );

  std::vector<unsigned> local_count ;

  // Initial edge membership.
  for ( unsigned i = 0 ; i < nLocalEdge ; ++i ) {
    const unsigned n = i + nPerProc * p_rank ;

    Entity * const edge =
      ring_mesh.m_bulk_data.get_entity( 1 , ring_mesh.m_edge_ids[n] );

    STKUNIT_ASSERT( edge != NULL );
    STKUNIT_ASSERT( edge->bucket().member( part_univ ) );
    STKUNIT_ASSERT( edge->bucket().member( part_owns ) );
    STKUNIT_ASSERT( edge->bucket().member( * ring_mesh.m_edge_parts[ n % ring_mesh.m_edge_parts.size() ] ) );
  }

  // Initial node membership: each node picks up the parts of the two edges
  // with indices n and n-1 (modulo the number of edges).
  for ( unsigned i = 0 ; i < nLocalNode ; ++i ) {
    const unsigned n  = ( i + nPerProc * p_rank ) % ring_mesh.m_node_ids.size();
    const unsigned e0 = n ;
    const unsigned e1 = ( n + ring_mesh.m_edge_ids.size() - 1 ) % ring_mesh.m_edge_ids.size();
    const unsigned ns = ring_mesh.m_edge_parts.size();
    const unsigned n0 = e0 % ns ;
    const unsigned n1 = e1 % ns ;

    const bool n0_first = n0 < n1 ;
    Part * const epart_0 = ring_mesh.m_edge_parts[ n0_first ? n0 : n1 ];
    Part * const epart_1 = ring_mesh.m_edge_parts[ n0_first ? n1 : n0 ];

    Entity * const node =
      ring_mesh.m_bulk_data.get_entity( 0 , ring_mesh.m_node_ids[n] );

    STKUNIT_ASSERT( node != NULL );
    if ( node->owner_rank() == p_rank ) {
      STKUNIT_ASSERT( node->bucket().member( part_univ ) );
      STKUNIT_ASSERT( node->bucket().member( part_owns ) );
      STKUNIT_ASSERT( node->bucket().member( *epart_0 ) );
      STKUNIT_ASSERT( node->bucket().member( *epart_1 ) );
    }
    else {
      STKUNIT_ASSERT( node->bucket().member( part_univ ) );
      STKUNIT_ASSERT( ! node->bucket().member( part_owns ) );
      STKUNIT_ASSERT( node->bucket().member( * epart_0 ) );
      STKUNIT_ASSERT( node->bucket().member( * epart_1 ) );
    }
  }

  ring_mesh.m_bulk_data.modification_begin();

  // Only process 0 changes its edges: swap the original edge part for the
  // extra edge part.
  if ( 0 == p_rank ) {
    for ( unsigned i = 0 ; i < nLocalEdge ; ++i ) {
      const unsigned n = i + nPerProc * p_rank ;

      PartVector add(1);
      add[0] = & ring_mesh.m_edge_part_extra ;

      PartVector rem(1);
      rem[0] = ring_mesh.m_edge_parts[ n % ring_mesh.m_edge_parts.size() ];

      Entity * const edge =
        ring_mesh.m_bulk_data.get_entity( 1 , ring_mesh.m_edge_ids[n] );

      ring_mesh.m_bulk_data.change_entity_parts( *edge , add , rem );

      STKUNIT_ASSERT( edge->bucket().member( part_univ ) );
      STKUNIT_ASSERT( edge->bucket().member( part_owns ) );
      STKUNIT_ASSERT( edge->bucket().member(ring_mesh.m_edge_part_extra ) );
    }
  }

  ring_mesh.m_bulk_data.modification_end();

  // Node membership after the change: edges with index below nLocalEdge now
  // contribute the extra part instead of their original one.
  for ( unsigned i = 0 ; i < nLocalNode ; ++i ) {
    const unsigned n  = ( i + nPerProc * p_rank ) % ring_mesh.m_node_ids.size();
    const unsigned e0 = n ;
    const unsigned e1 = ( n + ring_mesh.m_edge_ids.size() - 1 ) % ring_mesh.m_edge_ids.size();
    const unsigned ns = ring_mesh.m_edge_parts.size();
    const unsigned n0 = e0 % ns ;
    const unsigned n1 = e1 % ns ;

    Part * ep_0 = e0 < nLocalEdge ? & ring_mesh.m_edge_part_extra : ring_mesh.m_edge_parts[n0] ;
    Part * ep_1 = e1 < nLocalEdge ? & ring_mesh.m_edge_part_extra : ring_mesh.m_edge_parts[n1] ;

    // Order the two parts by their meta-data ordinal.
    const bool ep_0_first =
      ep_0->mesh_meta_data_ordinal() < ep_1->mesh_meta_data_ordinal() ;
    Part * epart_0 = ep_0_first ? ep_0 : ep_1 ;
    Part * epart_1 = ep_0_first ? ep_1 : ep_0 ;

    Entity * const node =
      ring_mesh.m_bulk_data.get_entity( 0 , ring_mesh.m_node_ids[n] );

    STKUNIT_ASSERT( node != NULL );
    if ( node->owner_rank() == p_rank ) {
      STKUNIT_ASSERT( node->bucket().member( part_owns ) );
    }
    else {
      STKUNIT_ASSERT( ! node->bucket().member( part_owns ) );
    }

    STKUNIT_ASSERT( node->bucket().member( part_univ ) );
    STKUNIT_ASSERT( node->bucket().member( *epart_0 ) );
    STKUNIT_ASSERT( node->bucket().member( *epart_1 ) );
  }
}
bool use_case_blas_driver(MPI_Comm comm, int num_threads, int num_trials, const std::string &working_directory, const std::string &mesh_filename, const std::string &mesh_type, const std::string &thread_runner, int bucket_size, bool performance_test) { bool output = !performance_test; // If running for performance measurements, turn off output if (stk::parallel_machine_rank(comm) == 0) { std::cout << " stk_mesh Use Case Blas - fill, axpby, dot, norm , begin" << std::endl ; std::cout << "Running '" << mesh_filename << "' case, num_trials = " << num_trials << std::endl; } const AlgorithmRunnerInterface* alg_runner = NULL ; if ( thread_runner.empty() || thread_runner == std::string("NonThreaded") ) { alg_runner = stk::algorithm_runner_non_thread(); } else if ( thread_runner == std::string("TPI") ) { alg_runner = stk::algorithm_runner_tpi(num_threads); } else if ( thread_runner == std::string("TBB") ) { alg_runner = stk::algorithm_runner_tbb(num_threads); } if (alg_runner != NULL) { if (stk::parallel_machine_rank(comm) == 0) std::cout << "Using " << thread_runner << " algorithm runner, num_threads = " << num_threads << std::endl; } else { std::cout << "ERROR, failed to obtain requested AlgorithmRunner '" << thread_runner << "'." 
<< std::endl; return false; } //---------------------------------- // Timing: // [0] = stk::mesh::MetaData creation // [1] = stk::mesh::BulkData creation // [2] = Initialization // [3] = fill and axpby // [4] = dot and norm2 double time_min[9] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; double time_max[9] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; double wtime = 0 ; //-------------------------------------------------------------------- reset_malloc_stats(); if ( 0 == stk::parallel_machine_rank( comm ) ) { std::cout << "stk_mesh performance use case BLAS" << std::endl << " Number Processes = " << stk::parallel_machine_size( comm ) << std::endl ; std::cout.flush(); } //-------------------------------------------------------------------- // Initialize IO system. Registers all element types and storage // types and the exodusII default database type. Ioss::Init::Initializer init_db; { wtime = stk::wall_time(); //------------------------------------------------------------------ // Declare the mesh meta data: element blocks and associated fields stk::mesh::fem::FEMMetaData meta_data( spatial_dimension ); stk::io::MeshData mesh_data; std::string filename = working_directory + mesh_filename; stk::io::create_input_mesh(mesh_type, filename, comm, meta_data, mesh_data); stk::io::define_input_fields(mesh_data, meta_data); Fields fields; use_case_14_declare_fields(fields, meta_data.get_meta_data(meta_data)); //-------------------------------- // Commit (finalize) the meta data. Is now ready to be used // in the creation and management of mesh bulk data. meta_data.commit(); //------------------------------------------------------------------ time_max[0] = stk::wall_dtime( wtime ); //------------------------------------------------------------------ // stk::mesh::BulkData bulk data conforming to the meta data. 
stk::mesh::BulkData bulk_data(meta_data.get_meta_data(meta_data) , comm, bucket_size); stk::io::populate_bulk_data(bulk_data, mesh_data); //------------------------------------------------------------------ // Create output mesh... (input filename + ".out14") if (output) { filename = working_directory + mesh_filename + ".blas"; stk::io::create_output_mesh(filename, comm, bulk_data, mesh_data); stk::io::define_output_fields(mesh_data, meta_data, true); } stk::app::use_case_14_initialize_nodal_data(bulk_data , *fields.model_coordinates , *fields.coordinates_field , *fields.velocity_field, 1.0 /*dt*/); time_max[1] = stk::wall_dtime( wtime ); //------------------------------------------------------------------ // Ready to run the algorithms: //------------------------------------------------------------------ //------------------------------------------------------------------ time_max[2] = stk::wall_dtime( wtime ); //------------------------------------------------------------------ wtime = stk::wall_time(); double dot1 = 0; for(int n=0; n<num_trials; ++n) { // // Call BLAS algs. 
// wtime = stk::wall_time(); fill( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , *fields.velocity_field, 0.2 ); fill( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , *fields.fint_field, 1.0 ); axpby( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , 0.01, *fields.model_coordinates , 1.0 , *fields.coordinates_field ); axpby( *alg_runner, bulk_data , stk::mesh::fem::FEMMetaData::NODE_RANK , 0.1, *fields.coordinates_field, 1.0 , *fields.velocity_field ); time_max[3] += stk::wall_dtime( wtime ); dot1 = dot( *alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK , *fields.velocity_field, *fields.coordinates_field ); double dot2 = dot( *alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK, *fields.velocity_field, *fields.fint_field ); double norm_1 = norm2(*alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK, *fields.velocity_field ); double norm_2 = norm2(*alg_runner, bulk_data, stk::mesh::fem::FEMMetaData::NODE_RANK, *fields.coordinates_field ); if ( stk::parallel_machine_rank( comm ) == 0 ) { std::cout << " " << dot1 << " " << dot2 << " " << norm_1 << " " << norm_2 << std::endl; } time_max[4] += stk::wall_dtime( wtime ); if (output) { stk::io::process_output_request(mesh_data, bulk_data, n); } }//end for(..num_trials... if ( stk::parallel_machine_rank( comm ) == 0 ) { //Try to make sure the number gets printed out just the way we want it, //so we can use it as a pass/fail check for a regression test... std::cout.precision(6); std::cout.setf(std::ios_base::scientific, std::ios_base::floatfield); std::cout << "Final dot1: " << dot1 << std::endl; } //------------------------------------------------------------------ #ifdef USE_GNU_MALLOC_HOOKS if (parallel_machine_rank(comm) == 0) { double net_alloc = alloc_MB() - freed_MB(); std::cout << "Mesh creation:" << "\n Total allocated: " << alloc_MB()<<"MB in "<<alloc_blks() << " blocks." 
<< "\n Total freed: " << freed_MB() << "MB in " << freed_blks() << " blocks." << "\n Net allocated: "<<net_alloc << "MB."<<std::endl; } #endif //------------------------------------------------------------------ } time_max[8] = stk::wall_dtime( wtime ); time_min[0] = time_max[0] ; time_min[1] = time_max[1] ; time_min[2] = time_max[2] ; time_min[3] = time_max[3] ; time_min[4] = time_max[4] ; time_min[5] = time_max[5] ; time_min[6] = time_max[6] ; time_min[7] = time_max[7] ; time_min[8] = time_max[8] ; stk::all_reduce( comm , stk::ReduceMax<9>( time_max ) & stk::ReduceMin<9>( time_min ) ); time_max[3] /= num_trials ; time_max[4] /= num_trials ; time_max[5] /= num_trials ; time_max[6] /= num_trials ; time_min[3] /= num_trials ; time_min[4] /= num_trials ; time_min[5] /= num_trials ; time_min[6] /= num_trials ; // [0] = stk::mesh::MetaData creation // [1] = stk::mesh::BulkData creation // [2] = Initialization // [3] = Internal force if ( ! stk::parallel_machine_rank( comm ) ) { std::cout << "stk_mesh performance use case results:" << std::endl << " Number of trials = " << num_trials << std::endl << " Meta-data setup = " << time_min[0] << " : " << time_max[0] << " sec, min : max" << std::endl << " Bulk-data generation = " << time_min[1] << " : " << time_max[1] << " sec, min : max" << std::endl << " Initialization = " << time_min[2] << " : " << time_max[2] << " sec, min : max" << std::endl << " fill & axpby (per-trial) = " << time_min[3] << " : " << time_max[3] << " sec, min : max" << std::endl << " dot & norm2 (per-trial) = " << time_min[4] << " : " << time_max[4] << " sec, min : max" << std::endl << " Mesh destruction = " << time_min[8] << " : " << time_max[8] << " sec, min : max" << std::endl << std::endl ; } return true; }