bool comm_mesh_verify_parallel_consistency(
  BulkData & M , std::ostream & error_log )
{
  int result = 1 ;

  // Verify consistency of parallel attributes

  result = verify_parallel_attributes( M , error_log );

  if (M.parallel_size() > 1) {
    all_reduce( M.parallel() , ReduceMin<1>( & result ) );
  }

  // Verify entities against owner.

  if ( result ) {
    CommAll all( M.parallel() );

    // First pass only sizes the send buffers.
    pack_owned_verify( all , M );

    all.allocate_buffers( all.parallel_size() / 4 );

    // Second pass fills the allocated buffers.
    pack_owned_verify( all , M );

    all.communicate();

    result = unpack_not_owned_verify( all , M , error_log );

    if (M.parallel_size() > 1) {
      all_reduce( M.parallel() , ReduceMin<1>( & result ) );
    }
  }

  return result == 1 ;
}
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm )
{
  const unsigned p_rank = parallel_machine_rank( pm );

  const bool is_root = 0 == p_rank ;

  CommBroadcast comm( pm , 0 );

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.allocate_buffer();

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.communicate();

  int ok[ 2 ];
  ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() );
  ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() );

  all_reduce( pm , ReduceMin<2>( ok ) );

  ThrowRequireMsg(ok[0], "P" << p_rank << ": FAILED for Parts");
  ThrowRequireMsg(ok[1], "P" << p_rank << ": FAILED for Fields");
}
template<typename ProcessGroup, typename T, typename BinaryOperation>
inline T
all_reduce(ProcessGroup pg, const T& value, BinaryOperation bin_op)
{
  T result;
  all_reduce(pg,
             const_cast<T*>(&value), const_cast<T*>(&value+1),
             &result, bin_op);
  return result;
}
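// Usage sketch (not from the original source): one possible way to call the
// single-value convenience overload above, assuming Boost.Graph's distributed
// mpi_process_group and the boost::parallel::sum function object from the
// same parallel-algorithm header. The per-rank value 'local' is illustrative.
#include <boost/mpi/environment.hpp>
#include <boost/graph/use_mpi.hpp>
#include <boost/graph/distributed/mpi_process_group.hpp>
#include <boost/graph/parallel/algorithm.hpp>
#include <iostream>

int main(int argc, char* argv[])
{
  boost::mpi::environment env(argc, argv);
  boost::graph::distributed::mpi_process_group pg;

  // Each rank contributes one value; every rank receives the global sum.
  int local = process_id(pg) + 1;
  int total = boost::parallel::all_reduce(pg, local, boost::parallel::sum<int>());

  if (process_id(pg) == 0)
    std::cout << "sum over all ranks = " << total << std::endl;
  return 0;
}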
void CowichanMPI::life(BoolMatrix matrixIn, BoolMatrix matrixOut)
{
  int i;              // iteration index
  index_t r;          // row index
  int alive;          // number alive
  index_t lo, hi;     // work controls
  index_t rlo, rhi;   // for broadcast
  bool work;          // useful work to do?
  int is_alive = 1;   // some cells still alive?
  BoolMatrix m_tmp;   // tmp pointer

  // work
  work = get_block (world, 0, nr, &lo, &hi);

  for (i = 0; (i < lifeIterations) && (is_alive > 0); i++) {
    // reset alive neighbour count
    alive = 0;

    // count neighbours and fill new matrix
    if (work) {
      for (r = lo; r < hi; r++) {
        for (index_t c = 0; c < nc; ++c) {
          index_t count = sumNeighbours(matrixIn, r, c, nr, nc);
          if (count == 3 || ((count == 2) && MATRIX_RECT(matrixIn, r, c))) {
            MATRIX_RECT(matrixOut, r, c) = true;
            ++alive;
          }
          else {
            MATRIX_RECT(matrixOut, r, c) = false;
          }
        }
      }
    }

    // broadcast matrix
    for (r = 0; r < world.size (); r++) {
      if (get_block (world, 0, nr, &rlo, &rhi, r)) {
        broadcast (world, &MATRIX_RECT(matrixOut, rlo, 0),
                   (int)((rhi - rlo) * nc), (int)r);
      }
    }

    // is_alive is maximum of local alive's
    all_reduce (world, alive, is_alive, mpi::maximum<int>());

    // swap matrices (ping-pong)
    m_tmp = matrixIn;
    matrixIn = matrixOut;
    matrixOut = m_tmp;
  }
}
void communicate_field_data_verify_read( CommAll & sparse )
{
  std::ostringstream msg ;
  int error = 0 ;
  for ( unsigned p = 0 ; p < sparse.parallel_size() ; ++p ) {
    if ( sparse.recv_buffer( p ).remaining() ) {
      msg << "P" << sparse.parallel_rank()
          << " Unread data from P" << p << std::endl ;
      error = 1 ;
    }
  }
  all_reduce( sparse.parallel() , ReduceSum<1>( & error ) );
  ThrowErrorMsgIf( error, msg.str() );
}
void communicate_field_data_verify_read( CommAll & sparse )
{
  std::ostringstream msg ;
  int flag = 0 ;
  for ( unsigned p = 0 ; p < sparse.parallel_size() ; ++p ) {
    if ( sparse.recv_buffer( p ).remaining() ) {
      msg << "P" << sparse.parallel_rank()
          << " Unread data from P" << p << std::endl ;
      flag = 1 ;
    }
  }
  all_reduce( sparse.parallel() , ReduceSum<1>( & flag ) );
  if ( flag ) { throw std::runtime_error( msg.str() ); }
}
#include <boost/mpi.hpp>
#include <cstdlib>
#include <iostream>
namespace mpi = boost::mpi;

int main(int argc, char* argv[])
{
  mpi::environment env(argc, argv);
  mpi::communicator world;

  std::srand(world.rank());
  int my_number = std::rand();

  // The three-argument overload returns the reduced value; it does not
  // modify my_number in place, so capture the result before printing.
  int minimum = all_reduce(world, my_number, mpi::minimum<int>());

  if (world.rank() == 0) {
    std::cout << "The minimum value is " << minimum << std::endl;
  }

  return 0;
}
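// For contrast (not from the original source): the same reduction written with
// Boost.MPI's output-parameter overload, which stores the result into a
// separate variable on every rank.
#include <boost/mpi.hpp>
#include <cstdlib>
#include <iostream>
namespace mpi = boost::mpi;

int main(int argc, char* argv[])
{
  mpi::environment env(argc, argv);
  mpi::communicator world;

  std::srand(world.rank());
  int my_number = std::rand();

  int minimum = 0;
  all_reduce(world, my_number, minimum, mpi::minimum<int>());

  if (world.rank() == 0) {
    std::cout << "The minimum value is " << minimum << std::endl;
  }
  return 0;
}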
void verify_parallel_consistency( const MetaData & s , ParallelMachine pm )
{
  static const char method[] =
    "phdmesh::verify_parallel_consistency(MetaData)" ;

  const unsigned p_rank = parallel_machine_rank( pm );

  const bool is_root = 0 == p_rank ;

  CommBroadcast comm( pm , 0 );

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.allocate_buffer();

  if ( is_root ) {
    pack( comm.send_buffer() , s.get_parts() );
    pack( comm.send_buffer() , s.get_fields() );
  }

  comm.communicate();

  int ok[ 2 ];
  ok[0] = unpack_verify( comm.recv_buffer() , s.get_parts() );
  ok[1] = unpack_verify( comm.recv_buffer() , s.get_fields() );

  all_reduce( pm , ReduceMin<2>( ok ) );

  if ( ! ok[0] || ! ok[1] ) {
    std::ostringstream msg ;
    msg << "P" << p_rank ;
    msg << ": " << method ;
    msg << " : FAILED for:" ;
    if ( ! ok[0] ) { msg << " Parts" ; }
    if ( ! ok[1] ) { msg << " Fields" ; }
    throw std::logic_error( msg.str() );
  }
}
void AllReduce( MPI_Comm comm, const ReduceSet & reduce_set)
{
  size_t size = reduce_set.size();

  if (size) {
    char *input_buffer  = new char[size];
    char *output_buffer = new char[size];
    void *inbuf  = (void *) input_buffer;
    void *outbuf = (void *) output_buffer;

    s_currentReduceSet = &reduce_set;

    ParallelReduceOp f =
      reinterpret_cast<ParallelReduceOp>(& ReduceSet::void_op);

    // Marshal the reduce-set operands into a contiguous buffer, reduce,
    // then unmarshal the results back into the registered targets.
    reduce_set.copyin(inbuf);
    all_reduce(comm, f, inbuf, outbuf, size);
    reduce_set.copyout(outbuf);

    delete [] output_buffer;
    delete [] input_buffer;
  }
}
/** Evaluate the function at this input point (or points) returning value(s) in output_field_values
 *
 * In the following, the arrays are dimensioned using the notation (from Intrepid's doc):
 *
 * [C]        - num. integration domains (cells/elements)
 * [F]        - num. Intrepid "fields" (number of bases within an element == num. nodes typically)
 * [P]        - num. integration (or interpolation) points within the element
 * [D]        - spatial dimension
 * [D1], [D2] - spatial dimension
 *
 * Locally, we introduce this notation:
 *
 * [DOF]      - number of degrees-of-freedom per node of the interpolated stk Field.
 *              For example, a vector field in 3D has [DOF] = 3
 *
 * Dimensions of input_phy_points are required to be either ([D]) or ([P],[D])
 * Dimensions of output_field_values are required to be ([DOF]) or ([P],[DOF]) respectively
 *
 * [R] is used for the rank of MDArray's
 */
void FieldFunction::operator()(MDArray& input_phy_points, MDArray& output_field_values, double time)
{
  EXCEPTWATCH;
  argsAreValid(input_phy_points, output_field_values);

  m_found_on_local_owned_part = false;

  //// single point only (for now)
  unsigned found_it = 0;

  int D_ = last_dimension(input_phy_points);
  MDArray found_parametric_coordinates_one(1, D_);
  setup_searcher(D_);

  MDArray output_field_values_local = output_field_values;
  int R_output = output_field_values.rank();

  int R_input = input_phy_points.rank();
  int P_ = (R_input == 1 ? 1 : input_phy_points.dimension(R_input-2));

  // FIXME for tensor valued fields
  int DOF_ = last_dimension(output_field_values_local);

  MDArray input_phy_points_one(1,D_);
  MDArray output_field_values_one(1,DOF_);

  int C_ = 1;
  if (R_input == 3) {
    C_ = input_phy_points.dimension(0);
  }

  for (int iC = 0; iC < C_; iC++) {
    for (int iP = 0; iP < P_; iP++) {
      for (int iD = 0; iD < D_; iD++) {
        switch(R_input) {
        case 1: input_phy_points_one(0, iD) = input_phy_points(iD); break;
        case 2: input_phy_points_one(0, iD) = input_phy_points(iP, iD); break;
        case 3: input_phy_points_one(0, iD) = input_phy_points(iC, iP, iD); break;
        default: VERIFY_1("bad rank");
        }
      }

      const stk_classic::mesh::Entity *found_element = 0;
      {
        EXCEPTWATCH;
        //if (m_searchType==STK_SEARCH) std::cout << "find" << std::endl;
        found_element = m_searcher->findElement(input_phy_points_one,
                                                found_parametric_coordinates_one,
                                                found_it, m_cachedElement);
        //if (m_searchType==STK_SEARCH) std::cout << "find..done found_it=" << found_it << std::endl;
      }

      // if found element on the local owned part, evaluate
      if (found_it) {
        m_found_on_local_owned_part = true;
        if (( EXTRA_PRINT) && m_searchType==STK_SEARCH)
          std::cout << "FieldFunction::operator() found element # = "
                    << found_element->identifier() << std::endl;

        (*this)(input_phy_points_one, output_field_values_one,
                *found_element, found_parametric_coordinates_one);

        for (int iDOF = 0; iDOF < DOF_; iDOF++) {
          switch (R_output) {
          case 1: output_field_values_local( iDOF)        = output_field_values_one(0, iDOF); break;
          case 2: output_field_values_local(iP, iDOF)     = output_field_values_one(0, iDOF); break;
          case 3: output_field_values_local(iC, iP, iDOF) = output_field_values_one(0, iDOF); break;
          default: VERIFY_1("bad rank");
          }
        }
      }
      else {
        if (!m_parallelEval) {
          std::cout << "P[" << Util::get_rank() << "] FieldFunction::operator() found_it = "
                    << found_it << " points= " << input_phy_points_one << std::endl;
          throw std::runtime_error("FieldFunction::operator() in local eval mode and didn't find element - logic error");
        }
        double max_val = std::numeric_limits<double>::max();
        output_field_values_local.initialize(max_val);
      }

      // make sure it is found somewhere
      if (m_parallelEval) {
        all_reduce( m_bulkData->parallel() , ReduceMax<1>( & found_it ) );
      }

      if (EXTRA_PRINT)
        std::cout << "FieldFunction::operator() global found_it = " << found_it << std::endl;

      if (!found_it) {
        throw std::runtime_error("FieldFunction::operator() couldn't find element");
      }

      if (m_parallelEval) {
        stk_percept_global_lex_min( m_bulkData->parallel(), output_field_values.size(),
                                    &output_field_values_local[0], &output_field_values[0]);
      }
      else {
        output_field_values = output_field_values_local;
      }

      m_cachedElement = found_element;
    }
  }
}
int main(int argc, char **argv)
{ //MAIN MAIN MAIN
  mpi::environment env(argc, argv);
  mpi::communicator world;
  std::srand(135 + world.rank());

  int nodecount, max_i, max_j, max_k;
  if (world.rank() == 0) {
    std::cout << "usage: ./population_test.exe testfilename\n";
    FILE* nodefile = fopen(argv[1], "r");
    if (nodefile == NULL) {
      std::cout << "error opening file " << argv[1] << "\n";
      return 1;
    }
    char oneline[100];
    int line_count = 0;
    while (fgets(oneline, 100, nodefile) != NULL) {
      if (oneline[0] == '#' || oneline[0] == '\n') {
        std::cout << oneline;
        continue;
      } else {
        line_count += 1;
        if (line_count == 1) {
          sscanf(oneline, "%d%d%d%d", &nodecount, &max_i, &max_j, &max_k);
        } else {
          gampi_nodelist.push_back((nodeid) atoi(oneline));
        }
      }
    }
  }
  broadcast(world, max_i, 0);
  broadcast(world, max_j, 0);
  broadcast(world, max_k, 0);
  broadcast(world, gampi_nodelist, 0);
  gampi_domain = Domain(max_i, max_j, max_k);

  if (world.rank() == 0) {
    Individual a;
    a.show((char*)"Ideal soln");
  }

  Individual v(true);
  Population a(v, 1); // ancestor population
  Population e(v, 1); // store the elites

  time_t start = time(NULL);
  time_t lastprint = time(NULL);
  float currbest = v.get_fitness();
  std::pair<float,int> fitrank, bestfitrank;
  int elapsed = 0;
  const int pop_size = 100000;
  const int subset_percent = 5;   // percentage of population that makes the subset for next gen
  const float maxruntime = 300.0; // seconds
  const float maxgentime = maxruntime/30.0; // seconds, max time for creating one generation

  do {
    float lastbest = currbest;
    Population p(a, pop_size, maxgentime); // population expanded from ancestors

    // Select a subset to be used for creating next generation
    e = p.elitist_selection(std::max(1, p.get_size()*subset_percent/100/10));
    currbest = e.get_best_fitness();
    if (currbest > lastbest*0.99) {
      a = p.rank_selection   (std::max(1, p.get_size()*subset_percent/100));
    } else {
      a = p.elitist_selection(std::max(1, p.get_size()*subset_percent/100));
    }

    // Add someone else's elites to mine
    if (world.size() > 1) {
      int comm_offset;
      if (world.rank() == 0) comm_offset = 1 + (rand() % (world.size()-1)); // goes from 1 to N-1
      broadcast(world, comm_offset, 0);
      int src_rank = (world.size() + world.rank() + comm_offset) % world.size(); // I receive from
      int dst_rank = (world.size() + world.rank() - comm_offset) % world.size(); // I send to

      mpi::request reqs[2];
      Population r(v, 1);
      reqs[0] = world.isend(dst_rank, 307, e);
      reqs[1] = world.irecv(src_rank, 307, r);
      mpi::wait_all(reqs, reqs+2);
      a += r;
    }

    time_t current = time(NULL);
    elapsed = difftime(current, start);
    broadcast(world, elapsed, 0); // So they all stop at the same iteration

    fitrank = std::make_pair(currbest, world.rank());
    all_reduce(world, fitrank, bestfitrank, mpi::minimum<std::pair<float,int> >());

    if (world.rank() == 0 && difftime(current, lastprint) >= 1) {
      printf("%3d (secs): %8.5f\n", elapsed, bestfitrank.first);
      lastprint = time(NULL);
    }
  } while (elapsed < maxruntime);

  Individual best;
  if (bestfitrank.second == 0 && world.rank() == 0) {
    best = e.get_individual(0);
    best.show((char*) "Best soln");
  } else if (world.rank() == bestfitrank.second) {
    best = e.get_individual(0);
    world.send(0, 816, best);
  } else if (world.rank() == 0) {
    world.recv(bestfitrank.second, 816, best);
    best.show((char*) "Best soln");
  }
} // end MAIN MAIN MAIN
template<typename T, typename Op>
inline void
all_reduce(const communicator& comm, inplace_t<T*> inout_values, int n, Op op)
{
  all_reduce(comm, static_cast<const T*>(MPI_IN_PLACE), n, inout_values.buffer, op);
}
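// Usage sketch (not from the original source): reducing a small array in place
// with the overload above, wrapping the buffer pointer with boost::mpi::inplace.
// The buffer contents here are illustrative only.
#include <boost/mpi.hpp>
#include <boost/mpi/inplace.hpp>
#include <functional>
#include <iostream>
#include <vector>
namespace mpi = boost::mpi;

int main(int argc, char* argv[])
{
  mpi::environment env(argc, argv);
  mpi::communicator world;

  // Each rank starts with its own partial counts; after the call every rank
  // holds the element-wise sum, written back into the same buffer.
  std::vector<int> counts(4, world.rank() + 1);
  all_reduce(world, mpi::inplace(&counts[0]), (int) counts.size(), std::plus<int>());

  if (world.rank() == 0) {
    for (std::size_t i = 0; i < counts.size(); ++i)
      std::cout << counts[i] << (i + 1 < counts.size() ? ' ' : '\n');
  }
  return 0;
}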
void BulkData::change_entity_owner( const std::vector<EntityProc> & arg_change )
{
  static const char method[] = "stk::mesh::BulkData::change_entity_owner" ;

  const MetaData  & meta = m_mesh_meta_data ;
  const unsigned  p_rank = m_parallel_rank ;
  const unsigned  p_size = m_parallel_size ;
  ParallelMachine p_comm = m_parallel_machine ;

  //------------------------------
  // Verify the input changes, generate a clean local change list, and
  // generate the remote change list so that all processes know about
  // pending changes.

  std::vector<EntityProc> local_change( arg_change );

  // Parallel synchronous clean up and verify the requested changes:
  clean_and_verify_parallel_change( method , *this , local_change );

  //----------------------------------------
  // Parallel synchronous determination of changing
  // shared and ghosted.

  std::vector<EntityProc> ghosted_change ;
  std::vector<EntityProc> shared_change ;

  generate_parallel_change( *this , local_change ,
                            shared_change , ghosted_change );

  //------------------------------
  // Have enough information to delete all effected ghosts.
  // If the closure of a ghost contains a changing entity
  // then that ghost must be deleted.
  // Request that all ghost entities in the closure of the ghost be deleted.

  typedef std::set<EntityProc,EntityLess> EntityProcSet;
  typedef std::set<Entity*,EntityLess>    EntitySet;

  // Closure of the owner change for impacted ghost entities.

  EntityProcSet send_closure ;

  for ( std::vector<EntityProc>::iterator
        i = local_change.begin() ; i != local_change.end() ; ++i ) {
    insert_closure_send( *i , send_closure );
  }

  {
    EntitySet work ;

    for ( std::vector<EntityProc>::const_iterator
          i = ghosted_change.begin() ; i != ghosted_change.end() ; ++i ) {
      insert_transitive_ghost( i->first , m_parallel_rank , work );
    }

    for ( std::vector<EntityProc>::const_iterator
          i = shared_change.begin() ; i != shared_change.end() ; ++i ) {
      insert_transitive_ghost( i->first , m_parallel_rank , work );
    }

    for ( EntityProcSet::iterator
          i = send_closure.begin() ; i != send_closure.end() ; ++i ) {
      insert_transitive_ghost( i->first , m_parallel_rank , work );
    }

    // The ghosted change list will become invalid
    ghosted_change.clear();

    std::vector<EntityProc> empty ;
    std::vector<Entity*> effected_ghosts( work.begin() , work.end() );

    // Skip 'm_ghosting[0]' which is the shared subset.
    for ( std::vector<Ghosting*>::iterator
          ig = m_ghosting.begin() + 1 ; ig != m_ghosting.end() ; ++ig ) {
      // parallel synchronous:
      internal_change_ghosting( **ig , empty , effected_ghosts );
    }
  }

  //------------------------------
  // Consistently change the owner on all processes.
  // 1) The local_change list is giving away ownership.
  // 2) The shared_change may or may not be receiving ownership

  {
    PartVector owned( 1 );
    owned[0] = & meta.locally_owned_part();

    for ( std::vector<EntityProc>::iterator
          i = local_change.begin() ; i != local_change.end() ; ++i ) {
      // Giving ownership, change the parts first and then
      // the owner rank to pass the ownership test.
      change_entity_parts( * i->first , PartVector() , owned );

      m_entity_repo.set_entity_owner_rank( *(i->first), i->second);
    }

    for ( std::vector<EntityProc>::iterator
          i = shared_change.begin() ; i != shared_change.end() ; ++i ) {
      m_entity_repo.set_entity_owner_rank( *(i->first), i->second);
      if ( p_rank == i->second ) { // I receive ownership
        change_entity_parts( * i->first , owned , PartVector() );
      }
    }
  }

  //------------------------------
  // Send entities, along with their closure, to the new owner processes

  {
    std::ostringstream error_msg ;
    int error_count = 0 ;

    CommAll comm( p_comm );

    for ( std::set<EntityProc,EntityLess>::iterator
          i = send_closure.begin() ; i != send_closure.end() ; ++i ) {
      CommBuffer & buffer = comm.send_buffer( i->second );
      Entity     & entity = * i->first ;
      pack_entity_info( buffer , entity );
      pack_field_values( buffer , entity );
    }

    comm.allocate_buffers( p_size / 4 );

    for ( std::set<EntityProc,EntityLess>::iterator
          i = send_closure.begin() ; i != send_closure.end() ; ++i ) {
      CommBuffer & buffer = comm.send_buffer( i->second );
      Entity     & entity = * i->first ;
      pack_entity_info( buffer , entity );
      pack_field_values( buffer , entity );
    }

    comm.communicate();

    for ( unsigned p = 0 ; p < p_size ; ++p ) {
      CommBuffer & buf = comm.recv_buffer(p);
      while ( buf.remaining() ) {
        PartVector parts ;
        std::vector<Relation> relations ;
        EntityKey key ;
        unsigned  owner = ~0u ;

        unpack_entity_info( buf, *this, key, owner, parts, relations );

        // Received entity information will be correct,
        // modulo the owned and shared parts

        remove( parts , meta.globally_shared_part() );

        if ( owner == p_rank ) {
          // Must have the locally_owned_part
          insert( parts , meta.locally_owned_part() );
        }
        else {
          // Must not have the locally_owned_part
          remove( parts , meta.locally_owned_part() );
        }

        std::pair<Entity*,bool> result =
          m_entity_repo.internal_create_entity( key );

        m_entity_repo.log_created_parallel_copy( *(result.first) );

        // The entity was copied and not created.

        m_entity_repo.set_entity_owner_rank( *(result.first), owner);

        internal_change_entity_parts( *result.first , parts , PartVector() );

        declare_relation( *result.first , relations );

        if ( ! unpack_field_values( buf , * result.first , error_msg ) ) {
          ++error_count ;
        }
      }
    }

    all_reduce( p_comm , ReduceSum<1>( & error_count ) );

    if ( error_count ) {
      throw std::runtime_error( error_msg.str() );
    }

    // Any entity that I sent and is not in an owned closure is deleted.
    // The owned closure will be effected by received entities, so can
    // only clean up after the newly owned entities have been received.
    // Destroy backwards so as not to invalidate closures in the process.

    {
      Entity * entity = NULL ;

      for ( std::set<EntityProc,EntityLess>::iterator
            i = send_closure.end() ; i != send_closure.begin() ; ) {

        Entity * e = (--i)->first ;

        // The same entity may be sent to more than one process.
        // Only evaluate it once.

        if ( entity != e ) {
          entity = e ;
          if ( ! member_of_owned_closure( *e , p_rank ) ) {
            if ( ! destroy_entity( e ) ) {
              throw std::logic_error(std::string("BulkData::destroy_entity FAILED"));
            }
          }
        }
      }
    }

    send_closure.clear(); // Has been invalidated
  }
}
void outer_mpi(mpi::communicator world,
               pt1D*   ptVec,   /* vector of points */
               real2D* matrix,  /* matrix to fill */
               real1D* realVec, /* vector to fill */
               int     n        /* size */
){
  int  lo, hi;              /* work controls */
  int  r, c;                /* loop indices */
  real d;                   /* distance */
  real d_max_local = -1.0;  // maximum distance
  real d_max;               // maximum distance
  bool work;                /* do useful work? */
  int  i, j;

  /* all elements except matrix diagonal */
  work = get_block_rows_mpi (world, 0, n, &lo, &hi);
  if (work) {
    for (r = lo; r < hi; r++) {
      realVec[r] = ptMag(&(ptVec[r]));
      for (c = 0; c < r; c++) {
        d = ptDist (&(ptVec[r]), &(ptVec[c]));
        if (d > d_max_local) {
          d_max_local = d;
        }
        // fill columns 0 to r only
        matrix[r][c] = d;
      }
    }
  }

  // reduce to maximum d's
  all_reduce (world, d_max_local, d_max, mpi::maximum<real>());

  /* matrix diagonal */
  d = d_max * n;
  if (work) {
    for (r = lo; r < hi; r++) {
      matrix[r][r] = d;
    }
  }

  // broadcast matrix, realVec
  for (i = 0; i < world.size (); i++) {
    if (get_block_rows_mpi (world, 0, n, &lo, &hi, i)) {
      broadcast (world, &realVec[lo], hi - lo, i);
      // broadcast row by row since n may be smaller than MAXEXT
      for (j = lo; j < hi; j++) {
        broadcast (world, matrix[j], n, i);
      }
    }
  }

  // fill in the rest to make symmetric matrix
  for (r = 0; r < n; r++) {
    for (c = 0; c < r; c++) {
      matrix[c][r] = matrix[r][c];
    }
  }

  /* return */
}
// Initialize for edge generation
scalable_rmat_iterator(ProcessGroup pg, Distribution distrib,
                       RandomGenerator& gen, vertices_size_type n,
                       edges_size_type m, double a, double b, double c,
                       double d, bool permute_vertices = true)
  : gen(), done(false)
{
  BOOST_ASSERT(a + b + c + d == 1);
  int id = process_id(pg);

  this->gen.reset(new uniform_01<RandomGenerator>(gen));

  std::vector<vertices_size_type> vertexPermutation;
  if (permute_vertices)
    generate_permutation_vector(gen, vertexPermutation, n);

  int SCALE = int(floor(log(double(n))/log(2.)));
  boost::uniform_01<RandomGenerator> prob(gen);

  std::map<value_type, bool> edge_map;

  edges_size_type generated = 0, local_edges = 0;
  do {
    edges_size_type tossed = 0;
    do {
      vertices_size_type u, v;
      boost::tie(u, v) = generate_edge(this->gen, n, SCALE, a, b, c, d);

      if (permute_vertices) {
        u = vertexPermutation[u];
        v = vertexPermutation[v];
      }

      // Lowest vertex number always comes first (this
      // means we don't have to worry about i->j and j->i
      // being in the edge list)
      if (u > v && is_same<directed_category, undirected_tag>::value)
        std::swap(u, v);

      if (distrib(u) == id || distrib(v) == id) {
        if (edge_map.find(std::make_pair(u, v)) == edge_map.end()) {
          edge_map[std::make_pair(u, v)] = true;
          local_edges++;
        } else {
          tossed++;

          // special case - if both u and v are on same
          // proc, ++ twice, since we divide by two (to
          // cover the two process case)
          if (distrib(u) == id && distrib(v) == id)
            tossed++;
        }
      }
      generated++;
    } while (generated < m);
    tossed = all_reduce(pg, tossed, boost::parallel::sum<vertices_size_type>());
    generated -= (tossed / 2);
  } while (generated < m);
  // NGE - Asking for more than n^2 edges will result in an infinite loop here
  //       Asking for a value too close to n^2 edges may as well

  values.reserve(local_edges);
  typename std::map<value_type, bool>::reverse_iterator em_end = edge_map.rend();
  for (typename std::map<value_type, bool>::reverse_iterator
         em_i = edge_map.rbegin(); em_i != em_end ; ++em_i) {
    values.push_back(em_i->first);
  }

  current = values.back();
  values.pop_back();
}