int worker_thread::process_pending_vertex(int max)
{
	int num_neighbors = min(max, curr_pending.get_num_neighbors());
	if (num_neighbors <= 0)
		return 0;

	stack_array<io_request> reqs(num_neighbors);
	stack_array<vertex_id_t> remain_neighs(num_neighbors);
	int ret = curr_pending.fetch_neighbors(remain_neighs.data(), num_neighbors);
	assert(ret == num_neighbors);
	for (int j = 0; j < num_neighbors; j++) {
		vertex_id_t neighbor = remain_neighs[j];
		compute_vertex &info = graph->get_vertex(neighbor);
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		reqs[j].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
		reqs[j].set_user_data(curr_pending.get_pending_vertex());
	}
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num_neighbors);
	io->access(reqs.data(), num_neighbors);
	return num_neighbors;
}
std::unordered_set<std::string>
computeDefunctWellNames(const std::vector<std::vector<int> >& wells_on_proc,
                        const Opm::EclipseStateConstPtr eclipseState,
                        const CollectiveCommunication<MPI_Comm>& cc,
                        int root)
{
    std::vector<const Opm::Well*> wells = eclipseState->getSchedule()->getWells();
    std::vector<int> my_well_indices;
    const int well_information_tag = 267553;

    if( root == cc.rank() )
    {
        // The root sends every other rank the indices of the wells that
        // rank owns, then waits for all sends to complete.
        std::vector<MPI_Request> reqs(cc.size(), MPI_REQUEST_NULL);
        my_well_indices = wells_on_proc[root];
        for ( int i=0; i < cc.size(); ++i )
        {
            if(i==root)
            {
                continue;
            }
            MPI_Isend(const_cast<int*>(wells_on_proc[i].data()),
                      wells_on_proc[i].size(),
                      MPI_INT, i, well_information_tag,
                      cc, &reqs[i]);
        }
        std::vector<MPI_Status> stats(reqs.size());
        MPI_Waitall(reqs.size(), reqs.data(), stats.data());
    }
    else
    {
        // Probe first to learn the message size, then receive the indices.
        MPI_Status stat;
        MPI_Probe(root, well_information_tag, cc, &stat);
        int msg_size;
        MPI_Get_count(&stat, MPI_INT, &msg_size);
        my_well_indices.resize(msg_size);
        MPI_Recv(my_well_indices.data(), msg_size, MPI_INT, root,
                 well_information_tag, cc, &stat);
    }

    // Compute defunct wells in parallel run.
    std::vector<int> defunct_wells(wells.size(), true);

    for(auto well_index : my_well_indices)
    {
        defunct_wells[well_index] = false;
    }

    // We need to use well names as only they are consistent.
    std::unordered_set<std::string> defunct_well_names;

    for(auto defunct = defunct_wells.begin(); defunct != defunct_wells.end();
        ++defunct)
    {
        if ( *defunct )
        {
            defunct_well_names.insert(wells[defunct-defunct_wells.begin()]->name());
        }
    }

    return defunct_well_names;
}
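// A hedged alternative sketch (not part of the function above): the same
// root-to-ranks distribution of well indices can be expressed with MPI
// collectives instead of Isend/Probe/Recv. The function name below is
// illustrative; `wells_on_proc` is assumed meaningful only on the root, as
// above. Counts are scattered first so every rank knows how many indices to
// expect, which removes the need for MPI_Probe.
std::vector<int> scatterWellIndices(const std::vector<std::vector<int> >& wells_on_proc,
                                    MPI_Comm comm, int root)
{
    int rank, size;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    // Flatten the per-rank lists on the root and record counts/offsets.
    std::vector<int> counts, displs, sendbuf;
    if (rank == root) {
        for (const auto& w : wells_on_proc) {
            displs.push_back(static_cast<int>(sendbuf.size()));
            counts.push_back(static_cast<int>(w.size()));
            sendbuf.insert(sendbuf.end(), w.begin(), w.end());
        }
    }
    int my_count = 0;
    MPI_Scatter(counts.data(), 1, MPI_INT, &my_count, 1, MPI_INT, root, comm);
    std::vector<int> my_indices(my_count);
    MPI_Scatterv(sendbuf.data(), counts.data(), displs.data(), MPI_INT,
                 my_indices.data(), my_count, MPI_INT, root, comm);
    return my_indices;
}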
void Master::terminate()
{
    unsigned wcount = workers();
    // An empty step whose first argument is set presumably marks termination.
    SimulationStep data(true, std::vector<unsigned>());
    std::vector<boost::mpi::request> reqs(wcount);
    // Workers occupy ranks 1..wcount; rank 0 is the master itself.
    for(unsigned r=0; r < wcount; ++r)
    {
        reqs[r] = m_world.isend(r+1, MASTER_STEP, data);
    }
    boost::mpi::wait_all(reqs.begin(), reqs.end());
}
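// A minimal sketch of the matching worker side, under the assumption that
// workers block on MASTER_STEP messages from rank 0 and that SimulationStep's
// first constructor argument is a termination flag. The Worker class, its
// run() loop, and the terminate()/process() accessors are hypothetical; only
// the MASTER_STEP tag and the SimulationStep payload come from the code above.
void Worker::run()
{
    for (;;) {
        SimulationStep step;
        m_world.recv(0, MASTER_STEP, step); // blocking receive from the master
        if (step.terminate())               // hypothetical accessor for the flag
            break;
        process(step);                      // hypothetical per-step work
    }
}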
int main( int argc, char** argv ) {
    int numtasks = 0;
    MPI_( MPI_Init( &argc, &argv ) );
    MPI_( MPI_Errhandler_set( MPI_COMM_WORLD, MPI_ERRORS_RETURN ) );
    MPI_( MPI_Comm_size( MPI_COMM_WORLD, &numtasks ) );
    // Assumes numtasks is a perfect square; otherwise some ranks fall outside
    // the DIM x DIM grid created below.
    const int DIM = int( std::sqrt( double( numtasks ) ) );
    std::vector< int > dims( 2, DIM );
    std::vector< int > periods( 2, 0 ); // 0 -> non-periodic in both dimensions
    const int reorder = 0; // false - no rank reordering
    MPI_Comm cartcomm;
    MPI_( MPI_Cart_create( MPI_COMM_WORLD, 2, &dims[ 0 ], &periods[ 0 ],
                           reorder, &cartcomm ) );
    int task = -1;
    MPI_( MPI_Comm_rank( cartcomm, &task ) );
    std::vector< int > coords( 2, -1 );
    MPI_( MPI_Cart_coords( cartcomm, task, 2, &coords[ 0 ] ) );
    std::vector< int > neighbors( 4, -1 );
    enum { UP = 0, DOWN, LEFT, RIGHT };
    // Compute the shifted source and destination ranks, given a shift
    // direction and amount. MPI_Cart_shift is used to find the two "nearby"
    // neighbors of the calling process along a specified direction of an
    // N-dimensional grid. The direction and offset are specified as signed
    // integers: with a positive displacement the "source" rank is lower than
    // the destination rank; with a negative one the opposite is true.
    MPI_( MPI_Cart_shift( cartcomm, 0, 1, &neighbors[ UP ], &neighbors[ DOWN ] ) );
    MPI_( MPI_Cart_shift( cartcomm, 1, 1, &neighbors[ LEFT ], &neighbors[ RIGHT ] ) );
    int sendbuf = task;
    const int tag = 0x01;
    std::vector< int > recvbuf( 4, MPI_PROC_NULL );
    std::vector< MPI_Request > reqs( 2 * 4 );
    for( int i = 0; i != 4; ++i ) {
        int dest = neighbors[ i ];
        int src  = neighbors[ i ];
        // Use cartcomm here: the neighbor ranks were computed in that
        // communicator. With reorder == 0 they happen to match
        // MPI_COMM_WORLD ranks, but relying on that is fragile.
        MPI_( MPI_Isend( &sendbuf, 1, MPI_INT, dest, tag, cartcomm,
                         &reqs[ i ] ) );
        MPI_( MPI_Irecv( &recvbuf[ i ], 1, MPI_INT, src, tag, cartcomm,
                         &reqs[ i + 4 ] ) );
    }
    std::vector< MPI_Status > status( 2 * 4 );
    MPI_( MPI_Waitall( 2 * 4, &reqs[ 0 ], &status[ 0 ] ) );
    std::ostringstream os;
    os << "rank= " << task << " coords= " << coords[ 0 ] << ',' << coords[ 1 ]
       << " neighbors= " << neighbors[ UP ] << ',' << neighbors[ DOWN ] << ','
       << neighbors[ LEFT ] << ',' << neighbors[ RIGHT ] << '\n';
    std::cout << os.str();
    os.flush();
    MPI_( MPI_Finalize() );
    return 0;
}
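// A hedged alternative for the halo exchange above: the four Isend/Irecv
// pairs can be collapsed into four MPI_Sendrecv calls, trading the request
// bookkeeping for serialized exchanges. The function name is illustrative;
// it assumes the same cartcomm/neighbors layout as the main() above, and
// MPI_PROC_NULL neighbors make the corresponding call a no-op.
void exchange_with_sendrecv( MPI_Comm cartcomm,
                             const std::vector< int >& neighbors,
                             int sendbuf, std::vector< int >& recvbuf ) {
    const int tag = 0x01;
    for( int i = 0; i != 4; ++i ) {
        // Send our rank to neighbor i and receive its rank in exchange.
        MPI_Sendrecv( &sendbuf, 1, MPI_INT, neighbors[ i ], tag,
                      &recvbuf[ i ], 1, MPI_INT, neighbors[ i ], tag,
                      cartcomm, MPI_STATUS_IGNORE );
    }
}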
int worker_thread::process_activated_vertices(int max)
{
	if (max <= 0)
		return 0;

	vertex_id_t vertex_buf[max];
	stack_array<io_request> reqs(max);
	int num = graph->get_curr_activated_vertices(vertex_buf, max);
	for (int i = 0; i < num; i++) {
		compute_vertex &info = graph->get_vertex(vertex_buf[i]);
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		reqs[i].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
	}
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num);
	io->access(reqs.data(), num);
	return num;
}
void global_cached_io::process_all_requests()
{
	// We first process the completed requests from the disk.
	// It will add completed user requests and pending requests to queues
	// for further processing.
	while (!completed_disk_queue.is_empty()) {
		int num = completed_disk_queue.get_num_entries();
		stack_array<io_request> reqs(num);
		int ret = completed_disk_queue.fetch(reqs.data(), num);
		process_disk_completed_requests(reqs.data(), ret);
	}

	// Process the requests that are pending on the pages.
	// It may add completed user requests to queues for further processing.
	if (!pending_requests.is_empty())
		handle_pending_requests();

	// Process buffered user requests.
	// It may add completed user requests to queues for further processing.
	process_user_reqs();

	std::vector<io_request> requests;
	// Process the completed requests served in the cache directly.
	process_cached_reqs(requests);
	// Process completed user requests.
	process_completed_requests(requests);

	// Process requests issued in the user compute.
	// We try to gather all requests so we can merge them. However, we only
	// have the local collection of the requests. We still need to rely on
	// the OS's elevator algorithm to merge the requests from different
	// global_cached_io.
	access(requests.data(), requests.size(), NULL);

	// Processing the pending requests on the pages might issue
	// more I/O requests.
	flush_requests();
}
int global_cached_io::process_completed_requests(std::vector<io_request> &requests)
{
	int num = complete_queue.get_num_entries();
	if (num > 0) {
		stack_array<original_io_request *> reqs(num);
		int ret = complete_queue.fetch(reqs.data(), num);
		for (int i = 0; i < ret; i++) {
			// Run the user compute for user-compute requests.
			if (reqs[i]->get_req_type() == io_request::USER_COMPUTE)
				reqs[i]->compute(this, comp_allocator, requests);
		}
		num_completed_areqs.inc(ret);
		::notify_completion(this, (io_request **) reqs.data(), ret);
		for (int i = 0; i < ret; i++) {
			req_allocator->free(reqs[i]);
		}
		return ret;
	}
	else
		return 0;
}
void GatherWorkspaces::exec() {
  // Every process in an MPI job must hit this next line or everything hangs!
  mpi::communicator world; // The communicator containing all processes
  inputWorkspace = getProperty("InputWorkspace");
  // Create a new communicator that includes only those processes that have an
  // input workspace
  const int haveWorkspace(inputWorkspace ? 1 : 0);
  included = world.split(haveWorkspace);
  // If the present process doesn't have an input workspace then its work is
  // done
  if (!haveWorkspace) {
    g_log.information("No input workspace on this process, so nothing to do.");
    return;
  }
  // Get the number of bins in each workspace and check they're all the same
  numBins = inputWorkspace->blocksize();
  std::vector<std::size_t> all_numBins;
  all_gather(included, numBins, all_numBins);
  if (std::count(all_numBins.begin(), all_numBins.end(), numBins) !=
      (int)all_numBins.size()) {
    // All the processes will error out if all the workspaces don't have the
    // same number of bins
    throw Exception::MisMatch<std::size_t>(
        numBins, 0, "All input workspaces must have the same number of bins");
  }
  // Also check that all workspaces are either histogram or not
  // N.B. boost mpi doesn't seem to like me using booleans in the all_gather
  hist = inputWorkspace->isHistogramData();
  std::vector<int> all_hist;
  all_gather(included, hist, all_hist);
  if (std::count(all_hist.begin(), all_hist.end(), hist) !=
      (int)all_hist.size()) {
    // All the processes will error out if we don't have either all histogram
    // or all point-data workspaces
    throw Exception::MisMatch<int>(
        hist, 0, "The input workspaces must be all histogram or all point data");
  }
  // How do we accumulate the data?
  std::string accum = this->getPropertyValue("AccumulationMethod");
  // Get the total number of spectra in the combined inputs
  totalSpec = inputWorkspace->getNumberHistograms();
  sumSpec = totalSpec;
  if (accum == "Append") {
    reduce(included, totalSpec, sumSpec, std::plus<std::size_t>(), 0);
  } else if (accum == "Add") {
    // barrier only helps when memory is too low for communication
    // included.barrier();
  }
  eventW = boost::dynamic_pointer_cast<const EventWorkspace>(inputWorkspace);
  if (eventW != NULL) {
    if (getProperty("PreserveEvents")) {
      // Input workspace is an event workspace. Use the other exec method.
      this->execEvent();
      return;
    }
  }
  // The root process needs to create a workspace of the appropriate size
  MatrixWorkspace_sptr outputWorkspace;
  if (included.rank() == 0) {
    g_log.debug() << "Total number of spectra is " << sumSpec << "\n";
    // Create the workspace for the output
    outputWorkspace = WorkspaceFactory::Instance().create(
        inputWorkspace, sumSpec, numBins + hist, numBins);
    setProperty("OutputWorkspace", outputWorkspace);
    ExperimentInfo_sptr inWS = inputWorkspace;
    outputWorkspace->copyExperimentInfoFrom(inWS.get());
  }
  for (size_t wi = 0; wi < totalSpec; wi++) {
    if (included.rank() == 0) {
      const ISpectrum *inSpec = inputWorkspace->getSpectrum(wi);
      if (accum == "Add") {
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        reduce(included, inputWorkspace->readY(wi), outputWorkspace->dataY(wi),
               vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), outputWorkspace->dataE(wi),
               eplus(), 0);
      } else if (accum == "Append") {
        // Copy over data from own input workspace
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        outputWorkspace->dataY(wi) = inputWorkspace->readY(wi);
        outputWorkspace->dataE(wi) = inputWorkspace->readE(wi);

        const int numReqs(3 * (included.size() - 1));
        std::vector<boost::mpi::request> reqs(numReqs);
        int j(0);

        // Receive data from all the other processes.
        // This works because the process ranks are ordered the same in
        // 'included' as they are in 'world', but in general this is not
        // guaranteed. TODO: robustify
        for (int i = 1; i < included.size(); ++i) {
          size_t index = wi + i * totalSpec;
          reqs[j++] = included.irecv(i, 0, outputWorkspace->dataX(index));
          reqs[j++] = included.irecv(i, 1, outputWorkspace->dataY(index));
          reqs[j++] = included.irecv(i, 2, outputWorkspace->dataE(index));
          ISpectrum *outSpec = outputWorkspace->getSpectrum(index);
          outSpec->clearDetectorIDs();
          outSpec->addDetectorIDs(inSpec->getDetectorIDs());
        }

        // Make sure everything's been received before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
      }
      ISpectrum *outSpec = outputWorkspace->getSpectrum(wi);
      outSpec->clearDetectorIDs();
      outSpec->addDetectorIDs(inSpec->getDetectorIDs());
    } else {
      if (accum == "Add") {
        reduce(included, inputWorkspace->readY(wi), vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), eplus(), 0);
      } else if (accum == "Append") {
        std::vector<boost::mpi::request> reqs(3);
        // Send the current spectrum to the root process
        reqs[0] = included.isend(0, 0, inputWorkspace->readX(wi));
        reqs[1] = included.isend(0, 1, inputWorkspace->readY(wi));
        reqs[2] = included.isend(0, 2, inputWorkspace->readE(wi));
        // Make sure the sends have completed before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
      }
    }
  }
}
void io_stream_t::conn_proc(int fd, netaddr_t *netaddr) const {
	fd_guard_t fd_guard(fd);

	// RAII guard: frees the peer address on every exit path.
	class netaddr_guard_t {
		netaddr_t *netaddr;
	public:
		inline netaddr_guard_t(netaddr_t *_netaddr) throw() : netaddr(_netaddr) { }
		inline ~netaddr_guard_t() throw() { delete netaddr; }
	} netaddr_guard(netaddr);

	bq_fd_setup(fd);
	bq_conn_t *conn = transport.new_connect(fd, ctl(), remote_errors);

	// RAII guard: maintains the live-connection counter and deletes the
	// connection when this proc exits.
	class conn_guard_t {
		bq_conn_t *conn;
		io_stream::mmconns_t &mmconns;
	public:
		inline conn_guard_t(bq_conn_t *_conn, stat_t &stat) throw()
			: conn(_conn), mmconns(stat.mmconns()) { ++mmconns; }
		inline ~conn_guard_t() throw() { --mmconns; delete conn; }
	} conn_guard(conn, stat);

	conn->setup_accept();

	netaddr_t const &local_addr = bind_addr();
	bq_in_t in(*conn, ibuf_size, &stat.icount());

	for(bool work = true; work;) {
		{
			char obuf[obuf_size];
			bq_out_t out(*conn, obuf, sizeof(obuf), &stat.ocount());
			in_t::ptr_t ptr(in);
			do {
				in.timeout_set(timeout);
				out.timeout_set(timeout);
				work = proto.request_proc(ptr, out, local_addr, *netaddr);
				++stat.reqs();
				if(!work) break;
			} while(in.truncate(ptr));
		}
		if(work) {
			// Keep-alive: wait for more input until the keepalive
			// interval expires.
			short int events = POLLIN;
			interval_t _keepalive = keepalive;
			if(bq_poll(fd, events, &_keepalive) < 0) break;
		}
	}

	conn->shutdown();
}