예제 #1
// Reads up to `max` neighbors of the vertex held in `curr_pending`: builds one
// READ request per neighbor and submits the whole batch through `io`.
// Returns the number of requests submitted, or 0 when there is nothing to read.
// NOTE(review): this excerpt has no brace-only lines and the fetch_neighbors()
// call below is cut off after its first argument, so it does not compile as shown.
int worker_thread::process_pending_vertex(int max)
	// Cap the batch at the number of neighbors the pending vertex reports.
	int num_neighbors = min(max, curr_pending.get_num_neighbors());
	if (num_neighbors <= 0)
		return 0;

	// One request slot and one neighbor id per vertex to read.
	stack_array<io_request> reqs(num_neighbors);
	stack_array<vertex_id_t> remain_neighs(num_neighbors);

	// Fill remain_neighs with neighbor ids; the remaining arguments of this call
	// are missing from the excerpt. The assert below requires that exactly
	// num_neighbors ids were returned.
	int ret = curr_pending.fetch_neighbors(remain_neighs.data(),
	assert(ret == num_neighbors);
	for (int j = 0; j < num_neighbors; j++) {
		vertex_id_t neighbor = remain_neighs[j];
		compute_vertex &info = graph->get_vertex(neighbor);
		// The request targets the offset recorded for this vertex in the file
		// that `io` identifies.
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		// NOTE(review): the buffer is allocated with new[] and handed to the
		// request; who releases it is not visible in this excerpt.
		reqs[j].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
	// Logging is optional: only done when the graph has a logger.
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num_neighbors);
	io->access(reqs.data(), num_neighbors);
	return num_neighbors;
예제 #2
// Builds the set of well names that are defunct on this process, i.e. wells
// whose index is not among the indices assigned to this rank.
//
// wells_on_proc: per-rank lists of well indices; only wells_on_proc[root] is
//                read in the visible code.
// eclipseState:  source of the schedule's well list.
// cc:            communicator used for rank/size queries and the MPI calls.
// root:          rank that distributes the index lists.
// Returns the set `defunct_well_names`.
//
// NOTE(review): the return type, all brace-only lines, the head of the send
// call, the branch separating root from non-root ranks and the statement that
// inserts names into the result are missing from this excerpt.
computeDefunctWellNames(const std::vector<std::vector<int> >& wells_on_proc,
                        const Opm::EclipseStateConstPtr eclipseState,
                        const CollectiveCommunication<MPI_Comm>& cc,
                        int root)
    std::vector<const Opm::Well*>  wells  = eclipseState->getSchedule()->getWells();
    std::vector<int> my_well_indices;
    // Tag shared by the sends and the probe/receive below.
    const int well_information_tag = 267553;

    if( root == cc.rank() )
        // One request handle per rank; entries start as MPI_REQUEST_NULL.
        std::vector<MPI_Request> reqs(cc.size(), MPI_REQUEST_NULL);
        my_well_indices = wells_on_proc[root];
        for ( int i=0; i < cc.size(); ++i )
                      // Tail of a truncated MPI call that stores its handle in
                      // reqs[i] -- presumably a non-blocking send of rank i's
                      // index list; TODO confirm against the full source.
                      MPI_INT, i, well_information_tag, cc, &reqs[i]);
        // Block until every posted request has completed.
        std::vector<MPI_Status> stats(reqs.size());
        MPI_Waitall(reqs.size(), reqs.data(), stats.data());
        // The statements below receive from root, so they presumably belong to
        // the non-root branch whose `else` is missing here.
        MPI_Status stat;
        MPI_Probe(root, well_information_tag, cc, &stat);
        // Ask how many ints the pending message carries before receiving it.
        int msg_size;
        MPI_Get_count(&stat, MPI_INT, &msg_size);
        // NOTE(review): no resize of my_well_indices to msg_size is visible
        // before this receive -- confirm it exists in the full source.
        MPI_Recv(my_well_indices.data(), msg_size, MPI_INT, root,
                 well_information_tag, cc, &stat);

    // Compute defunct wells in parallel run.
    // Every well starts out flagged as defunct (ints are used as flags).
    std::vector<int> defunct_wells(wells.size(), true);

    // Wells assigned to this rank are not defunct.
    for(auto well_index : my_well_indices)
        defunct_wells[well_index] = false;

    // We need to use well names as only they are consistent.
    std::unordered_set<std::string> defunct_well_names;

    // For each flag still set, the corresponding well name is presumably
    // inserted into the result; that statement is missing from this excerpt.
    for(auto defunct = defunct_wells.begin(); defunct != defunct_wells.end(); ++defunct)
        if ( *defunct )

    return defunct_well_names;
예제 #3
	// Fragment: sends one SimulationStep message, tagged MASTER_STEP, to each
	// of the `wcount` workers and waits for all sends to finish.
	// NOTE(review): the enclosing function, the declaration of `reqs` and the
	// loop's closing brace are outside this excerpt.
	unsigned wcount = workers();
	// The message is built from `true` and an empty index vector; the meaning
	// of the two constructor arguments is not visible here.
	SimulationStep data(true, std::vector<unsigned>());
	for(unsigned r=0; r < wcount; ++r) {
		// Worker r is addressed as rank r+1.
		reqs[r] = m_world.isend(r+1, MASTER_STEP, data);
	// Block until every non-blocking send has completed.
	boost::mpi::wait_all(reqs.begin(), reqs.end());
예제 #4
/**
 * Arranges the processes in a square, non-periodic 2D Cartesian grid, has each
 * process exchange its rank with its four nearest neighbours, and prints the
 * rank, grid coordinates and neighbour ranks of each process.
 *
 * The grid side is floor(sqrt(number of processes)). When the process count is
 * not a perfect square the surplus processes are not part of the grid; this
 * program does not handle that case, so launch it with a square process count.
 *
 * Every MPI call is wrapped in MPI_(), which is defined outside this block
 * (presumably it checks the returned error code, since MPI_ERRORS_RETURN is
 * installed below).
 *
 * @return 0 on completion.
 */
int main( int argc, char** argv ) {

    int numtasks = 0;
    MPI_( MPI_Init( &argc, &argv ) );
    // Make MPI return error codes instead of aborting.
    MPI_( MPI_Errhandler_set( MPI_COMM_WORLD, MPI_ERRORS_RETURN ) );
    MPI_( MPI_Comm_size( MPI_COMM_WORLD, &numtasks ) );

    // Square DIM x DIM grid, non-periodic in both directions, no rank reordering.
    const int DIM = int( std::sqrt( double( numtasks ) ) );
    std::vector< int > dims( 2, DIM );
    std::vector< int > periods( 2, 0 ); // periodic - false -> non-periodic
    const int reorder = 0;              // false - no reorder
    MPI_Comm cartcomm;
    MPI_( MPI_Cart_create( MPI_COMM_WORLD, 2, &dims[ 0 ], &periods[ 0 ], reorder, &cartcomm ) );

    int task = -1;
    MPI_( MPI_Comm_rank( cartcomm, &task ) );
    std::vector< int > coords( 2, -1 );
    MPI_( MPI_Cart_coords( cartcomm, task, 2, &coords[ 0 ] ) );

    std::vector< int > neighbors( 4, -1 );
    enum { UP = 0, DOWN, LEFT, RIGHT };
    // Compute the shifted source and destination ranks, given a shift
    // direction and amount. MPI_Cart_shift is used to find the two "nearby"
    // neighbours of the calling process along one dimension of the grid.
    // The displacement is a signed integer: if it is positive the "source"
    // rank is lower than the destination rank; if negative, the opposite.
    MPI_( MPI_Cart_shift( cartcomm, 0, 1, &neighbors[ UP ],   &neighbors[ DOWN ] ) );
    MPI_( MPI_Cart_shift( cartcomm, 1, 1, &neighbors[ LEFT ], &neighbors[ RIGHT ] ) );

    int sendbuf = task;
    const int tag = 0x01;
    std::vector< int > recvbuf( 4, MPI_PROC_NULL );
    std::vector< MPI_Request > reqs( 2 * 4 );
    for( int i = 0; i != 4; ++i ) {
        // The neighbour ranks were obtained from cartcomm, so the exchange
        // must run on cartcomm as well; using MPI_COMM_WORLD here would only
        // be correct while both communicators happen to number ranks alike.
        const int peer = neighbors[ i ];
        MPI_( MPI_Isend( &sendbuf, 1, MPI_INT, peer, tag, cartcomm, &reqs[ i ] ) );
        MPI_( MPI_Irecv( &recvbuf[ i ], 1, MPI_INT, peer, tag, cartcomm, &reqs[ i + 4 ] ) );
    }
    // Wait for all four sends and all four receives.
    std::vector< MPI_Status  > status( 2 * 4 );
    MPI_( MPI_Waitall( 8, &reqs[ 0 ], &status[ 0 ] ) );

    // Format the whole line first so it is written with a single insertion.
    std::ostringstream os;
    os << "rank= " << task << " coords= " << coords[ 0 ] << ',' << coords[ 1 ]
       << " neighbors= " << neighbors[ UP ] << ',' << neighbors[ DOWN ] << ','
       << neighbors[ LEFT ] << ',' << neighbors[ RIGHT ] << '\n';
    std::cout << os.str();
    // Flush the stream that was actually written to before shutting MPI down.
    std::cout.flush();
    MPI_( MPI_Finalize() );
    return 0;
}
예제 #5
int worker_thread::process_activated_vertices(int max)
	if (max <= 0)
		return 0;

	vertex_id_t vertex_buf[max];
	stack_array<io_request> reqs(max);
	int num = graph->get_curr_activated_vertices(vertex_buf, max);
	for (int i = 0; i < num; i++) {
		compute_vertex &info = graph->get_vertex(vertex_buf[i]);
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		reqs[i].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num);
	io->access(reqs.data(), num);
	return num;
예제 #6
// Runs one pass over the request queues of this global_cached_io: drains the
// completed disk requests, then works through the stages named in the comments
// below, and finally issues the gathered requests with access().
// NOTE(review): in this excerpt most stages have lost their statements (only
// the describing comments remain) and all brace-only lines are missing, so the
// exact calls made for each stage cannot be confirmed from here.
void global_cached_io::process_all_requests()
	// We first process the completed requests from the disk.
	// It will add completed user requests and pending requests to queues
	// for further processing.
	while (!completed_disk_queue.is_empty()) {
		int num = completed_disk_queue.get_num_entries();
		stack_array<io_request> reqs(num);
		// fetch() reports how many entries it actually copied; only that
		// many are handed on.
		int ret = completed_disk_queue.fetch(reqs.data(), num);
		process_disk_completed_requests(reqs.data(), ret);

	// Process the requests that are pending on the pages.
	// It may add completed user requests to queues for further processing.
	// (The statement guarded by this condition is missing from the excerpt.)
	if (!pending_requests.is_empty())

	// Process buffered user requests.
	// It may add completed user requests to queues for further processing.

	// Collects the requests that are issued by access() at the end of this pass.
	std::vector<io_request> requests;
	// Process the completed requests served in the cache directly.

	// Process completed user requests.

	// Process requests issued in the user compute.
	// We try to gather all requests so we can merge them. However, we only
	// have the local collection of the requests. We still need to rely on
	// the OS's elevator algorithm to merge the requests from different
	// global_cached_io.
	access(requests.data(), requests.size(), NULL);

	// Processing the pending requests on the pages might issue
	// more I/O requests.
예제 #7
// Drains `complete_queue`: runs compute() on every fetched request of type
// USER_COMPUTE and then passes the fetched batch to ::notify_completion().
//
// requests: passed to each compute() call (presumably it receives the new
//           I/O requests generated by the computation -- confirm in compute()).
// Returns the number of requests fetched from the queue, or 0 when the queue
// was empty.
// NOTE(review): brace-only lines, the body of the second loop and the `else`
// in front of `return 0` are missing from this excerpt.
int global_cached_io::process_completed_requests(std::vector<io_request> &requests)
	int num = complete_queue.get_num_entries();
	if (num > 0) {
		stack_array<original_io_request *> reqs(num);
		// fetch() reports how many entries it actually copied.
		int ret = complete_queue.fetch(reqs.data(), num);
		for (int i = 0; i < ret; i++) {
			if (reqs[i]->get_req_type() == io_request::USER_COMPUTE) {
				// This is a user-compute request.
				// NOTE(review): as visible here, this assert repeats the
				// condition of the enclosing `if` and can never fire.
				assert(reqs[i]->get_req_type() == io_request::USER_COMPUTE);
				reqs[i]->compute(this, comp_allocator, requests);
		// The array of original_io_request pointers is reinterpreted as an
		// array of io_request pointers for the notification call.
		::notify_completion(this, (io_request **) reqs.data(), ret);
		// Per-request step after notification; its body is missing from
		// this excerpt.
		for (int i = 0; i < ret; i++) {
		return ret;
		return 0;
예제 #8
// Gathers the input workspaces of all MPI processes that have one into a
// single output workspace created on rank 0 of the `included` communicator.
// Depending on the "AccumulationMethod" property the spectra are either summed
// across processes ("Add") or placed one after another ("Append").
// Throws Exception::MisMatch when the processes disagree on the number of bins
// or on histogram versus point data.
// NOTE(review): this excerpt has lost many closing braces and some statements
// (early returns, the body of the "Add" setup branch, the event-workspace
// dispatch, uses of the spectrum pointers), so it does not compile as shown.
void GatherWorkspaces::exec() {
  // Every process in an MPI job must hit this next line or everything hangs!
  mpi::communicator world; // The communicator containing all processes

  inputWorkspace = getProperty("InputWorkspace");

  // Create a new communicator that includes only those processes that have an
  // input workspace
  const int haveWorkspace(inputWorkspace ? 1 : 0);
  included = world.split(haveWorkspace);

  // If the present process doesn't have an input workspace then its work is
  // done
  // (The early return that presumably follows the log message is missing here.)
  if (!haveWorkspace) {
    g_log.information("No input workspace on this process, so nothing to do.");

  // Get the number of bins in each workspace and check they're all the same
  numBins = inputWorkspace->blocksize();
  std::vector<std::size_t> all_numBins;
  all_gather(included, numBins, all_numBins);
  // Every gathered value must equal the local one.
  if (std::count(all_numBins.begin(), all_numBins.end(), numBins) !=
      (int)all_numBins.size()) {
    // All the processes will error out if all the workspaces don't have the
    // same number of bins
    throw Exception::MisMatch<std::size_t>(
        numBins, 0, "All input workspaces must have the same number of bins");
  // Also check that all workspaces are either histogram or not
  // N.B. boost mpi doesn't seem to like me using booleans in the all_gather
  hist = inputWorkspace->isHistogramData();
  std::vector<int> all_hist;
  all_gather(included, hist, all_hist);
  if (std::count(all_hist.begin(), all_hist.end(), hist) !=
      (int)all_hist.size()) {
    // All the processes will error out if we don't have either all histogram or
    // all point-data workspaces
    throw Exception::MisMatch<int>(
        hist, 0,
        "The input workspaces must be all histogram or all point data");

  // How do we accumulate the data?
  std::string accum = this->getPropertyValue("AccumulationMethod");
  // Get the total number of spectra in the combined inputs
  totalSpec = inputWorkspace->getNumberHistograms();
  sumSpec = totalSpec;
  if (accum == "Append") {
    // Sum the per-process spectrum counts onto rank 0.
    reduce(included, totalSpec, sumSpec, std::plus<std::size_t>(), 0);
  } else if (accum == "Add") {
    // barrier only helps when memory is too low for communication
    // included.barrier();

  eventW = boost::dynamic_pointer_cast<const EventWorkspace>(inputWorkspace);
  if (eventW != NULL) {
    if (getProperty("PreserveEvents")) {
      // Input workspace is an event workspace. Use the other exec method
      // (The call to that method is missing from this excerpt.)

  // The root process needs to create a workspace of the appropriate size
  MatrixWorkspace_sptr outputWorkspace;
  if (included.rank() == 0) {
    g_log.debug() << "Total number of spectra is " << sumSpec << "\n";
    // Create the workspace for the output
    // `numBins + hist` is presumably the X length (one extra bin boundary for
    // histogram data) -- confirm against WorkspaceFactory::create.
    outputWorkspace = WorkspaceFactory::Instance().create(
        inputWorkspace, sumSpec, numBins + hist, numBins);
    setProperty("OutputWorkspace", outputWorkspace);
    ExperimentInfo_sptr inWS = inputWorkspace;

  // Process one local spectrum index at a time; every process in `included`
  // takes part in each iteration.
  for (size_t wi = 0; wi < totalSpec; wi++) {
    if (included.rank() == 0) {
      const ISpectrum *inSpec = inputWorkspace->getSpectrum(wi);
      if (accum == "Add") {
        // X is copied from the root's own input; Y and E are reduced over all
        // processes with the vplus/eplus operators.
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        reduce(included, inputWorkspace->readY(wi), outputWorkspace->dataY(wi),
               vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), outputWorkspace->dataE(wi),
               eplus(), 0);
      } else if (accum == "Append") {
        // Copy over data from own input workspace
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        outputWorkspace->dataY(wi) = inputWorkspace->readY(wi);
        outputWorkspace->dataE(wi) = inputWorkspace->readE(wi);

        // Three receives (X, Y, E) are posted for every other process.
        const int numReqs(3 * (included.size() - 1));
        std::vector<boost::mpi::request> reqs(numReqs);
        int j(0);

        // Receive data from all the other processes
        // This works because the process ranks are ordered the same in
        // 'included' as
        // they are in 'world', but in general this is not guaranteed. TODO:
        // robustify
        for (int i = 1; i < included.size(); ++i) {
          // Spectra from process i are stored in the i-th block of totalSpec
          // rows. Message tags 0/1/2 carry X/Y/E respectively.
          size_t index = wi + i * totalSpec;
          reqs[j++] = included.irecv(i, 0, outputWorkspace->dataX(index));
          reqs[j++] = included.irecv(i, 1, outputWorkspace->dataY(index));
          reqs[j++] = included.irecv(i, 2, outputWorkspace->dataE(index));
          ISpectrum *outSpec = outputWorkspace->getSpectrum(index);

        // Make sure everything's been received before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
      // NOTE(review): inSpec/outSpec are never used in the visible code; the
      // statements that use them are presumably among the missing lines.
      ISpectrum *outSpec = outputWorkspace->getSpectrum(wi);
    } else {
      if (accum == "Add") {
        // Non-root side of the two reductions performed by rank 0 above.
        reduce(included, inputWorkspace->readY(wi), vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), eplus(), 0);
      } else if (accum == "Append") {
        std::vector<boost::mpi::request> reqs(3);

        // Send the spectrum to the root process
        // NOTE(review): these sends always read spectrum 0 rather than `wi`,
        // while the root stores the received data at wi + rank * totalSpec.
        // With more than one spectrum per process this looks like it sends the
        // same spectrum every iteration -- confirm whether `wi` was intended.
        reqs[0] = included.isend(0, 0, inputWorkspace->readX(0));
        reqs[1] = included.isend(0, 1, inputWorkspace->readY(0));
        reqs[2] = included.isend(0, 2, inputWorkspace->readE(0));

        // Make sure the sends have completed before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
예제 #9
// Serves one connection: wraps `fd` in a transport connection, then repeatedly
// lets `proto` process requests read from it until request_proc() reports that
// no more work should be done.
//
// fd:      descriptor of the connection.
// netaddr: peer address; it is deleted when this function leaves (see
//          netaddr_guard_t below), so the caller gives up ownership.
//
// NOTE(review): brace-only lines, access specifiers and several statements
// (guard bodies, loop tail) are missing from this excerpt.
void io_stream_t::conn_proc(int fd, netaddr_t *netaddr) const {
	// Scope guard bound to fd (presumably closes it on exit -- confirm in
	// fd_guard_t).
	fd_guard_t fd_guard(fd);

	// Local scope guard that deletes the peer address on destruction.
	class netaddr_guard_t {
		netaddr_t *netaddr;
		inline netaddr_guard_t(netaddr_t *_netaddr) throw() : netaddr(_netaddr) { }
		inline ~netaddr_guard_t() throw() { delete netaddr; }
	} netaddr_guard(netaddr);


	bq_conn_t *conn = transport.new_connect(fd, ctl(), remote_errors);

	// Local scope guard that owns `conn` and keeps a reference to the
	// statistics' mmconns object; what it does with mmconns in the constructor
	// and destructor is missing from this excerpt.
	class conn_guard_t {
		bq_conn_t *conn;
		io_stream::mmconns_t &mmconns;
		inline conn_guard_t(bq_conn_t *_conn, stat_t &stat) throw() :
			conn(_conn), mmconns(stat.mmconns()) {


		inline ~conn_guard_t() throw() {

			delete conn;
	} conn_guard(conn, stat);


	netaddr_t const &local_addr = bind_addr();

	// Input side of the connection; it is given the input counter from `stat`.
	bq_in_t in(*conn, ibuf_size, &stat.icount());

	// Main loop: runs while the protocol handler asks for more work.
	for(bool work = true; work;) {
			// Output side with a buffer on the stack, created anew on every
			// pass of the outer loop.
			char obuf[obuf_size];
			bq_out_t out(*conn, obuf, sizeof(obuf), &stat.ocount());

			in_t::ptr_t ptr(in);

			// Call the protocol handler repeatedly for as long as
			// in.truncate(ptr) returns true.
			do {

				work = proto.request_proc(ptr, out, local_addr, *netaddr);


			} while(in.truncate(ptr));

		// Before the next pass, wait for input on fd, bounded by a copy of the
		// keepalive interval; the handling of a negative result is missing
		// from this excerpt.
		if(work) {
			short int events = POLLIN;
			interval_t _keepalive = keepalive;
			if(bq_poll(fd, events, &_keepalive) < 0)
