Example #1
int worker_thread::process_pending_vertex(int max)
{
	int num_neighbors = min(max, curr_pending.get_num_neighbors());
	if (num_neighbors <= 0)
		return 0;

	stack_array<io_request> reqs(num_neighbors);
	stack_array<vertex_id_t> remain_neighs(num_neighbors);

	int ret = curr_pending.fetch_neighbors(remain_neighs.data(),
			num_neighbors);
	assert(ret == num_neighbors);
	for (int j = 0; j < num_neighbors; j++) {
		vertex_id_t neighbor = remain_neighs[j];
		compute_vertex &info = graph->get_vertex(neighbor);
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		reqs[j].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
		reqs[j].set_user_data(curr_pending.get_pending_vertex());
	}
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num_neighbors);
	io->access(reqs.data(), num_neighbors);
	return num_neighbors;
}
Example #2
std::unordered_set<std::string>
computeDefunctWellNames(const std::vector<std::vector<int> >& wells_on_proc,
                        const Opm::EclipseStateConstPtr eclipseState,
                        const CollectiveCommunication<MPI_Comm>& cc,
                        int root)
{
    std::vector<const Opm::Well*>  wells  = eclipseState->getSchedule()->getWells();
    std::vector<int> my_well_indices;
    const int well_information_tag = 267553;

    if( root == cc.rank() )
    {
        std::vector<MPI_Request> reqs(cc.size(), MPI_REQUEST_NULL);
        my_well_indices = wells_on_proc[root];
        for ( int i=0; i < cc.size(); ++i )
        {
            if(i==root)
            {
                continue;
            }
            MPI_Isend(const_cast<int*>(wells_on_proc[i].data()),
                      wells_on_proc[i].size(),
                      MPI_INT, i, well_information_tag, cc, &reqs[i]);
        }
        std::vector<MPI_Status> stats(reqs.size());
        MPI_Waitall(reqs.size(), reqs.data(), stats.data());
    }
    else
    {
        MPI_Status stat;
        MPI_Probe(root, well_information_tag, cc, &stat);
        int msg_size;
        MPI_Get_count(&stat, MPI_INT, &msg_size);
        my_well_indices.resize(msg_size);
        MPI_Recv(my_well_indices.data(), msg_size, MPI_INT, root,
                 well_information_tag, cc, &stat);
    }

    // Compute defunct wells in parallel run.
    std::vector<int> defunct_wells(wells.size(), true);

    for(auto well_index : my_well_indices)
    {
        defunct_wells[well_index] = false;
    }

    // We need to use well names as only they are consistent.
    std::unordered_set<std::string> defunct_well_names;

    for(auto defunct = defunct_wells.begin(); defunct != defunct_wells.end(); ++defunct)
    {
        if ( *defunct )
        {
            defunct_well_names.insert(wells[defunct-defunct_wells.begin()]->name());
        }
    }

    return defunct_well_names;
}
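
A minimal, self-contained sketch (not from the OPM sources) of the variable-length scatter pattern used above: the root posts one MPI_Isend per peer and waits on all of them, while every other rank probes for the incoming length before receiving. The function name scatter_variable_length and the tag value are illustrative assumptions.

#include <mpi.h>
#include <vector>

std::vector<int> scatter_variable_length(const std::vector<std::vector<int> >& payload_for,
                                         MPI_Comm comm, int root)
{
    const int TAG = 42;                          // arbitrary tag (assumption)
    int rank = 0, size = 0;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    std::vector<int> mine;
    if (rank == root) {
        // One request slot per rank; the root's own slot stays MPI_REQUEST_NULL,
        // which MPI_Waitall ignores.
        std::vector<MPI_Request> reqs(size, MPI_REQUEST_NULL);
        mine = payload_for[root];
        for (int i = 0; i < size; ++i) {
            if (i == root)
                continue;
            MPI_Isend(const_cast<int*>(payload_for[i].data()),
                      static_cast<int>(payload_for[i].size()),
                      MPI_INT, i, TAG, comm, &reqs[i]);
        }
        MPI_Waitall(size, reqs.data(), MPI_STATUSES_IGNORE);
    } else {
        // Probe first so the receive buffer can be sized to the actual message.
        MPI_Status stat;
        MPI_Probe(root, TAG, comm, &stat);
        int count = 0;
        MPI_Get_count(&stat, MPI_INT, &count);
        mine.resize(count);
        MPI_Recv(mine.data(), count, MPI_INT, root, TAG, comm, MPI_STATUS_IGNORE);
    }
    return mine;
}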
Example #3
void
Master::terminate()
{
	unsigned wcount = workers();
	SimulationStep data(true, std::vector<unsigned>());
	std::vector<boost::mpi::request> reqs(wcount);
	for(unsigned r=0; r < wcount; ++r) {
		reqs[r] = m_world.isend(r+1, MASTER_STEP, data);
	}
	boost::mpi::wait_all(reqs.begin(), reqs.end());
}
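
A self-contained sketch of the same fan-out idiom with Boost.MPI, outside the Master/worker classes above: rank 0 posts one non-blocking send per worker and waits on all of them, while each worker does a matching blocking receive. The tag value and the string payload are placeholders.

#include <boost/mpi.hpp>
#include <string>
#include <vector>

int main(int argc, char** argv)
{
	boost::mpi::environment env(argc, argv);
	boost::mpi::communicator world;
	const int TAG = 7;                                   // illustrative tag

	if (world.rank() == 0) {
		std::vector<boost::mpi::request> reqs(world.size() - 1);
		const std::string msg = "stop";
		for (int r = 1; r < world.size(); ++r)
			reqs[r - 1] = world.isend(r, TAG, msg);      // non-blocking send to each worker
		boost::mpi::wait_all(reqs.begin(), reqs.end());  // complete all sends before returning
	} else {
		std::string msg;
		world.recv(0, TAG, msg);                         // matching blocking receive
	}
	return 0;
}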
Example #4
int main( int argc, char** argv ) {

    int numtasks = 0; 
 
    MPI_( MPI_Init( &argc, &argv ) );
    MPI_( MPI_Errhandler_set( MPI_COMM_WORLD, MPI_ERRORS_RETURN ) );
    MPI_( MPI_Comm_size( MPI_COMM_WORLD, &numtasks ) );
    
    const int DIM = int( std::sqrt( double( numtasks ) ) );
    std::vector< int > dims( 2, DIM );
    std::vector< int > periods( 2, 0 ); //periodic - false -> non-periodic
    const int reorder = 0; //false - no reorder
    MPI_Comm cartcomm;
    MPI_( MPI_Cart_create( MPI_COMM_WORLD, 2, &dims[ 0 ], &periods[ 0 ], reorder, &cartcomm ) ); 
    int task = -1;
    MPI_( MPI_Comm_rank( cartcomm, &task ) );
    std::vector< int > coords( 2, -1 );
    MPI_( MPI_Cart_coords( cartcomm, task, 2, &coords[ 0 ] ) );
     
    std::vector< int > neighbors( 4, -1 );
    enum { UP = 0, DOWN, LEFT, RIGHT };
    // compute the shifted source and destination ranks, given a shift direction and amount
    //MPI_Cart_shift is used to find two "nearby" neighbors of the calling process
    //along a specified direction of an N-dimensional grid.
    //The direction and offset are specified as a signed integer.
    //If the sign of the displacement is positive the "source" rank is lower
    //than the destination rank; if it's negative the opposite is true.
    MPI_( MPI_Cart_shift( cartcomm, 0, 1, &neighbors[ UP ],   &neighbors[ DOWN ] ) );
    MPI_( MPI_Cart_shift( cartcomm, 1, 1, &neighbors[ LEFT ], &neighbors[ RIGHT ] ) );
    int sendbuf = task;
    const int tag = 0x01;
    std::vector< int > recvbuf( 4, MPI_PROC_NULL ); 
    std::vector< MPI_Request > reqs( 2 * 4 );
    for( int i = 0; i != 4; ++i ) {
        int dest = neighbors[ i ];
        int src  = neighbors[ i ];
        MPI_( MPI_Isend( &sendbuf, 1, MPI_INT, dest, tag, MPI_COMM_WORLD, &reqs[ i ] ) );
        MPI_( MPI_Irecv( &recvbuf[ i ], 1, MPI_INT, src, tag, MPI_COMM_WORLD, &reqs[ i + 4 ] ) );
    }
    std::vector< MPI_Status  > status( 2 * 4 ); 
    MPI_( MPI_Waitall( 8, &reqs[ 0 ], &status[ 0 ] ) );

    std::ostringstream os;
    os << "rank= " << task << " coords= " << coords[ 0 ] << ',' << coords[ 1 ]
       << " neighbors= " << neighbors[ UP ] << ',' << neighbors[ DOWN ] << ','
       << neighbors[ LEFT ] << ',' << neighbors[ RIGHT ] << '\n';
    std::cout << os.str(); os.flush();
 
    MPI_( MPI_Finalize() );
    
    return 0;
}
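
The MPI_() wrapper used throughout this example is not shown in the snippet; given the MPI_ERRORS_RETURN handler installed above, it is presumably an error-checking macro. A minimal stand-in (an assumption, not the original definition) could look like this:

#include <cstdio>
#include <mpi.h>

// Hypothetical error-checking wrapper: print the offending location and abort
// if an MPI call does not return MPI_SUCCESS.
#define MPI_( call )                                                   \
    do {                                                               \
        const int mpi_err_ = ( call );                                 \
        if( mpi_err_ != MPI_SUCCESS ) {                                \
            std::fprintf( stderr, "MPI error %d at %s:%d\n",           \
                          mpi_err_, __FILE__, __LINE__ );              \
            MPI_Abort( MPI_COMM_WORLD, mpi_err_ );                     \
        }                                                              \
    } while( 0 )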
Example #5
int worker_thread::process_activated_vertices(int max)
{
	if (max <= 0)
		return 0;

	vertex_id_t vertex_buf[max];
	stack_array<io_request> reqs(max);
	int num = graph->get_curr_activated_vertices(vertex_buf, max);
	for (int i = 0; i < num; i++) {
		compute_vertex &info = graph->get_vertex(vertex_buf[i]);
		data_loc_t loc(io->get_file_id(), info.get_ext_mem_off());
		reqs[i].init(new char[info.get_ext_mem_size()], loc,
				// TODO I might need to set the node id.
				info.get_ext_mem_size(), READ, io, -1);
	}
	if (graph->get_logger())
		graph->get_logger()->log(reqs.data(), num);
	io->access(reqs.data(), num);
	return num;
}
Example #6
void global_cached_io::process_all_requests()
{
	// We first process the completed requests from the disk.
	// It will add completed user requests and pending requests to queues
	// for further processing.
	while (!completed_disk_queue.is_empty()) {
		int num = completed_disk_queue.get_num_entries();
		stack_array<io_request> reqs(num);
		int ret = completed_disk_queue.fetch(reqs.data(), num);
		process_disk_completed_requests(reqs.data(), ret);
	}

	// Process the requests that are pending on the pages.
	// It may add completed user requests to queues for further processing. 
	if (!pending_requests.is_empty())
		handle_pending_requests();

	// Process buffered user requests.
	// It may add completed user requests to queues for further processing. 
	process_user_reqs();

	std::vector<io_request> requests;
	// Process the completed requests served in the cache directly.
	process_cached_reqs(requests);

	// Process completed user requests.
	process_completed_requests(requests);

	// Process requests issued in the user compute.
	// We try to gather all requests so we can merge them. However, we only
	// have the local collection of the requests. We still need to rely on
	// the OS's elevator algorithm to merge the requests from different
	// global_cached_io.
	access(requests.data(), requests.size(), NULL);

	// Processing the pending requests on the pages might issue
	// more I/O requests.
	flush_requests();
}
Example #7
int global_cached_io::process_completed_requests(std::vector<io_request> &requests)
{
	int num = complete_queue.get_num_entries();
	if (num > 0) {
		stack_array<original_io_request *> reqs(num);
		int ret = complete_queue.fetch(reqs.data(), num);
		for (int i = 0; i < ret; i++) {
			if (reqs[i]->get_req_type() == io_request::USER_COMPUTE) {
				// This is a user-compute request.
				assert(reqs[i]->get_req_type() == io_request::USER_COMPUTE);
				reqs[i]->compute(this, comp_allocator, requests);
			}
		}
		num_completed_areqs.inc(ret);
		::notify_completion(this, (io_request **) reqs.data(), ret);
		for (int i = 0; i < ret; i++) {
			req_allocator->free(reqs[i]);
		}
		return ret;
	}
	else
		return 0;
}
Example #8
void GatherWorkspaces::exec() {
  // Every process in an MPI job must hit this next line or everything hangs!
  mpi::communicator world; // The communicator containing all processes

  inputWorkspace = getProperty("InputWorkspace");

  // Create a new communicator that includes only those processes that have an
  // input workspace
  const int haveWorkspace(inputWorkspace ? 1 : 0);
  included = world.split(haveWorkspace);

  // If the present process doesn't have an input workspace then its work is
  // done
  if (!haveWorkspace) {
    g_log.information("No input workspace on this process, so nothing to do.");
    return;
  }

  // Get the number of bins in each workspace and check they're all the same
  numBins = inputWorkspace->blocksize();
  std::vector<std::size_t> all_numBins;
  all_gather(included, numBins, all_numBins);
  if (std::count(all_numBins.begin(), all_numBins.end(), numBins) !=
      (int)all_numBins.size()) {
    // All the processes will error out if all the workspaces don't have the
    // same number of bins
    throw Exception::MisMatch<std::size_t>(
        numBins, 0, "All input workspaces must have the same number of bins");
  }
  // Also check that all workspaces are either histogram or not
  // N.B. boost mpi doesn't seem to like me using booleans in the all_gather
  hist = inputWorkspace->isHistogramData();
  std::vector<int> all_hist;
  all_gather(included, hist, all_hist);
  if (std::count(all_hist.begin(), all_hist.end(), hist) !=
      (int)all_hist.size()) {
    // All the processes will error out if we don't have either all histogram or
    // all point-data workspaces
    throw Exception::MisMatch<int>(
        hist, 0,
        "The input workspaces must be all histogram or all point data");
  }

  // How do we accumulate the data?
  std::string accum = this->getPropertyValue("AccumulationMethod");
  // Get the total number of spectra in the combined inputs
  totalSpec = inputWorkspace->getNumberHistograms();
  sumSpec = totalSpec;
  if (accum == "Append") {
    reduce(included, totalSpec, sumSpec, std::plus<std::size_t>(), 0);
  } else if (accum == "Add") {
    // barrier only helps when memory is too low for communication
    // included.barrier();
  }

  eventW = boost::dynamic_pointer_cast<const EventWorkspace>(inputWorkspace);
  if (eventW != NULL) {
    if (getProperty("PreserveEvents")) {
      // Input workspace is an event workspace. Use the other exec method
      this->execEvent();
      return;
    }
  }

  // The root process needs to create a workspace of the appropriate size
  MatrixWorkspace_sptr outputWorkspace;
  if (included.rank() == 0) {
    g_log.debug() << "Total number of spectra is " << sumSpec << "\n";
    // Create the workspace for the output
    outputWorkspace = WorkspaceFactory::Instance().create(
        inputWorkspace, sumSpec, numBins + hist, numBins);
    setProperty("OutputWorkspace", outputWorkspace);
    ExperimentInfo_sptr inWS = inputWorkspace;
    outputWorkspace->copyExperimentInfoFrom(inWS.get());
  }

  for (size_t wi = 0; wi < totalSpec; wi++) {
    if (included.rank() == 0) {
      const ISpectrum *inSpec = inputWorkspace->getSpectrum(wi);
      if (accum == "Add") {
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        reduce(included, inputWorkspace->readY(wi), outputWorkspace->dataY(wi),
               vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), outputWorkspace->dataE(wi),
               eplus(), 0);
      } else if (accum == "Append") {
        // Copy over data from own input workspace
        outputWorkspace->dataX(wi) = inputWorkspace->readX(wi);
        outputWorkspace->dataY(wi) = inputWorkspace->readY(wi);
        outputWorkspace->dataE(wi) = inputWorkspace->readE(wi);

        const int numReqs(3 * (included.size() - 1));
        std::vector<boost::mpi::request> reqs(numReqs);
        int j(0);

        // Receive data from all the other processes.
        // This works because the process ranks are ordered the same in
        // 'included' as they are in 'world', but in general this is not
        // guaranteed. TODO: robustify
        for (int i = 1; i < included.size(); ++i) {
          size_t index = wi + i * totalSpec;
          reqs[j++] = included.irecv(i, 0, outputWorkspace->dataX(index));
          reqs[j++] = included.irecv(i, 1, outputWorkspace->dataY(index));
          reqs[j++] = included.irecv(i, 2, outputWorkspace->dataE(index));
          ISpectrum *outSpec = outputWorkspace->getSpectrum(index);
          outSpec->clearDetectorIDs();
          outSpec->addDetectorIDs(inSpec->getDetectorIDs());
        }

        // Make sure everything's been received before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
      }
      ISpectrum *outSpec = outputWorkspace->getSpectrum(wi);
      outSpec->clearDetectorIDs();
      outSpec->addDetectorIDs(inSpec->getDetectorIDs());
    } else {
      if (accum == "Add") {
        reduce(included, inputWorkspace->readY(wi), vplus(), 0);
        reduce(included, inputWorkspace->readE(wi), eplus(), 0);
      } else if (accum == "Append") {
        std::vector<boost::mpi::request> reqs(3);

        // Send the spectrum to the root process
        reqs[0] = included.isend(0, 0, inputWorkspace->readX(wi));
        reqs[1] = included.isend(0, 1, inputWorkspace->readY(wi));
        reqs[2] = included.isend(0, 2, inputWorkspace->readE(wi));

        // Make sure the sends have completed before exiting the algorithm
        mpi::wait_all(reqs.begin(), reqs.end());
      }
    }
  }
}
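
For the agreement checks at the top of exec() (number of bins, histogram flag), the underlying idiom is a Boost.MPI all_gather followed by a count. A minimal standalone version (independent of Mantid, with an assumed helper name) is:

#include <boost/mpi.hpp>
#include <algorithm>
#include <cstddef>
#include <vector>

// Gather one value per rank and report whether every rank supplied the same
// value as the local one.
bool all_ranks_agree(const boost::mpi::communicator &comm, std::size_t local_value) {
  std::vector<std::size_t> gathered;
  boost::mpi::all_gather(comm, local_value, gathered);
  return static_cast<std::size_t>(
             std::count(gathered.begin(), gathered.end(), local_value)) ==
         gathered.size();
}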
Example #9
void io_stream_t::conn_proc(int fd, netaddr_t *netaddr) const {
	fd_guard_t fd_guard(fd);

	class netaddr_guard_t {
		netaddr_t *netaddr;
	public:
		inline netaddr_guard_t(netaddr_t *_netaddr) throw() : netaddr(_netaddr) { }
		inline ~netaddr_guard_t() throw() { delete netaddr; }
	} netaddr_guard(netaddr);

	bq_fd_setup(fd);

	bq_conn_t *conn = transport.new_connect(fd, ctl(), remote_errors);

	class conn_guard_t {
		bq_conn_t *conn;
		io_stream::mmconns_t &mmconns;
	public:
		inline conn_guard_t(bq_conn_t *_conn, stat_t &stat) throw() :
			conn(_conn), mmconns(stat.mmconns()) {

			++mmconns;
		}

		inline ~conn_guard_t() throw() {
			--mmconns;

			delete conn;
		}
	} conn_guard(conn, stat);

	conn->setup_accept();

	netaddr_t const &local_addr = bind_addr();

	bq_in_t in(*conn, ibuf_size, &stat.icount());

	for(bool work = true; work;) {
		{
			char obuf[obuf_size];
			bq_out_t out(*conn, obuf, sizeof(obuf), &stat.ocount());

			in_t::ptr_t ptr(in);

			do {
				in.timeout_set(timeout);
				out.timeout_set(timeout);

				work = proto.request_proc(ptr, out, local_addr, *netaddr);
				++stat.reqs();

				if(!work)
					break;

			} while(in.truncate(ptr));
		}

		if(work) {
			short int events = POLLIN;
			interval_t _keepalive = keepalive;
			if(bq_poll(fd, events, &_keepalive) < 0)
				break;
		}
	}

	conn->shutdown();
}
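
A standalone illustration (not part of the original source) of the RAII counting-guard idiom that conn_guard_t implements above: increment a shared counter on construction, decrement it and release the connection on destruction, so every exit path of conn_proc is covered. The class and member names below are hypothetical.

#include <atomic>
#include <memory>

template<class conn_t>
class scoped_conn_guard_t {
	std::unique_ptr<conn_t> conn;
	std::atomic<long> &counter;
public:
	scoped_conn_guard_t(std::unique_ptr<conn_t> _conn, std::atomic<long> &_counter) noexcept :
		conn(std::move(_conn)), counter(_counter) { ++counter; }

	~scoped_conn_guard_t() { --counter; }  // conn is deleted automatically

	conn_t &operator*() const noexcept { return *conn; }
};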