Example #1
0
/*
 * Handler for the first message on a temporary direct connection; the
 * header of that message identifies the initiating node.
 *
 * The descriptor is detached from the temporary connection's I/O engine
 * and handed to pmixp_dconn_accept() together with hdr->nodeid.  If the
 * accept is refused the descriptor is closed and an error is logged.
 * Otherwise a persistent PMIXP_PROTO_DIRECT connection is created on the
 * dconn's engine and the descriptor is registered with the eio loop.
 *
 * conn - temporary connection the message arrived on
 * _hdr - message header, interpreted as pmixp_base_hdr_t
 * msg  - message payload; not referenced by this function
 */
static void
_direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_base_hdr_t *base_hdr = (pmixp_base_hdr_t *)_hdr;
	pmixp_conn_t *persist_conn;
	pmixp_dconn_t *peer;
	eio_obj_t *eio_obj;
	int sock;

	/* take the descriptor away from the temporary connection's engine */
	sock = pmixp_io_detach(pmixp_conn_get_eng(conn));

	peer = pmixp_dconn_accept(base_hdr->nodeid, sock);
	if (peer == NULL) {
		/* The connection was refused because one is already
		 * established with this node; some sort of race
		 * condition seems to have occurred.
		 */
		char *host = pmixp_info_job_host(base_hdr->nodeid);

		close(sock);
		PMIXP_ERROR("Failed to accept direct connection from %s",
			    host);
		xfree(host);
		return;
	}

	/* NOTE(review): the unlock below suggests pmixp_dconn_accept()
	 * returns the dconn locked - confirm. The persistent connection
	 * is created before the dconn is released.
	 */
	persist_conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
					      pmixp_dconn_engine(peer),
					      _direct_new_msg_conn,
					      _direct_return_connection,
					      peer);
	pmixp_dconn_unlock(peer);

	/* register the descriptor with the eio loop ... */
	eio_obj = eio_obj_create(sock, &direct_peer_ops,
				 (void *)persist_conn);
	eio_new_obj(pmixp_info_io(), eio_obj);
	/* ... and wake the loop up so the connection gets processed */
	eio_signal_wakeup(pmixp_info_io());
}
Example #2
0
/*
 * Entry point for a new direct-connection descriptor.
 *
 * Configures the descriptor, wraps it into a temporary
 * PMIXP_PROTO_DIRECT connection whose message handler is
 * _direct_conn_establish, and tries to receive the message right away.
 * If the connection is still alive afterwards it is handed to the eio
 * loop for further processing; otherwise it is returned immediately.
 *
 * fd - descriptor of the incoming connection
 */
void pmixp_server_direct_conn(int fd)
{
	pmixp_conn_t *tmp_conn;

	PMIXP_DEBUG("Request from fd = %d", fd);

	/* descriptor setup: non-blocking, close-on-exec, nodelay */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);
	pmixp_fd_set_nodelay(fd);

	tmp_conn = pmixp_conn_new_temp(PMIXP_PROTO_DIRECT, fd,
				       _direct_conn_establish);

	/* attempt to receive and handle the message in place */
	pmixp_conn_progress_rcv(tmp_conn);

	if (pmixp_conn_is_alive(tmp_conn)) {
		/* The operation would block: create an eio object so
		 * the I/O loop finishes the job, then wake the loop up
		 * so this connection gets processed.
		 */
		eio_obj_t *eio_obj = eio_obj_create(fd, &direct_peer_ops,
						    (void *)tmp_conn);

		eio_new_obj(pmixp_info_io(), eio_obj);
		eio_signal_wakeup(pmixp_info_io());
		return;
	}

	/* success, the temporary connection is not needed anymore */
	pmixp_conn_return(tmp_conn);
}
Example #3
0
/*
 * Entry point for a new descriptor carrying a PMIXP_PROTO_SLURM message.
 *
 * Configures the descriptor, wraps it into a temporary connection whose
 * message handler is _slurm_new_msg, and tries to receive the message
 * right away.  If the connection is still alive afterwards it is handed
 * to the eio loop; otherwise it is returned immediately.
 *
 * fd - descriptor of the incoming connection
 *
 * TODO: we need to keep track of the structures created here, because
 * we need to free them in "pmixp_stepd_finalize".
 * NOTE(review): the original TODO speaks of "me" structures, but this
 * function declares no such variable; presumably the connection object
 * is meant - confirm.
 */
void pmixp_server_slurm_conn(int fd)
{
	pmixp_conn_t *tmp_conn;

	PMIXP_DEBUG("Request from fd = %d", fd);
	pmixp_debug_hang(0);

	/* descriptor setup: non-blocking and close-on-exec */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);

	tmp_conn = pmixp_conn_new_temp(PMIXP_PROTO_SLURM, fd, _slurm_new_msg);

	/* attempt to receive and handle the message in place */
	pmixp_conn_progress_rcv(tmp_conn);

	if (pmixp_conn_is_alive(tmp_conn)) {
		/* The operation would block: create an eio object so
		 * the I/O loop finishes the job.
		 */
		eio_obj_t *eio_obj = eio_obj_create(fd, &slurm_peer_ops,
						    (void *)tmp_conn);

		eio_new_obj(pmixp_info_io(), eio_obj);
		return;
	}

	/* success, the temporary connection is not needed anymore */
	pmixp_conn_return(tmp_conn);
}
/*
 * Entry point for a new server connection descriptor.
 *
 * Allocates an I/O engine for the descriptor, initializes it with
 * srv_rcvd_header and tries to process the message right away.  If
 * _process_message() does not report completion, the engine is attached
 * to the eio loop for further processing.
 *
 * fd - descriptor of the incoming connection
 *
 * TODO: we need to keep track of the "me" structures created here,
 * because we need to free them in "pmixp_stepd_finalize"
 */
void pmix_server_new_conn(int fd)
{
	pmixp_io_engine_t *me;
	eio_obj_t *eio_obj;

	PMIXP_DEBUG("Request from fd = %d", fd);

	/* descriptor setup: non-blocking and close-on-exec */
	fd_set_nonblocking(fd);
	fd_set_close_on_exec(fd);

	me = xmalloc(sizeof(*me));
	pmix_io_init(me, fd, srv_rcvd_header);

	/* slurm_forward_data is used to send messages to stepd's and
	 * SLURM puts the user ID in front of them; skip that uint32_t.
	 */
	pmix_io_rcvd_padding(me, sizeof(uint32_t));

	if (_process_message(me) != 2) {
		/* The operation would block: create an eio object so
		 * the I/O loop finishes the job.
		 */
		eio_obj = eio_obj_create(fd, &peer_ops, (void *)me);
		eio_new_obj(pmixp_info_io(), eio_obj);
		return;
	}

	/* a result of 2 means the connection was fully processed here */
	xfree(me);
}
Example #5
0
/*
 * Process the extended part of a message header: use the endpoint data
 * it carries to establish a direct connection to the sending node.
 *
 * The direct-connection object for hdr->nodeid is obtained with
 * pmixp_dconn_lock() and released with pmixp_dconn_unlock() on every
 * return path.  If that object cannot be obtained the process aborts.
 *
 * hdr - base header of the received message; hdr->nodeid selects the
 *       direct connection
 * buf - buffer the endpoint data is unpacked from
 *
 * Returns SLURM_SUCCESS when no connect is required or the connection
 * was set up, the non-zero code of pmixp_dconn_connect() when connecting
 * failed, and SLURM_ERROR when the persistent connection object could
 * not be created or the dconn reports an unknown progress type.
 */
static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
{
	char nhdr[PMIXP_BASE_HDR_MAX];
	bool send_init = false;
	size_t dsize = 0, hsize = 0;
	pmixp_dconn_t *dconn;
	_direct_proto_message_t *init_msg = NULL;
	int rc = SLURM_SUCCESS;
	char *ep_data = NULL;
	uint32_t ep_len = 0;

	dconn = pmixp_dconn_lock(hdr->nodeid);
	if (!dconn) {
		/* Should not happen */
		xassert( dconn );
		abort();
	}

	/* Retrieve endpoint information.
	 * NOTE(review): ep_data is never released in this function;
	 * confirm who owns the memory _base_hdr_unpack_ext() hands back.
	 */
	_base_hdr_unpack_ext(buf, &ep_data, &ep_len);

	/* Check if init message is required to establish
	 * the connection
	 */
	if (!pmixp_dconn_require_connect(dconn, &send_init)) {
		goto unlock;
	}

	if (send_init) {
		/* Build the PMIXP_MSG_INIT_DIRECT message that is passed
		 * to pmixp_dconn_connect() below.
		 */
		Buf buf_init = pmixp_server_buf_new();
		pmixp_base_hdr_t bhdr;
		init_msg = xmalloc(sizeof(*init_msg));

		PMIXP_BASE_HDR_SETUP(bhdr, PMIXP_MSG_INIT_DIRECT, 0, buf_init);
		bhdr.ext_flag = 1;
		hsize = _direct_hdr_pack(&bhdr, nhdr);

		init_msg->sent_cb = pmixp_server_sent_buf_cb;
		init_msg->cbdata = buf_init;
		init_msg->hdr = bhdr;
		init_msg->buffer = _buf_finalize(buf_init, nhdr, hsize,
						 &dsize);
		init_msg->buf_ptr = buf_init;
	}

	rc = pmixp_dconn_connect(dconn, ep_data, ep_len, init_msg);
	if (rc) {
		PMIXP_ERROR("Unable to connect to %d", dconn->nodeid);
		if (init_msg) {
			/* need to release `init_msg` here */
			free_buf(init_msg->buf_ptr);
			xfree(init_msg);
		}
		goto unlock;
	}
	/* NOTE(review): after a successful connect init_msg is not freed
	 * here; presumably the dconn layer owns it from now on - confirm.
	 */

	switch (pmixp_dconn_progress_type(dconn)) {
	case PMIXP_DCONN_PROGRESS_SW:{
		/* this direct connection has fd that needs to be
		 * polled to progress, use connection interface for that
		 */
		pmixp_io_engine_t *eng = pmixp_dconn_engine(dconn);
		pmixp_conn_t *conn;
		conn = pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, eng,
					      _direct_new_msg_conn,
					      _direct_return_connection,
					      dconn);
		if (conn) {
			eio_obj_t *obj;
			obj = eio_obj_create(pmixp_io_fd(eng),
					     &direct_peer_ops,
					     (void *)conn);
			eio_new_obj(pmixp_info_io(), obj);
			eio_signal_wakeup(pmixp_info_io());
		} else {
			/* TODO: handle this error */
			rc = SLURM_ERROR;
			goto unlock;
		}
		break;
	}
	case PMIXP_DCONN_PROGRESS_HW: {
		/* nothing to register with the eio loop in this mode */
		break;
	}
	default:
		/* Should not happen */
		xassert(0 && pmixp_dconn_progress_type(dconn));
		/* If the assertion above is not active (presumably in
		 * non-debug builds), do not report success for a
		 * connection whose progress type is unknown.
		 */
		PMIXP_ERROR("Unexpected progress type for connection to %d",
			    dconn->nodeid);
		rc = SLURM_ERROR;
		break;
	}
unlock:
	pmixp_dconn_unlock(dconn);
	return rc;
}