/* Send slurm message with timeout
 * RET message size (as specified in argument) or SLURM_ERROR on error */
int _slurm_send_timeout(slurm_fd_t fd, char *buf, size_t size,
			uint32_t flags, int timeout)
{
	int rc;
	int sent = 0;
	int fd_flags;
	struct pollfd ufds;
	struct timeval tstart;
	int timeleft = timeout;
	char temp[2];

	ufds.fd     = fd;
	ufds.events = POLLOUT;

	fd_flags = _slurm_fcntl(fd, F_GETFL);
	fd_set_nonblocking(fd);

	gettimeofday(&tstart, NULL);

	while (sent < size) {
		timeleft = timeout - _tot_wait(&tstart);
		if (timeleft <= 0) {
			debug("_slurm_send_timeout at %d of %zd, timeout",
				sent, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT);
			sent = SLURM_ERROR;
			goto done;
		}

		if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
			if ((rc == 0) || (errno == EINTR) || (errno == EAGAIN))
 				continue;
			else {
				debug("_slurm_send_timeout at %d of %zd, "
					"poll error: %s",
					sent, size, strerror(errno));
				slurm_seterrno(SLURM_COMMUNICATIONS_SEND_ERROR);
				sent = SLURM_ERROR;
				goto done;
			}
		}

		/*
		 * Check here to make sure the socket really is there.
		 * If not then exit out and notify the sender.  This
 		 * is here since a write doesn't always tell you the
		 * socket is gone, but getting 0 back from a
		 * nonblocking read means just that.
		 */
		if (ufds.revents & POLLERR) {
			debug("_slurm_send_timeout: Socket POLLERR");
			slurm_seterrno(ENOTCONN);
			sent = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLHUP) || (ufds.revents & POLLNVAL) ||
		    (_slurm_recv(fd, &temp, 1, flags) == 0)) {
			debug2("_slurm_send_timeout: Socket no longer there");
			slurm_seterrno(ENOTCONN);
			sent = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLOUT) != POLLOUT) {
			error("_slurm_send_timeout: Poll failure, revents:%d",
			      ufds.revents);
		}

		rc = _slurm_send(fd, &buf[sent], (size - sent), flags);
		if (rc < 0) {
 			if (errno == EINTR)
				continue;
			debug("_slurm_send_timeout at %d of %zd, "
				"send error: %s",
				sent, size, strerror(errno));
 			if (errno == EAGAIN) {	/* poll() lied to us */
				usleep(10000);
				continue;
			}
 			slurm_seterrno(SLURM_COMMUNICATIONS_SEND_ERROR);
			sent = SLURM_ERROR;
			goto done;
		}
		if (rc == 0) {
			debug("_slurm_send_timeout at %d of %zd, "
				"sent zero bytes", sent, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT);
			sent = SLURM_ERROR;
			goto done;
		}

		sent += rc;
	}

    done:
	/* Reset fd flags to prior state, preserve errno */
	if (fd_flags != SLURM_PROTOCOL_ERROR) {
		int slurm_err = slurm_get_errno();
		_slurm_fcntl(fd , F_SETFL , fd_flags);
		slurm_seterrno(slurm_err);
	}

	return sent;

}
Beispiel #2
0
int pmixp_server_send_nb(pmixp_ep_t *ep, pmixp_srv_cmd_t type,
			 uint32_t seq, Buf buf,
			 pmixp_server_sent_cb_t complete_cb,
			 void *cb_data)
{
	pmixp_base_hdr_t bhdr;
	int rc = SLURM_ERROR;
	pmixp_dconn_t *dconn = NULL;

	PMIXP_BASE_HDR_SETUP(bhdr, type, seq, buf);

	/* if direct connection is not enabled
	 * always use SLURM protocol
	 */
	if (!pmixp_info_srv_direct_conn()) {
		goto send_slurm;
	}

	switch (ep->type) {
	case PMIXP_EP_HLIST:
		goto send_slurm;
	case PMIXP_EP_NOIDEID:{
		int hostid;
		hostid = ep->ep.nodeid;
		xassert(0 <= hostid);
		dconn = pmixp_dconn_lock(hostid);
		switch (pmixp_dconn_state(dconn)) {
		case PMIXP_DIRECT_EP_SENT:
		case PMIXP_DIRECT_CONNECTED:
			/* keep the lock here and proceed
			 * to the direct send
			 */
			goto send_direct;
		case PMIXP_DIRECT_INIT:
			pmixp_dconn_req_sent(dconn);
			pmixp_dconn_unlock(dconn);
			goto send_slurm;
		default:{
			/* this is a bug! */
			pmixp_dconn_state_t state = pmixp_dconn_state(dconn);
			pmixp_dconn_unlock(dconn);
			PMIXP_ERROR("Bad direct connection state: %d",
				    (int)state);
			xassert( (state == PMIXP_DIRECT_INIT) ||
				 (state == PMIXP_DIRECT_EP_SENT) ||
				 (state == PMIXP_DIRECT_CONNECTED) );
			abort();
		}
		}
	}
	default:
		PMIXP_ERROR("Bad value of the endpoint type: %d",
			    (int)ep->type);
		xassert( PMIXP_EP_HLIST == ep->type ||
			 PMIXP_EP_NOIDEID == ep->type);
		abort();
	}

	return rc;
send_slurm:
	rc = _slurm_send(ep, bhdr, buf);
	complete_cb(rc, PMIXP_P2P_INLINE, cb_data);
	return SLURM_SUCCESS;
send_direct:
	xassert( NULL != dconn );
	_direct_send(dconn, ep, bhdr, buf, complete_cb, cb_data);
	pmixp_dconn_unlock(dconn);
	return SLURM_SUCCESS;
}