Пример #1
0
/* Tries to copy block <blk> at once into the channel's buffer after length
 * controls. The chn->o and to_forward pointers are updated. If the channel
 * input is closed, -2 is returned. If the block is too large for this buffer,
 * -3 is returned. If there is not enough room left in the buffer, -1 is
 * returned. Otherwise the number of bytes copied is returned (0 being a valid
 * number). Channel flag READ_PARTIAL is updated if some data can be
 * transferred. Channel flag CF_WAKE_WRITE is set if the write fails because
 * the buffer is full.
 */
int bi_putblk(struct channel *chn, const char *blk, int len)
{
	int max;

	if (unlikely(channel_input_closed(chn)))
		return -2;

	max = buffer_max_len(chn);
	if (unlikely(len > max - buffer_len(chn->buf))) {
		/* we can't write this chunk right now because the buffer is
		 * almost full or because the block is too large. Return the
		 * available space or -2 if impossible.
		 */
		if (len > max)
			return -3;

		chn->flags |= CF_WAKE_WRITE;
		return -1;
	}

	if (unlikely(len == 0))
		return 0;

	/* OK so the data fits in the buffer in one or two blocks */
	max = buffer_contig_space(chn->buf);
	memcpy(bi_end(chn->buf), blk, MIN(len, max));
	if (len > max)
		memcpy(chn->buf->data, blk + max, len - max);

	chn->buf->i += len;
	chn->total += len;
	if (chn->to_forward) {
		unsigned long fwd = len;
		if (chn->to_forward != CHN_INFINITE_FORWARD) {
			if (fwd > chn->to_forward)
				fwd = chn->to_forward;
			chn->to_forward -= fwd;
		}
		b_adv(chn->buf, fwd);
	}

	/* notify that some data was read from the SI into the buffer */
	chn->flags |= CF_READ_PARTIAL;
	return len;
}
Пример #2
0
/*
 * This function is called to send buffer data to a stream socket.
 * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll
 * before calling it again, otherwise 1. If a pipe was associated with the
 * buffer and it empties it, it releases it as well.
 */
static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b)
{
	int write_poll = MAX_WRITE_POLL_LOOPS;
	int retval = 1;
	int ret, max;

	if (unlikely(si->send_proxy_ofs)) {
		/* The target server expects a PROXY line to be sent first.
		 * If the send_proxy_ofs is negative, it corresponds to the
		 * offset to start sending from then end of the proxy string
		 * (which is recomputed every time since it's constant). If
		 * it is positive, it means we have to send from the start.
		 */
		ret = make_proxy_line(trash, sizeof(trash),
				      &b->prod->addr.from, &b->prod->addr.to);
		if (!ret)
			return -1;

		if (si->send_proxy_ofs > 0)
			si->send_proxy_ofs = -ret; /* first call */

		/* we have to send trash from (ret+sp for -sp bytes) */
		ret = send(si->fd, trash + ret + si->send_proxy_ofs, -si->send_proxy_ofs,
			   (b->flags & BF_OUT_EMPTY) ? 0 : MSG_MORE);
		if (ret > 0) {
			if (fdtab[si->fd].state == FD_STCONN)
				fdtab[si->fd].state = FD_STREADY;

			si->send_proxy_ofs += ret; /* becomes zero once complete */
			b->flags |= BF_WRITE_NULL; /* connect() succeeded */
		}
		else if (ret == 0 || errno == EAGAIN) {
			/* nothing written, we need to poll for write first */
			return 0;
		}
		else {
			/* bad, we got an error */
			return -1;
		}
	}

#if defined(CONFIG_HAP_LINUX_SPLICE)
	while (b->pipe) {
		ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data,
			     SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
		if (ret <= 0) {
			if (ret == 0 || errno == EAGAIN) {
				retval = 0;
				return retval;
			}
			/* here we have another error */
			retval = -1;
			return retval;
		}

		b->flags |= BF_WRITE_PARTIAL;
		b->pipe->data -= ret;

		if (!b->pipe->data) {
			put_pipe(b->pipe);
			b->pipe = NULL;
			break;
		}

		if (--write_poll <= 0)
			return retval;

		/* The only reason we did not empty the pipe is that the output
		 * buffer is full.
		 */
		return 0;
	}

	/* At this point, the pipe is empty, but we may still have data pending
	 * in the normal buffer.
	 */
#endif
	if (!b->send_max) {
		b->flags |= BF_OUT_EMPTY;
		return retval;
	}

	/* when we're in this loop, we already know that there is no spliced
	 * data left, and that there are sendable buffered data.
	 */
	while (1) {
		if (b->r > b->w)
			max = b->r - b->w;
		else
			max = b->data + b->size - b->w;

		/* limit the amount of outgoing data if required */
		if (max > b->send_max)
			max = b->send_max;

		/* check if we want to inform the kernel that we're interested in
		 * sending more data after this call. We want this if :
		 *  - we're about to close after this last send and want to merge
		 *    the ongoing FIN with the last segment.
		 *  - we know we can't send everything at once and must get back
		 *    here because of unaligned data
		 *  - there is still a finite amount of data to forward
		 * The test is arranged so that the most common case does only 2
		 * tests.
		 */

		if (MSG_NOSIGNAL && MSG_MORE) {
			unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;

			if ((!(b->flags & BF_NEVER_WAIT) &&
			    ((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) ||
			     (b->flags & BF_EXPECT_MORE))) ||
			    ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) ||
			    (max != b->l && max != b->send_max)) {
				send_flag |= MSG_MORE;
			}

			/* this flag has precedence over the rest */
			if (b->flags & BF_SEND_DONTWAIT)
				send_flag &= ~MSG_MORE;

			ret = send(si->fd, b->w, max, send_flag);
		} else {
			int skerr;
			socklen_t lskerr = sizeof(skerr);

			ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
			if (ret == -1 || skerr)
				ret = -1;
			else
				ret = send(si->fd, b->w, max, MSG_DONTWAIT);
		}

		if (ret > 0) {
			if (fdtab[si->fd].state == FD_STCONN)
				fdtab[si->fd].state = FD_STREADY;

			b->flags |= BF_WRITE_PARTIAL;

			b->w += ret;
			if (b->w == b->data + b->size)
				b->w = b->data; /* wrap around the buffer */

			b->l -= ret;
			if (likely(b->l < buffer_max_len(b)))
				b->flags &= ~BF_FULL;

			if (likely(!b->l))
				/* optimize data alignment in the buffer */
				b->r = b->w = b->lr = b->data;

			b->send_max -= ret;
			if (!b->send_max) {
				/* Always clear both flags once everything has been sent, they're one-shot */
				b->flags &= ~(BF_EXPECT_MORE | BF_SEND_DONTWAIT);
				if (likely(!b->pipe))
					b->flags |= BF_OUT_EMPTY;
				break;
			}

			/* if the system buffer is full, don't insist */
			if (ret < max)
				break;

			if (--write_poll <= 0)
				break;
		}
		else if (ret == 0 || errno == EAGAIN) {
			/* nothing written, we need to poll for write first */
			retval = 0;
			break;
		}
		else {
			/* bad, we got an error */
			retval = -1;
			break;
		}
	} /* while (1) */

	return retval;
}
Пример #3
0
/*
 * this function is called on a read event from a stream socket.
 * It returns 0 if we have a high confidence that we will not be
 * able to read more data without polling first. Returns non-zero
 * otherwise.
 */
int stream_sock_read(int fd) {
	struct stream_interface *si = fdtab[fd].owner;
	struct buffer *b = si->ib;
	int ret, max, retval, cur_read;
	int read_poll = MAX_READ_POLL_LOOPS;

#ifdef DEBUG_FULL
	fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
#endif

	retval = 1;

	/* stop immediately on errors. Note that we DON'T want to stop on
	 * POLL_ERR, as the poller might report a write error while there
	 * are still data available in the recv buffer. This typically
	 * happens when we send too large a request to a backend server
	 * which rejects it before reading it all.
	 */
	if (fdtab[fd].state == FD_STERROR)
		goto out_error;

	/* stop here if we reached the end of data */
	if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
		goto out_shutdown_r;

	/* maybe we were called immediately after an asynchronous shutr */
	if (b->flags & BF_SHUTR)
		goto out_wakeup;

#if defined(CONFIG_HAP_LINUX_SPLICE)
	if (b->to_forward >= MIN_SPLICE_FORWARD && b->flags & BF_KERN_SPLICING) {

		/* Under Linux, if FD_POLL_HUP is set, we have reached the end.
		 * Since older splice() implementations were buggy and returned
		 * EAGAIN on end of read, let's bypass the call to splice() now.
		 */
		if (fdtab[fd].ev & FD_POLL_HUP)
			goto out_shutdown_r;

		retval = stream_sock_splice_in(b, si);

		if (retval >= 0) {
			if (si->flags & SI_FL_ERR)
				goto out_error;
			if (b->flags & BF_READ_NULL)
				goto out_shutdown_r;
			goto out_wakeup;
		}
		/* splice not possible (anymore), let's go on on standard copy */
	}
#endif
	cur_read = 0;
	while (1) {
		max = buffer_max_len(b) - b->l;

		if (max <= 0) {
			b->flags |= BF_FULL;
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		/*
		 * 1. compute the maximum block size we can read at once.
		 */
		if (b->l == 0) {
			/* let's realign the buffer to optimize I/O */
			b->r = b->w = b->lr = b->data;
		}
		else if (b->r > b->w) {
			/* remaining space wraps at the end, with a moving limit */
			if (max > b->data + b->size - b->r)
				max = b->data + b->size - b->r;
		}
		/* else max is already OK */

		/*
		 * 2. read the largest possible block
		 */
		ret = recv(fd, b->r, max, 0);

		if (ret > 0) {
			b->r += ret;
			b->l += ret;
			cur_read += ret;

			/* if we're allowed to directly forward data, we must update send_max */
			if (b->to_forward && !(b->flags & (BF_SHUTW|BF_SHUTW_NOW))) {
				unsigned long fwd = ret;
				if (b->to_forward != BUF_INFINITE_FORWARD) {
					if (fwd > b->to_forward)
						fwd = b->to_forward;
					b->to_forward -= fwd;
				}
				b->send_max += fwd;
				b->flags &= ~BF_OUT_EMPTY;
			}

			if (fdtab[fd].state == FD_STCONN)
				fdtab[fd].state = FD_STREADY;

			b->flags |= BF_READ_PARTIAL;

			if (b->r == b->data + b->size) {
				b->r = b->data; /* wrap around the buffer */
			}

			b->total += ret;

			if (b->l >= buffer_max_len(b)) {
				/* The buffer is now full, there's no point in going through
				 * the loop again.
				 */
				if (!(b->flags & BF_STREAMER_FAST) && (cur_read == b->l)) {
					b->xfer_small = 0;
					b->xfer_large++;
					if (b->xfer_large >= 3) {
						/* we call this buffer a fast streamer if it manages
						 * to be filled in one call 3 consecutive times.
						 */
						b->flags |= (BF_STREAMER | BF_STREAMER_FAST);
						//fputc('+', stderr);
					}
				}
				else if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
					 (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 2) {
						/* if the buffer has been at least half full twice,
						 * we receive faster than we send, so at least it
						 * is not a "fast streamer".
						 */
						b->flags &= ~BF_STREAMER_FAST;
						//fputc('-', stderr);
					}
				}
				else {
					b->xfer_small = 0;
					b->xfer_large = 0;
				}

				b->flags |= BF_FULL;
				si->flags |= SI_FL_WAIT_ROOM;
				break;
			}

			/* if too many bytes were missing from last read, it means that
			 * it's pointless trying to read again because the system does
			 * not have them in buffers. BTW, if FD_POLL_HUP was present,
			 * it means that we have reached the end and that the connection
			 * is closed.
			 */
			if (ret < max) {
				if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
				    (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 3) {
						/* we have read less than half of the buffer in
						 * one pass, and this happened at least 3 times.
						 * This is definitely not a streamer.
						 */
						b->flags &= ~(BF_STREAMER | BF_STREAMER_FAST);
						//fputc('!', stderr);
					}
				}
				/* unfortunately, on level-triggered events, POLL_HUP
				 * is generally delivered AFTER the system buffer is
				 * empty, so this one might never match.
				 */
				if (fdtab[fd].ev & FD_POLL_HUP)
					goto out_shutdown_r;

				/* if a streamer has read few data, it may be because we
				 * have exhausted system buffers. It's not worth trying
				 * again.
				 */
				if (b->flags & BF_STREAMER)
					break;

				/* generally if we read something smaller than 1 or 2 MSS,
				 * it means that either we have exhausted the system's
				 * buffers (streamer or question-response protocol) or
				 * that the connection will be closed. Streamers are
				 * easily detected so we return early. For other cases,
				 * it's still better to perform a last read to be sure,
				 * because it may save one complete poll/read/wakeup cycle
				 * in case of shutdown.
				 */
				if (ret < MIN_RET_FOR_READ_LOOP && b->flags & BF_STREAMER)
					break;

				/* if we read a large block smaller than what we requested,
				 * it's almost certain we'll never get anything more.
				 */
				if (ret >= global.tune.recv_enough)
					break;
			}

			if ((b->flags & BF_READ_DONTWAIT) || --read_poll <= 0)
				break;
		}
		else if (ret == 0) {
			/* connection closed */
			goto out_shutdown_r;
		}
		else if (errno == EAGAIN) {
			/* Ignore EAGAIN but inform the poller that there is
			 * nothing to read left if we did not read much, ie
			 * less than what we were still expecting to read.
			 * But we may have done some work justifying to notify
			 * the task.
			 */
			if (cur_read < MIN_RET_FOR_READ_LOOP)
				retval = 0;
			break;
		}
		else {
			goto out_error;
		}
	} /* while (1) */

 out_wakeup:
	/* We might have some data the consumer is waiting for.
	 * We can do fast-forwarding, but we avoid doing this for partial
	 * buffers, because it is very likely that it will be done again
	 * immediately afterwards once the following data is parsed (eg:
	 * HTTP chunking).
	 */
	if (b->pipe || /* always try to send spliced data */
	    (b->send_max == b->l && (b->cons->flags & SI_FL_WAIT_DATA))) {
		int last_len = b->pipe ? b->pipe->data : 0;

		b->cons->chk_snd(b->cons);

		/* check if the consumer has freed some space */
		if (!(b->flags & BF_FULL) &&
		    (!last_len || !b->pipe || b->pipe->data < last_len))
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	if (si->flags & SI_FL_WAIT_ROOM) {
		EV_FD_CLR(fd, DIR_RD);
		b->rex = TICK_ETERNITY;
	}
	else if ((b->flags & (BF_SHUTR|BF_READ_PARTIAL|BF_FULL|BF_DONT_READ|BF_READ_NOEXP)) == BF_READ_PARTIAL)
		b->rex = tick_add_ifset(now_ms, b->rto);

	/* we have to wake up if there is a special event or if we don't have
	 * any more data to forward.
	 */
	if ((b->flags & (BF_READ_NULL|BF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((b->flags & BF_READ_PARTIAL) && (!b->to_forward || b->cons->state != SI_ST_EST)))
		task_wakeup(si->owner, TASK_WOKEN_IO);

	if (b->flags & BF_READ_ACTIVITY)
		b->flags &= ~BF_READ_DONTWAIT;

	fdtab[fd].ev &= ~FD_POLL_IN;
	return retval;

 out_shutdown_r:
	/* we received a shutdown */
	fdtab[fd].ev &= ~FD_POLL_HUP;
	b->flags |= BF_READ_NULL;
	if (b->flags & BF_AUTO_CLOSE)
		buffer_shutw_now(b);
	stream_sock_shutr(si);
	goto out_wakeup;

 out_error:
	/* Read error on the file descriptor. We mark the FD as STERROR so
	 * that we don't use it anymore. The error is reported to the stream
	 * interface which will take proper action. We must not perturbate the
	 * buffer because the stream interface wants to ensure transparent
	 * connection retries.
	 */

	fdtab[fd].state = FD_STERROR;
	fdtab[fd].ev &= ~FD_POLL_STICKY;
	EV_FD_REM(fd);
	si->flags |= SI_FL_ERR;
	retval = 1;
	goto out_wakeup;
}
Пример #4
0
/* Return the version of the SSL protocol in the request. It supports both
 * SSLv3 (TLSv1) header format for any message, and SSLv2 header format for
 * the hello message. The SSLv3 format is described in RFC 2246 p49, and the
 * SSLv2 format is described here, and completed p67 of RFC 2246 :
 *    http://wp.netscape.com/eng/security/SSL_2.html
 *
 * Note: this decoder only works with non-wrapping data.
 */
static int
smp_fetch_req_ssl_ver(struct proxy *px, struct session *s, void *l7, unsigned int opt,
                      const struct arg *args, struct sample *smp, const char *kw)
{
	int version, bleft, msg_len;
	const unsigned char *data;

	if (!s || !s->req)
		return 0;

	msg_len = 0;
	bleft = s->req->buf->i;
	if (!bleft)
		goto too_short;

	data = (const unsigned char *)s->req->buf->p;
	if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) {
		/* SSLv3 header format */
		if (bleft < 5)
			goto too_short;

		version = (data[1] << 16) + data[2]; /* version: major, minor */
		msg_len = (data[3] <<  8) + data[4]; /* record length */

		/* format introduced with SSLv3 */
		if (version < 0x00030000)
			goto not_ssl;

		/* message length between 1 and 2^14 + 2048 */
		if (msg_len < 1 || msg_len > ((1<<14) + 2048))
			goto not_ssl;

		bleft -= 5; data += 5;
	} else {
		/* SSLv2 header format, only supported for hello (msg type 1) */
		int rlen, plen, cilen, silen, chlen;

		if (*data & 0x80) {
			if (bleft < 3)
				goto too_short;
			/* short header format : 15 bits for length */
			rlen = ((data[0] & 0x7F) << 8) | data[1];
			plen = 0;
			bleft -= 2; data += 2;
		} else {
			if (bleft < 4)
				goto too_short;
			/* long header format : 14 bits for length + pad length */
			rlen = ((data[0] & 0x3F) << 8) | data[1];
			plen = data[2];
			bleft -= 3; data += 2;
		}

		if (*data != 0x01)
			goto not_ssl;
		bleft--; data++;

		if (bleft < 8)
			goto too_short;
		version = (data[0] << 16) + data[1]; /* version: major, minor */
		cilen   = (data[2] <<  8) + data[3]; /* cipher len, multiple of 3 */
		silen   = (data[4] <<  8) + data[5]; /* session_id_len: 0 or 16 */
		chlen   = (data[6] <<  8) + data[7]; /* 16<=challenge length<=32 */

		bleft -= 8; data += 8;
		if (cilen % 3 != 0)
			goto not_ssl;
		if (silen && silen != 16)
			goto not_ssl;
		if (chlen < 16 || chlen > 32)
			goto not_ssl;
		if (rlen != 9 + cilen + silen + chlen)
			goto not_ssl;

		/* focus on the remaining data length */
		msg_len = cilen + silen + chlen + plen;
	}
	/* We could recursively check that the buffer ends exactly on an SSL
	 * fragment boundary and that a possible next segment is still SSL,
	 * but that's a bit pointless. However, we could still check that
	 * all the part of the request which fits in a buffer is already
	 * there.
	 */
	if (msg_len > buffer_max_len(s->req) + s->req->buf->data - s->req->buf->p)
		msg_len = buffer_max_len(s->req) + s->req->buf->data - s->req->buf->p;

	if (bleft < msg_len)
		goto too_short;

	/* OK that's enough. We have at least the whole message, and we have
	 * the protocol version.
	 */
	smp->type = SMP_T_UINT;
	smp->data.uint = version;
	smp->flags = SMP_F_VOLATILE;
	return 1;

 too_short:
	smp->flags = SMP_F_MAY_CHANGE;
 not_ssl:
	return 0;
}
Пример #5
0
/*
 * This function is called to send buffer data to a stream socket.
 * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll
 * before calling it again, otherwise 1. If a pipe was associated with the
 * buffer and it empties it, it releases it as well.
 */
static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b)
{
	int write_poll = MAX_WRITE_POLL_LOOPS;
	int retval = 1;
	int ret, max;

#if defined(CONFIG_HAP_LINUX_SPLICE)
	while (b->pipe) {
		ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data,
			     SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
		if (ret <= 0) {
			if (ret == 0 || errno == EAGAIN) {
				retval = 0;
				return retval;
			}
			/* here we have another error */
			retval = -1;
			return retval;
		}

		b->flags |= BF_WRITE_PARTIAL;
		b->pipe->data -= ret;

		if (!b->pipe->data) {
			put_pipe(b->pipe);
			b->pipe = NULL;
			break;
		}

		if (--write_poll <= 0)
			return retval;
	}

	/* At this point, the pipe is empty, but we may still have data pending
	 * in the normal buffer.
	 */
#endif
	if (!b->send_max) {
		b->flags |= BF_OUT_EMPTY;
		return retval;
	}

	/* when we're in this loop, we already know that there is no spliced
	 * data left, and that there are sendable buffered data.
	 */
	while (1) {
		if (b->r > b->w)
			max = b->r - b->w;
		else
			max = b->data + b->size - b->w;

		/* limit the amount of outgoing data if required */
		if (max > b->send_max)
			max = b->send_max;

		/* check if we want to inform the kernel that we're interested in
		 * sending more data after this call. We want this if :
		 *  - we're about to close after this last send and want to merge
		 *    the ongoing FIN with the last segment.
		 *  - we know we can't send everything at once and must get back
		 *    here because of unaligned data
		 *  - there is still a finite amount of data to forward
		 * The test is arranged so that the most common case does only 2
		 * tests.
		 */

		if (MSG_NOSIGNAL && MSG_MORE) {
			unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;

			if (((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) ||
			     ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) ||
			     (max != b->l && max != b->send_max))
			    && (fdtab[si->fd].flags & FD_FL_TCP)) {
				send_flag |= MSG_MORE;
			}
			else if (b->flags & BF_EXPECT_MORE) {
				/* it was forced on the buffer, this flag is one-shoot */
				b->flags &= ~BF_EXPECT_MORE;
				send_flag |= MSG_MORE;
			}

			/* this flag has precedence over the rest */
			if (b->flags & BF_SEND_DONTWAIT)
				send_flag &= ~MSG_MORE;

			ret = send(si->fd, b->w, max, send_flag);

			/* disable it only once everything has been sent */
			if (ret == max && (b->flags & BF_SEND_DONTWAIT))
				b->flags &= ~BF_SEND_DONTWAIT;
		} else {
			int skerr;
			socklen_t lskerr = sizeof(skerr);

			ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
			if (ret == -1 || skerr)
				ret = -1;
			else
				ret = send(si->fd, b->w, max, MSG_DONTWAIT);
		}

		if (ret > 0) {
			if (fdtab[si->fd].state == FD_STCONN)
				fdtab[si->fd].state = FD_STREADY;

			b->flags |= BF_WRITE_PARTIAL;

			b->w += ret;
			if (b->w == b->data + b->size)
				b->w = b->data; /* wrap around the buffer */

			b->l -= ret;
			if (likely(b->l < buffer_max_len(b)))
				b->flags &= ~BF_FULL;

			if (likely(!b->l))
				/* optimize data alignment in the buffer */
				b->r = b->w = b->lr = b->data;

			b->send_max -= ret;
			if (!b->send_max) {
				if (likely(!b->pipe))
					b->flags |= BF_OUT_EMPTY;
				break;
			}

			/* if the system buffer is full, don't insist */
			if (ret < max)
				break;

			if (--write_poll <= 0)
				break;
		}
		else if (ret == 0 || errno == EAGAIN) {
			/* nothing written, we need to poll for write first */
			retval = 0;
			break;
		}
		else {
			/* bad, we got an error */
			retval = -1;
			break;
		}
	} /* while (1) */

	return retval;
}