/* Returns : * -1 if splice is not possible or not possible anymore and we must switch to * user-land copy (eg: to_forward reached) * 0 when we know that polling is required to get more data (EAGAIN) * 1 for all other cases (we can safely try again, or if an activity has been * detected (DATA/NULL/ERR)) * Sets : * BF_READ_NULL * BF_READ_PARTIAL * BF_WRITE_PARTIAL (during copy) * BF_OUT_EMPTY (during copy) * SI_FL_ERR * SI_FL_WAIT_ROOM * (SI_FL_WAIT_RECV) * * This function automatically allocates a pipe from the pipe pool. It also * carefully ensures to clear b->pipe whenever it leaves the pipe empty. */ static int stream_sock_splice_in(struct buffer *b, struct stream_interface *si) { static int splice_detects_close; int fd = si->fd; int ret; unsigned long max; int retval = 1; if (!b->to_forward) return -1; if (!(b->flags & BF_KERN_SPLICING)) return -1; if (b->l) { /* We're embarrassed, there are already data pending in * the buffer and we don't want to have them at two * locations at a time. Let's indicate we need some * place and ask the consumer to hurry. */ si->flags |= SI_FL_WAIT_ROOM; EV_FD_CLR(fd, DIR_RD); b->rex = TICK_ETERNITY; b->cons->chk_snd(b->cons); return 1; } if (unlikely(b->pipe == NULL)) { if (pipes_used >= global.maxpipes || !(b->pipe = get_pipe())) { b->flags &= ~BF_KERN_SPLICING; return -1; } } /* At this point, b->pipe is valid */ while (1) { if (b->to_forward == BUF_INFINITE_FORWARD) max = MAX_SPLICE_AT_ONCE; else max = b->to_forward; if (!max) { /* It looks like the buffer + the pipe already contain * the maximum amount of data to be transferred. Try to * send those data immediately on the other side if it * is currently waiting. */ retval = -1; /* end of forwarding */ break; } ret = splice(fd, NULL, b->pipe->prod, NULL, max, SPLICE_F_MOVE|SPLICE_F_NONBLOCK); if (ret <= 0) { if (ret == 0) { /* connection closed. This is only detected by * recent kernels (>= 2.6.27.13). If we notice * it works, we store the info for later use. 
*/ splice_detects_close = 1; b->flags |= BF_READ_NULL; retval = 1; /* no need for further polling */ break; } if (errno == EAGAIN) { /* there are two reasons for EAGAIN : * - nothing in the socket buffer (standard) * - pipe is full * - the connection is closed (kernel < 2.6.27.13) * Since we don't know if pipe is full, we'll * stop if the pipe is not empty. Anyway, we * will almost always fill/empty the pipe. */ if (b->pipe->data) { si->flags |= SI_FL_WAIT_ROOM; retval = 1; break; } /* We don't know if the connection was closed, * but if we know splice detects close, then we * know it for sure. * But if we're called upon POLLIN with an empty * pipe and get EAGAIN, it is suspect enough to * try to fall back to the normal recv scheme * which will be able to deal with the situation. */ if (splice_detects_close) retval = 0; /* we know for sure that it's EAGAIN */ else retval = -1; break; } if (errno == ENOSYS || errno == EINVAL) { /* splice not supported on this end, disable it */ b->flags &= ~BF_KERN_SPLICING; si->flags &= ~SI_FL_CAP_SPLICE; put_pipe(b->pipe); b->pipe = NULL; return -1; } /* here we have another error */ si->flags |= SI_FL_ERR; retval = 1; break; } /* ret <= 0 */ if (b->to_forward != BUF_INFINITE_FORWARD) b->to_forward -= ret; b->total += ret; b->pipe->data += ret; b->flags |= BF_READ_PARTIAL; b->flags &= ~BF_OUT_EMPTY; if (b->pipe->data >= SPLICE_FULL_HINT || ret >= global.tune.recv_enough) { /* We've read enough of it for this time. */ retval = 1; break; } } /* while */ if (unlikely(!b->pipe->data)) { put_pipe(b->pipe); b->pipe = NULL; } return retval; }
/* * This function is called to send buffer data to a stream socket. * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll * before calling it again, otherwise 1. If a pipe was associated with the * buffer and it empties it, it releases it as well. */ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) { int write_poll = MAX_WRITE_POLL_LOOPS; int retval = 1; int ret, max; if (unlikely(si->send_proxy_ofs)) { /* The target server expects a PROXY line to be sent first. * If the send_proxy_ofs is negative, it corresponds to the * offset to start sending from then end of the proxy string * (which is recomputed every time since it's constant). If * it is positive, it means we have to send from the start. */ ret = make_proxy_line(trash, sizeof(trash), &b->prod->addr.from, &b->prod->addr.to); if (!ret) return -1; if (si->send_proxy_ofs > 0) si->send_proxy_ofs = -ret; /* first call */ /* we have to send trash from (ret+sp for -sp bytes) */ ret = send(si->fd, trash + ret + si->send_proxy_ofs, -si->send_proxy_ofs, (b->flags & BF_OUT_EMPTY) ? 
0 : MSG_MORE); if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; si->send_proxy_ofs += ret; /* becomes zero once complete */ b->flags |= BF_WRITE_NULL; /* connect() succeeded */ } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ return 0; } else { /* bad, we got an error */ return -1; } } #if defined(CONFIG_HAP_LINUX_SPLICE) while (b->pipe) { ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data, SPLICE_F_MOVE|SPLICE_F_NONBLOCK); if (ret <= 0) { if (ret == 0 || errno == EAGAIN) { retval = 0; return retval; } /* here we have another error */ retval = -1; return retval; } b->flags |= BF_WRITE_PARTIAL; b->pipe->data -= ret; if (!b->pipe->data) { put_pipe(b->pipe); b->pipe = NULL; break; } if (--write_poll <= 0) return retval; /* The only reason we did not empty the pipe is that the output * buffer is full. */ return 0; } /* At this point, the pipe is empty, but we may still have data pending * in the normal buffer. */ #endif if (!b->send_max) { b->flags |= BF_OUT_EMPTY; return retval; } /* when we're in this loop, we already know that there is no spliced * data left, and that there are sendable buffered data. */ while (1) { if (b->r > b->w) max = b->r - b->w; else max = b->data + b->size - b->w; /* limit the amount of outgoing data if required */ if (max > b->send_max) max = b->send_max; /* check if we want to inform the kernel that we're interested in * sending more data after this call. We want this if : * - we're about to close after this last send and want to merge * the ongoing FIN with the last segment. * - we know we can't send everything at once and must get back * here because of unaligned data * - there is still a finite amount of data to forward * The test is arranged so that the most common case does only 2 * tests. 
*/ if (MSG_NOSIGNAL && MSG_MORE) { unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL; if ((!(b->flags & BF_NEVER_WAIT) && ((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) || (b->flags & BF_EXPECT_MORE))) || ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) || (max != b->l && max != b->send_max)) { send_flag |= MSG_MORE; } /* this flag has precedence over the rest */ if (b->flags & BF_SEND_DONTWAIT) send_flag &= ~MSG_MORE; ret = send(si->fd, b->w, max, send_flag); } else { int skerr; socklen_t lskerr = sizeof(skerr); ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); if (ret == -1 || skerr) ret = -1; else ret = send(si->fd, b->w, max, MSG_DONTWAIT); } if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; b->flags |= BF_WRITE_PARTIAL; b->w += ret; if (b->w == b->data + b->size) b->w = b->data; /* wrap around the buffer */ b->l -= ret; if (likely(b->l < buffer_max_len(b))) b->flags &= ~BF_FULL; if (likely(!b->l)) /* optimize data alignment in the buffer */ b->r = b->w = b->lr = b->data; b->send_max -= ret; if (!b->send_max) { /* Always clear both flags once everything has been sent, they're one-shot */ b->flags &= ~(BF_EXPECT_MORE | BF_SEND_DONTWAIT); if (likely(!b->pipe)) b->flags |= BF_OUT_EMPTY; break; } /* if the system buffer is full, don't insist */ if (ret < max) break; if (--write_poll <= 0) break; } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ retval = 0; break; } else { /* bad, we got an error */ retval = -1; break; } } /* while (1) */ return retval; }
/* * This function is called to send buffer data to a stream socket. * It calls the transport layer's snd_buf function. It relies on the * caller to commit polling changes. The caller should check conn->flags * for errors. */ static void si_conn_send(struct connection *conn) { struct stream_interface *si = conn->owner; struct channel *chn = si->ob; int ret; if (chn->pipe && conn->xprt->snd_pipe) { ret = conn->xprt->snd_pipe(conn, chn->pipe); if (ret > 0) chn->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA; if (!chn->pipe->data) { put_pipe(chn->pipe); chn->pipe = NULL; } if (conn->flags & CO_FL_ERROR) return; } /* At this point, the pipe is empty, but we may still have data pending * in the normal buffer. */ if (!chn->buf->o) return; /* when we're here, we already know that there is no spliced * data left, and that there are sendable buffered data. */ if (!(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_HANDSHAKE))) { /* check if we want to inform the kernel that we're interested in * sending more data after this call. We want this if : * - we're about to close after this last send and want to merge * the ongoing FIN with the last segment. * - we know we can't send everything at once and must get back * here because of unaligned data * - there is still a finite amount of data to forward * The test is arranged so that the most common case does only 2 * tests. 
*/ unsigned int send_flag = 0; if ((!(chn->flags & (CF_NEVER_WAIT|CF_SEND_DONTWAIT)) && ((chn->to_forward && chn->to_forward != CHN_INFINITE_FORWARD) || (chn->flags & CF_EXPECT_MORE))) || ((chn->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW)) send_flag |= CO_SFL_MSG_MORE; if (chn->flags & CF_STREAMER) send_flag |= CO_SFL_STREAMER; ret = conn->xprt->snd_buf(conn, chn->buf, send_flag); if (ret > 0) { chn->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA; if (!chn->buf->o) { /* Always clear both flags once everything has been sent, they're one-shot */ chn->flags &= ~(CF_EXPECT_MORE | CF_SEND_DONTWAIT); } /* if some data remain in the buffer, it's only because the * system buffers are full, we will try next time. */ } } return; }
/* * This function is called to send buffer data to a stream socket. * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll * before calling it again, otherwise 1. If a pipe was associated with the * buffer and it empties it, it releases it as well. */ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) { int write_poll = MAX_WRITE_POLL_LOOPS; int retval = 1; int ret, max; #if defined(CONFIG_HAP_LINUX_SPLICE) while (b->pipe) { ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data, SPLICE_F_MOVE|SPLICE_F_NONBLOCK); if (ret <= 0) { if (ret == 0 || errno == EAGAIN) { retval = 0; return retval; } /* here we have another error */ retval = -1; return retval; } b->flags |= BF_WRITE_PARTIAL; b->pipe->data -= ret; if (!b->pipe->data) { put_pipe(b->pipe); b->pipe = NULL; break; } if (--write_poll <= 0) return retval; } /* At this point, the pipe is empty, but we may still have data pending * in the normal buffer. */ #endif if (!b->send_max) { b->flags |= BF_OUT_EMPTY; return retval; } /* when we're in this loop, we already know that there is no spliced * data left, and that there are sendable buffered data. */ while (1) { if (b->r > b->w) max = b->r - b->w; else max = b->data + b->size - b->w; /* limit the amount of outgoing data if required */ if (max > b->send_max) max = b->send_max; /* check if we want to inform the kernel that we're interested in * sending more data after this call. We want this if : * - we're about to close after this last send and want to merge * the ongoing FIN with the last segment. * - we know we can't send everything at once and must get back * here because of unaligned data * - there is still a finite amount of data to forward * The test is arranged so that the most common case does only 2 * tests. 
*/ if (MSG_NOSIGNAL && MSG_MORE) { unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL; if (((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) || ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) || (max != b->l && max != b->send_max)) && (fdtab[si->fd].flags & FD_FL_TCP)) { send_flag |= MSG_MORE; } else if (b->flags & BF_EXPECT_MORE) { /* it was forced on the buffer, this flag is one-shoot */ b->flags &= ~BF_EXPECT_MORE; send_flag |= MSG_MORE; } /* this flag has precedence over the rest */ if (b->flags & BF_SEND_DONTWAIT) send_flag &= ~MSG_MORE; ret = send(si->fd, b->w, max, send_flag); /* disable it only once everything has been sent */ if (ret == max && (b->flags & BF_SEND_DONTWAIT)) b->flags &= ~BF_SEND_DONTWAIT; } else { int skerr; socklen_t lskerr = sizeof(skerr); ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); if (ret == -1 || skerr) ret = -1; else ret = send(si->fd, b->w, max, MSG_DONTWAIT); } if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; b->flags |= BF_WRITE_PARTIAL; b->w += ret; if (b->w == b->data + b->size) b->w = b->data; /* wrap around the buffer */ b->l -= ret; if (likely(b->l < buffer_max_len(b))) b->flags &= ~BF_FULL; if (likely(!b->l)) /* optimize data alignment in the buffer */ b->r = b->w = b->lr = b->data; b->send_max -= ret; if (!b->send_max) { if (likely(!b->pipe)) b->flags |= BF_OUT_EMPTY; break; } /* if the system buffer is full, don't insist */ if (ret < max) break; if (--write_poll <= 0) break; } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ retval = 0; break; } else { /* bad, we got an error */ retval = -1; break; } } /* while (1) */ return retval; }
/*
 * This is the callback which is called by the connection layer to receive data
 * into the buffer from the connection. It first tries to splice data through
 * the transport layer's rcv_pipe function when kernel splicing is enabled on
 * the channel, then iterates over the transport layer's rcv_buf function.
 *
 * Nothing is returned : results are reported through the flags of the input
 * channel (CF_READ_PARTIAL, CF_READ_NULL, CF_STREAMER, CF_STREAMER_FAST,
 * CF_KERN_SPLICING) and of the stream interface (SI_FL_WAIT_ROOM). When a
 * pending read0 is detected on the connection, the shutdown is processed
 * before returning.
 */
static void si_conn_recv_cb(struct connection *conn)
{
	struct stream_interface *si = conn->owner;
	struct channel *chn = si->ib;
	int ret, max, cur_read; /* cur_read: total bytes received during this call */
	int read_poll = MAX_READ_POLL_LOOPS;

	/* stop immediately on errors. Note that we DON'T want to stop on
	 * POLL_ERR, as the poller might report a write error while there
	 * are still data available in the recv buffer. This typically
	 * happens when we send too large a request to a backend server
	 * which rejects it before reading it all.
	 */
	if (conn->flags & CO_FL_ERROR)
		return;

	/* stop here if we reached the end of data */
	if (conn_data_read0_pending(conn))
		goto out_shutdown_r;

	/* maybe we were called immediately after an asynchronous shutr */
	if (chn->flags & CF_SHUTR)
		return;

	cur_read = 0;

	if ((chn->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !chn->buf->o &&
	    global.tune.idle_timer &&
	    (unsigned short)(now_ms - chn->last_read) >= global.tune.idle_timer) {
		/* The buffer had nothing left to send and nothing was read for
		 * at least global.tune.idle_timer (compared to the time elapsed
		 * since chn->last_read, truncated to an unsigned short). This
		 * was caused by a pause and not by congestion. Reset any
		 * streaming mode to reduce latency.
		 */
		chn->xfer_small = 0;
		chn->xfer_large = 0;
		chn->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
	}

	/* First, let's see if we may splice data across the channel without
	 * using a buffer. This requires that the transport layer provides
	 * rcv_pipe, that a pipe is already attached or that enough data remain
	 * to be forwarded (MIN_SPLICE_FORWARD), and that kernel splicing is
	 * enabled on the channel.
	 */
	if (conn->xprt->rcv_pipe &&
	    (chn->pipe || chn->to_forward >= MIN_SPLICE_FORWARD) &&
	    chn->flags & CF_KERN_SPLICING) {
		if (buffer_not_empty(chn->buf)) {
			/* We're embarrassed, there are already data pending in
			 * the buffer and we don't want to have them at two
			 * locations at a time. Give up on splicing for this
			 * call and go on with the standard copy below.
			 */
			goto abort_splice;
		}

		if (unlikely(chn->pipe == NULL)) {
			/* no pipe yet : take one unless the global limit is
			 * reached, otherwise disable splicing on the channel.
			 */
			if (pipes_used >= global.maxpipes || !(chn->pipe = get_pipe())) {
				chn->flags &= ~CF_KERN_SPLICING;
				goto abort_splice;
			}
		}

		ret = conn->xprt->rcv_pipe(conn, chn->pipe, chn->to_forward);
		if (ret < 0) {
			/* splice not supported on this end, let's disable it */
			chn->flags &= ~CF_KERN_SPLICING;
			goto abort_splice;
		}

		if (ret > 0) {
			/* account for the spliced bytes */
			if (chn->to_forward != CHN_INFINITE_FORWARD)
				chn->to_forward -= ret;
			chn->total += ret;
			cur_read += ret;
			chn->flags |= CF_READ_PARTIAL;
		}

		if (conn_data_read0_pending(conn))
			goto out_shutdown_r;

		if (conn->flags & CO_FL_ERROR)
			return;

		if (conn->flags & CO_FL_WAIT_ROOM) {
			/* the pipe is full or we have read enough data that it
			 * could soon be full. Let's stop before needing to poll.
			 */
			si->flags |= SI_FL_WAIT_ROOM;
			__conn_data_stop_recv(conn);
		}

		/* splice not possible (anymore), let's go on on standard copy */
	}

 abort_splice:
	/* never keep an empty pipe attached to the channel */
	if (chn->pipe && unlikely(!chn->pipe->data)) {
		put_pipe(chn->pipe);
		chn->pipe = NULL;
	}

	/* Important note : if we're called with POLL_IN|POLL_HUP, it means the read polling
	 * was enabled, which implies that the recv buffer was not full. So we have a guarantee
	 * that if such an event is not handled above in splice, it will be handled here by
	 * recv().
	 */
	while (!(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH | CO_FL_WAIT_ROOM | CO_FL_HANDSHAKE))) {
		max = bi_avail(chn);

		if (!max) {
			/* no room left in the channel for incoming data */
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		ret = conn->xprt->rcv_buf(conn, chn->buf, max);
		if (ret <= 0)
			break;

		cur_read += ret;

		/* if we're allowed to directly forward data, we must update ->o */
		if (chn->to_forward && !(chn->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
			unsigned long fwd = ret;

			/* never forward more than what remains to be forwarded */
			if (chn->to_forward != CHN_INFINITE_FORWARD) {
				if (fwd > chn->to_forward)
					fwd = chn->to_forward;
				chn->to_forward -= fwd;
			}
			b_adv(chn->buf, fwd);
		}

		chn->flags |= CF_READ_PARTIAL;
		chn->total += ret;

		if (channel_full(chn)) {
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		/* stop reading when the channel asks for a single read
		 * (CF_READ_DONTWAIT) or when the maximum number of
		 * consecutive reads for this call has been reached.
		 */
		if ((chn->flags & CF_READ_DONTWAIT) || --read_poll <= 0) {
			si->flags |= SI_FL_WAIT_ROOM;
			__conn_data_stop_recv(conn);
			break;
		}

		/* if too many bytes were missing from last read, it means that
		 * it's pointless trying to read again because the system does
		 * not have them in buffers.
		 */
		if (ret < max) {
			/* if a streamer has read few data, it may be because we
			 * have exhausted system buffers. It's not worth trying
			 * again.
			 */
			if (chn->flags & CF_STREAMER)
				break;

			/* if we read a large block smaller than what we requested,
			 * it's almost certain we'll never get anything more.
			 */
			if (ret >= global.tune.recv_enough)
				break;
		}
	} /* while !flags */

	if (conn->flags & CO_FL_ERROR)
		return;

	/* update the streamer detection counters according to the amount of
	 * data received during this call.
	 */
	if (cur_read) {
		if ((chn->flags & (CF_STREAMER | CF_STREAMER_FAST)) &&
		    (cur_read <= chn->buf->size / 2)) {
			chn->xfer_large = 0;
			chn->xfer_small++;
			if (chn->xfer_small >= 3) {
				/* we have read at most half of the buffer in
				 * one pass, and this happened at least 3 times.
				 * This is definitely not a streamer.
				 */
				chn->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
			}
			else if (chn->xfer_small >= 2) {
				/* we have read at most half of the buffer in
				 * one pass twice, so at least it is not a
				 * "fast streamer".
				 */
				chn->flags &= ~CF_STREAMER_FAST;
			}
		}
		else if (!(chn->flags & CF_STREAMER_FAST) &&
			 (cur_read >= chn->buf->size - global.tune.maxrewrite)) {
			/* we read a full buffer at once (its size minus the
			 * room reserved by global.tune.maxrewrite)
			 */
			chn->xfer_small = 0;
			chn->xfer_large++;
			if (chn->xfer_large >= 3) {
				/* we call this buffer a fast streamer if it manages
				 * to be filled in one call 3 consecutive times.
				 */
				chn->flags |= (CF_STREAMER | CF_STREAMER_FAST);
			}
		}
		else {
			/* neither a small nor a full read : restart counting */
			chn->xfer_small = 0;
			chn->xfer_large = 0;
		}
		chn->last_read = now_ms;
	}

	if (conn_data_read0_pending(conn))
		/* connection closed */
		goto out_shutdown_r;

	return;

 out_shutdown_r:
	/* we received a shutdown : report it on the channel, schedule a
	 * shutdown for writes if the channel is in auto-close mode, then let
	 * the stream interface and the connection process the read0.
	 */
	chn->flags |= CF_READ_NULL;
	if (chn->flags & CF_AUTO_CLOSE)
		channel_shutw_now(chn);
	stream_sock_read0(si);
	conn_data_read0(conn);
	return;
}