/* Tries to copy block <blk> at once into the channel's buffer after length * controls. The chn->o and to_forward pointers are updated. If the channel * input is closed, -2 is returned. If the block is too large for this buffer, * -3 is returned. If there is not enough room left in the buffer, -1 is * returned. Otherwise the number of bytes copied is returned (0 being a valid * number). Channel flag READ_PARTIAL is updated if some data can be * transferred. Channel flag CF_WAKE_WRITE is set if the write fails because * the buffer is full. */ int bi_putblk(struct channel *chn, const char *blk, int len) { int max; if (unlikely(channel_input_closed(chn))) return -2; max = buffer_max_len(chn); if (unlikely(len > max - buffer_len(chn->buf))) { /* we can't write this chunk right now because the buffer is * almost full or because the block is too large. Return the * available space or -2 if impossible. */ if (len > max) return -3; chn->flags |= CF_WAKE_WRITE; return -1; } if (unlikely(len == 0)) return 0; /* OK so the data fits in the buffer in one or two blocks */ max = buffer_contig_space(chn->buf); memcpy(bi_end(chn->buf), blk, MIN(len, max)); if (len > max) memcpy(chn->buf->data, blk + max, len - max); chn->buf->i += len; chn->total += len; if (chn->to_forward) { unsigned long fwd = len; if (chn->to_forward != CHN_INFINITE_FORWARD) { if (fwd > chn->to_forward) fwd = chn->to_forward; chn->to_forward -= fwd; } b_adv(chn->buf, fwd); } /* notify that some data was read from the SI into the buffer */ chn->flags |= CF_READ_PARTIAL; return len; }
/* * This function is called to send buffer data to a stream socket. * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll * before calling it again, otherwise 1. If a pipe was associated with the * buffer and it empties it, it releases it as well. */ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) { int write_poll = MAX_WRITE_POLL_LOOPS; int retval = 1; int ret, max; if (unlikely(si->send_proxy_ofs)) { /* The target server expects a PROXY line to be sent first. * If the send_proxy_ofs is negative, it corresponds to the * offset to start sending from then end of the proxy string * (which is recomputed every time since it's constant). If * it is positive, it means we have to send from the start. */ ret = make_proxy_line(trash, sizeof(trash), &b->prod->addr.from, &b->prod->addr.to); if (!ret) return -1; if (si->send_proxy_ofs > 0) si->send_proxy_ofs = -ret; /* first call */ /* we have to send trash from (ret+sp for -sp bytes) */ ret = send(si->fd, trash + ret + si->send_proxy_ofs, -si->send_proxy_ofs, (b->flags & BF_OUT_EMPTY) ? 
0 : MSG_MORE); if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; si->send_proxy_ofs += ret; /* becomes zero once complete */ b->flags |= BF_WRITE_NULL; /* connect() succeeded */ } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ return 0; } else { /* bad, we got an error */ return -1; } } #if defined(CONFIG_HAP_LINUX_SPLICE) while (b->pipe) { ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data, SPLICE_F_MOVE|SPLICE_F_NONBLOCK); if (ret <= 0) { if (ret == 0 || errno == EAGAIN) { retval = 0; return retval; } /* here we have another error */ retval = -1; return retval; } b->flags |= BF_WRITE_PARTIAL; b->pipe->data -= ret; if (!b->pipe->data) { put_pipe(b->pipe); b->pipe = NULL; break; } if (--write_poll <= 0) return retval; /* The only reason we did not empty the pipe is that the output * buffer is full. */ return 0; } /* At this point, the pipe is empty, but we may still have data pending * in the normal buffer. */ #endif if (!b->send_max) { b->flags |= BF_OUT_EMPTY; return retval; } /* when we're in this loop, we already know that there is no spliced * data left, and that there are sendable buffered data. */ while (1) { if (b->r > b->w) max = b->r - b->w; else max = b->data + b->size - b->w; /* limit the amount of outgoing data if required */ if (max > b->send_max) max = b->send_max; /* check if we want to inform the kernel that we're interested in * sending more data after this call. We want this if : * - we're about to close after this last send and want to merge * the ongoing FIN with the last segment. * - we know we can't send everything at once and must get back * here because of unaligned data * - there is still a finite amount of data to forward * The test is arranged so that the most common case does only 2 * tests. 
*/ if (MSG_NOSIGNAL && MSG_MORE) { unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL; if ((!(b->flags & BF_NEVER_WAIT) && ((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) || (b->flags & BF_EXPECT_MORE))) || ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) || (max != b->l && max != b->send_max)) { send_flag |= MSG_MORE; } /* this flag has precedence over the rest */ if (b->flags & BF_SEND_DONTWAIT) send_flag &= ~MSG_MORE; ret = send(si->fd, b->w, max, send_flag); } else { int skerr; socklen_t lskerr = sizeof(skerr); ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); if (ret == -1 || skerr) ret = -1; else ret = send(si->fd, b->w, max, MSG_DONTWAIT); } if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; b->flags |= BF_WRITE_PARTIAL; b->w += ret; if (b->w == b->data + b->size) b->w = b->data; /* wrap around the buffer */ b->l -= ret; if (likely(b->l < buffer_max_len(b))) b->flags &= ~BF_FULL; if (likely(!b->l)) /* optimize data alignment in the buffer */ b->r = b->w = b->lr = b->data; b->send_max -= ret; if (!b->send_max) { /* Always clear both flags once everything has been sent, they're one-shot */ b->flags &= ~(BF_EXPECT_MORE | BF_SEND_DONTWAIT); if (likely(!b->pipe)) b->flags |= BF_OUT_EMPTY; break; } /* if the system buffer is full, don't insist */ if (ret < max) break; if (--write_poll <= 0) break; } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ retval = 0; break; } else { /* bad, we got an error */ retval = -1; break; } } /* while (1) */ return retval; }
/*
 * This function is called on a read event from a stream socket.
 * It returns 0 if we have a high confidence that we will not be
 * able to read more data without polling first. Returns non-zero
 * otherwise.
 *
 * <fd> is used as an index into fdtab[]. The stream interface is taken from
 * fdtab[fd].owner and the data are received into its input buffer (si->ib).
 *
 * The function has three exit labels which all end in out_wakeup :
 *   - out_wakeup     : calls the consumer's chk_snd() when it looks useful,
 *                      updates read polling and the read timeout, wakes the
 *                      owner task up when needed and returns <retval> ;
 *   - out_shutdown_r : records a read shutdown (BF_READ_NULL) and calls
 *                      stream_sock_shutr() ;
 *   - out_error      : marks the FD as FD_STERROR, removes it from the poller
 *                      and sets SI_FL_ERR on the stream interface.
 */
int stream_sock_read(int fd) {
	struct stream_interface *si = fdtab[fd].owner;
	struct buffer *b = si->ib;
	int ret, max, retval, cur_read;
	int read_poll = MAX_READ_POLL_LOOPS; /* max number of recv() attempts per call */

#ifdef DEBUG_FULL
	fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
#endif

	retval = 1;

	/* stop immediately on errors. Note that we DON'T want to stop on
	 * POLL_ERR, as the poller might report a write error while there
	 * are still data available in the recv buffer. This typically
	 * happens when we send too large a request to a backend server
	 * which rejects it before reading it all.
	 */
	if (fdtab[fd].state == FD_STERROR)
		goto out_error;

	/* stop here if we reached the end of data (HUP reported without IN) */
	if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
		goto out_shutdown_r;

	/* maybe we were called immediately after an asynchronous shutr */
	if (b->flags & BF_SHUTR)
		goto out_wakeup;

#if defined(CONFIG_HAP_LINUX_SPLICE)
	if (b->to_forward >= MIN_SPLICE_FORWARD && b->flags & BF_KERN_SPLICING) {

		/* Under Linux, if FD_POLL_HUP is set, we have reached the end.
		 * Since older splice() implementations were buggy and returned
		 * EAGAIN on end of read, let's bypass the call to splice() now.
		 */
		if (fdtab[fd].ev & FD_POLL_HUP)
			goto out_shutdown_r;

		retval = stream_sock_splice_in(b, si);

		/* a non-negative result ends the call here, a negative one
		 * falls back to the recv() loop below.
		 */
		if (retval >= 0) {
			if (si->flags & SI_FL_ERR)
				goto out_error;
			if (b->flags & BF_READ_NULL)
				goto out_shutdown_r;
			goto out_wakeup;
		}
		/* splice not possible (anymore), let's go on with the standard copy */
	}
#endif
	cur_read = 0; /* total number of bytes received during this call */
	while (1) {
		max = buffer_max_len(b) - b->l;

		if (max <= 0) {
			/* no room left : flag the buffer full and wait for room */
			b->flags |= BF_FULL;
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		/*
		 * 1. compute the maximum block size we can read at once.
		 */

		if (b->l == 0) {
			/* let's realign the buffer to optimize I/O */
			b->r = b->w = b->lr = b->data;
		}
		else if (b->r > b->w) {
			/* remaining space wraps at the end, with a moving limit */
			if (max > b->data + b->size - b->r)
				max = b->data + b->size - b->r;
		}
		/* else max is already OK */

		/*
		 * 2. read the largest possible block
		 */
		ret = recv(fd, b->r, max, 0);

		if (ret > 0) {
			b->r += ret;
			b->l += ret;
			cur_read += ret;

			/* if we're allowed to directly forward data, we must update send_max */
			if (b->to_forward && !(b->flags & (BF_SHUTW|BF_SHUTW_NOW))) {
				unsigned long fwd = ret;
				/* a finite forward budget is consumed, an infinite one is left as is */
				if (b->to_forward != BUF_INFINITE_FORWARD) {
					if (fwd > b->to_forward)
						fwd = b->to_forward;
					b->to_forward -= fwd;
				}
				b->send_max += fwd;
				b->flags &= ~BF_OUT_EMPTY;
			}

			/* receiving data on a connecting FD promotes it to ready */
			if (fdtab[fd].state == FD_STCONN)
				fdtab[fd].state = FD_STREADY;

			b->flags |= BF_READ_PARTIAL;

			if (b->r == b->data + b->size) {
				b->r = b->data; /* wrap around the buffer */
			}

			b->total += ret;

			if (b->l >= buffer_max_len(b)) {
				/* The buffer is now full, there's no point in going through
				 * the loop again. Update the streamer detection counters
				 * before leaving.
				 */
				if (!(b->flags & BF_STREAMER_FAST) && (cur_read == b->l)) {
					b->xfer_small = 0;
					b->xfer_large++;
					if (b->xfer_large >= 3) {
						/* we call this buffer a fast streamer if it manages
						 * to be filled in one call 3 consecutive times.
						 */
						b->flags |= (BF_STREAMER | BF_STREAMER_FAST);
						//fputc('+', stderr);
					}
				}
				else if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
					 (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 2) {
						/* if the buffer has been at least half full twice,
						 * we receive faster than we send, so at least it
						 * is not a "fast streamer".
						 */
						b->flags &= ~BF_STREAMER_FAST;
						//fputc('-', stderr);
					}
				}
				else {
					b->xfer_small = 0;
					b->xfer_large = 0;
				}

				b->flags |= BF_FULL;
				si->flags |= SI_FL_WAIT_ROOM;
				break;
			}

			/* if too many bytes were missing from last read, it means that
			 * it's pointless trying to read again because the system does
			 * not have them in buffers. BTW, if FD_POLL_HUP was present,
			 * it means that we have reached the end and that the connection
			 * is closed.
			 */
			if (ret < max) {
				if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
				    (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 3) {
						/* we have read less than half of the buffer in
						 * one pass, and this happened at least 3 times.
						 * This is definitely not a streamer.
						 */
						b->flags &= ~(BF_STREAMER | BF_STREAMER_FAST);
						//fputc('!', stderr);
					}
				}
				/* unfortunately, on level-triggered events, POLL_HUP
				 * is generally delivered AFTER the system buffer is
				 * empty, so this one might never match.
				 */
				if (fdtab[fd].ev & FD_POLL_HUP)
					goto out_shutdown_r;

				/* if a streamer has read few data, it may be because we
				 * have exhausted system buffers. It's not worth trying
				 * again.
				 */
				if (b->flags & BF_STREAMER)
					break;

				/* generally if we read something smaller than 1 or 2 MSS,
				 * it means that either we have exhausted the system's
				 * buffers (streamer or question-response protocol) or
				 * that the connection will be closed. Streamers are
				 * easily detected so we return early. For other cases,
				 * it's still better to perform a last read to be sure,
				 * because it may save one complete poll/read/wakeup cycle
				 * in case of shutdown.
				 *
				 * NOTE(review): BF_STREAMER was already tested just above
				 * and left the loop, so this condition can no longer be
				 * true when reached ; kept as is.
				 */
				if (ret < MIN_RET_FOR_READ_LOOP && b->flags & BF_STREAMER)
					break;

				/* if we read a large block smaller than what we requested,
				 * it's almost certain we'll never get anything more.
				 */
				if (ret >= global.tune.recv_enough)
					break;
			}

			/* stop after a single read if requested, or once the
			 * per-call read budget is exhausted.
			 */
			if ((b->flags & BF_READ_DONTWAIT) || --read_poll <= 0)
				break;
		}
		else if (ret == 0) {
			/* connection closed */
			goto out_shutdown_r;
		}
		else if (errno == EAGAIN) {
			/* Ignore EAGAIN but inform the poller that there is
			 * nothing to read left if we did not read much, ie
			 * less than what we were still expecting to read.
			 * But we may have done some work justifying to notify
			 * the task.
			 */
			if (cur_read < MIN_RET_FOR_READ_LOOP)
				retval = 0;
			break;
		}
		else {
			goto out_error;
		}
	} /* while (1) */

 out_wakeup:
	/* We might have some data the consumer is waiting for.
	 * We can do fast-forwarding, but we avoid doing this for partial
	 * buffers, because it is very likely that it will be done again
	 * immediately afterwards once the following data is parsed (eg:
	 * HTTP chunking).
	 */
	if (b->pipe || /* always try to send spliced data */
	    (b->send_max == b->l && (b->cons->flags & SI_FL_WAIT_DATA))) {
		/* amount of data in the pipe before the consumer is called */
		int last_len = b->pipe ? b->pipe->data : 0;

		b->cons->chk_snd(b->cons);

		/* check if the consumer has freed some space : the buffer must
		 * not be full, and the pipe (if any was in use) must have been
		 * released or at least partially drained.
		 */
		if (!(b->flags & BF_FULL) &&
		    (!last_len || !b->pipe || b->pipe->data < last_len))
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	if (si->flags & SI_FL_WAIT_ROOM) {
		/* still waiting for room : stop polling for reads and disable
		 * the read timeout.
		 */
		EV_FD_CLR(fd, DIR_RD);
		b->rex = TICK_ETERNITY;
	}
	else if ((b->flags & (BF_SHUTR|BF_READ_PARTIAL|BF_FULL|BF_DONT_READ|BF_READ_NOEXP)) == BF_READ_PARTIAL)
		/* of all the flags tested, only BF_READ_PARTIAL is set :
		 * rearm the read timeout from the current date.
		 */
		b->rex = tick_add_ifset(now_ms, b->rto);

	/* we have to wake up if there is a special event or if we don't have
	 * any more data to forward.
	 */
	if ((b->flags & (BF_READ_NULL|BF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((b->flags & BF_READ_PARTIAL) && (!b->to_forward || b->cons->state != SI_ST_EST)))
		task_wakeup(si->owner, TASK_WOKEN_IO);

	/* BF_READ_DONTWAIT is dropped as soon as some read activity is reported */
	if (b->flags & BF_READ_ACTIVITY)
		b->flags &= ~BF_READ_DONTWAIT;

	fdtab[fd].ev &= ~FD_POLL_IN;
	return retval;

 out_shutdown_r:
	/* we received a shutdown */
	fdtab[fd].ev &= ~FD_POLL_HUP;
	b->flags |= BF_READ_NULL;
	if (b->flags & BF_AUTO_CLOSE)
		buffer_shutw_now(b);
	stream_sock_shutr(si);
	goto out_wakeup;

 out_error:
	/* Read error on the file descriptor. We mark the FD as STERROR so
	 * that we don't use it anymore. The error is reported to the stream
	 * interface which will take proper action. We must not perturbate the
	 * buffer because the stream interface wants to ensure transparent
	 * connection retries.
	 */

	fdtab[fd].state = FD_STERROR;
	fdtab[fd].ev &= ~FD_POLL_STICKY;
	EV_FD_REM(fd);
	si->flags |= SI_FL_ERR;
	retval = 1;
	goto out_wakeup;
}
/* Return the version of the SSL protocol in the request. It supports both * SSLv3 (TLSv1) header format for any message, and SSLv2 header format for * the hello message. The SSLv3 format is described in RFC 2246 p49, and the * SSLv2 format is described here, and completed p67 of RFC 2246 : * http://wp.netscape.com/eng/security/SSL_2.html * * Note: this decoder only works with non-wrapping data. */ static int smp_fetch_req_ssl_ver(struct proxy *px, struct session *s, void *l7, unsigned int opt, const struct arg *args, struct sample *smp, const char *kw) { int version, bleft, msg_len; const unsigned char *data; if (!s || !s->req) return 0; msg_len = 0; bleft = s->req->buf->i; if (!bleft) goto too_short; data = (const unsigned char *)s->req->buf->p; if ((*data >= 0x14 && *data <= 0x17) || (*data == 0xFF)) { /* SSLv3 header format */ if (bleft < 5) goto too_short; version = (data[1] << 16) + data[2]; /* version: major, minor */ msg_len = (data[3] << 8) + data[4]; /* record length */ /* format introduced with SSLv3 */ if (version < 0x00030000) goto not_ssl; /* message length between 1 and 2^14 + 2048 */ if (msg_len < 1 || msg_len > ((1<<14) + 2048)) goto not_ssl; bleft -= 5; data += 5; } else { /* SSLv2 header format, only supported for hello (msg type 1) */ int rlen, plen, cilen, silen, chlen; if (*data & 0x80) { if (bleft < 3) goto too_short; /* short header format : 15 bits for length */ rlen = ((data[0] & 0x7F) << 8) | data[1]; plen = 0; bleft -= 2; data += 2; } else { if (bleft < 4) goto too_short; /* long header format : 14 bits for length + pad length */ rlen = ((data[0] & 0x3F) << 8) | data[1]; plen = data[2]; bleft -= 3; data += 2; } if (*data != 0x01) goto not_ssl; bleft--; data++; if (bleft < 8) goto too_short; version = (data[0] << 16) + data[1]; /* version: major, minor */ cilen = (data[2] << 8) + data[3]; /* cipher len, multiple of 3 */ silen = (data[4] << 8) + data[5]; /* session_id_len: 0 or 16 */ chlen = (data[6] << 8) + data[7]; /* 16<=challenge 
length<=32 */ bleft -= 8; data += 8; if (cilen % 3 != 0) goto not_ssl; if (silen && silen != 16) goto not_ssl; if (chlen < 16 || chlen > 32) goto not_ssl; if (rlen != 9 + cilen + silen + chlen) goto not_ssl; /* focus on the remaining data length */ msg_len = cilen + silen + chlen + plen; } /* We could recursively check that the buffer ends exactly on an SSL * fragment boundary and that a possible next segment is still SSL, * but that's a bit pointless. However, we could still check that * all the part of the request which fits in a buffer is already * there. */ if (msg_len > buffer_max_len(s->req) + s->req->buf->data - s->req->buf->p) msg_len = buffer_max_len(s->req) + s->req->buf->data - s->req->buf->p; if (bleft < msg_len) goto too_short; /* OK that's enough. We have at least the whole message, and we have * the protocol version. */ smp->type = SMP_T_UINT; smp->data.uint = version; smp->flags = SMP_F_VOLATILE; return 1; too_short: smp->flags = SMP_F_MAY_CHANGE; not_ssl: return 0; }
/* * This function is called to send buffer data to a stream socket. * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll * before calling it again, otherwise 1. If a pipe was associated with the * buffer and it empties it, it releases it as well. */ static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b) { int write_poll = MAX_WRITE_POLL_LOOPS; int retval = 1; int ret, max; #if defined(CONFIG_HAP_LINUX_SPLICE) while (b->pipe) { ret = splice(b->pipe->cons, NULL, si->fd, NULL, b->pipe->data, SPLICE_F_MOVE|SPLICE_F_NONBLOCK); if (ret <= 0) { if (ret == 0 || errno == EAGAIN) { retval = 0; return retval; } /* here we have another error */ retval = -1; return retval; } b->flags |= BF_WRITE_PARTIAL; b->pipe->data -= ret; if (!b->pipe->data) { put_pipe(b->pipe); b->pipe = NULL; break; } if (--write_poll <= 0) return retval; } /* At this point, the pipe is empty, but we may still have data pending * in the normal buffer. */ #endif if (!b->send_max) { b->flags |= BF_OUT_EMPTY; return retval; } /* when we're in this loop, we already know that there is no spliced * data left, and that there are sendable buffered data. */ while (1) { if (b->r > b->w) max = b->r - b->w; else max = b->data + b->size - b->w; /* limit the amount of outgoing data if required */ if (max > b->send_max) max = b->send_max; /* check if we want to inform the kernel that we're interested in * sending more data after this call. We want this if : * - we're about to close after this last send and want to merge * the ongoing FIN with the last segment. * - we know we can't send everything at once and must get back * here because of unaligned data * - there is still a finite amount of data to forward * The test is arranged so that the most common case does only 2 * tests. 
*/ if (MSG_NOSIGNAL && MSG_MORE) { unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL; if (((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) || ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) || (max != b->l && max != b->send_max)) && (fdtab[si->fd].flags & FD_FL_TCP)) { send_flag |= MSG_MORE; } else if (b->flags & BF_EXPECT_MORE) { /* it was forced on the buffer, this flag is one-shoot */ b->flags &= ~BF_EXPECT_MORE; send_flag |= MSG_MORE; } /* this flag has precedence over the rest */ if (b->flags & BF_SEND_DONTWAIT) send_flag &= ~MSG_MORE; ret = send(si->fd, b->w, max, send_flag); /* disable it only once everything has been sent */ if (ret == max && (b->flags & BF_SEND_DONTWAIT)) b->flags &= ~BF_SEND_DONTWAIT; } else { int skerr; socklen_t lskerr = sizeof(skerr); ret = getsockopt(si->fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr); if (ret == -1 || skerr) ret = -1; else ret = send(si->fd, b->w, max, MSG_DONTWAIT); } if (ret > 0) { if (fdtab[si->fd].state == FD_STCONN) fdtab[si->fd].state = FD_STREADY; b->flags |= BF_WRITE_PARTIAL; b->w += ret; if (b->w == b->data + b->size) b->w = b->data; /* wrap around the buffer */ b->l -= ret; if (likely(b->l < buffer_max_len(b))) b->flags &= ~BF_FULL; if (likely(!b->l)) /* optimize data alignment in the buffer */ b->r = b->w = b->lr = b->data; b->send_max -= ret; if (!b->send_max) { if (likely(!b->pipe)) b->flags |= BF_OUT_EMPTY; break; } /* if the system buffer is full, don't insist */ if (ret < max) break; if (--write_poll <= 0) break; } else if (ret == 0 || errno == EAGAIN) { /* nothing written, we need to poll for write first */ retval = 0; break; } else { /* bad, we got an error */ retval = -1; break; } } /* while (1) */ return retval; }