/* Marks a ready listener as limited so that we only try to re-enable it when
 * resources are free again. It will be queued into the specified queue. It
 * has no effect on listeners which are not in the LI_READY state.
 */
void limit_listener(struct listener *l, struct list *list)
{
	/* only a ready listener can become limited */
	if (l->state != LI_READY)
		return;

	LIST_ADDQ(list, &l->wait_queue);
	EV_FD_CLR(l->fd, DIR_RD);
	l->state = LI_LIMITED;
}
/* This function removes the specified listener's file descriptor from the * polling lists if it is in the LI_READY or in the LI_FULL state. The listener * enters LI_LISTEN. */ void disable_listener(struct listener *listener) { if (listener->state < LI_READY) return; if (listener->state == LI_READY) EV_FD_CLR(listener->fd, DIR_RD); if (listener->state == LI_LIMITED) LIST_DEL(&listener->wait_queue); listener->state = LI_LISTEN; }
/* Marks a ready listener as full so that the session code tries to re-enable
 * it upon next close() using resume_listener(). Listeners below LI_READY are
 * left untouched.
 */
void listener_full(struct listener *l)
{
	if (l->state < LI_READY)
		return;

	/* a limited listener sits in a wait queue; unlink it first */
	if (l->state == LI_LIMITED)
		LIST_DEL(&l->wait_queue);

	EV_FD_CLR(l->fd, DIR_RD);
	l->state = LI_FULL;
}
/* * This function performs a shutdown-write on a stream interface in a connected or * init state (it does nothing for other states). It either shuts the write side * or closes the file descriptor and marks itself as closed. The buffer flags are * updated to reflect the new state. It does also close everything is the SI was * marked as being in error state. */ void stream_sock_shutw(struct stream_interface *si) { si->ob->flags &= ~BF_SHUTW_NOW; if (si->ob->flags & BF_SHUTW) return; si->ob->flags |= BF_SHUTW; si->ob->wex = TICK_ETERNITY; si->flags &= ~SI_FL_WAIT_DATA; switch (si->state) { case SI_ST_EST: /* we have to shut before closing, otherwise some short messages * may never leave the system, especially when there are remaining * unread data in the socket input buffer, or when nolinger is set. * However, if SI_FL_NOLINGER is explicitly set, we know there is * no risk so we close both sides immediately. */ if (si->flags & SI_FL_ERR) { /* quick close, the socket is already shut. Remove pending flags. */ si->flags &= ~SI_FL_NOLINGER; } else if (si->flags & SI_FL_NOLINGER) { si->flags &= ~SI_FL_NOLINGER; setsockopt(si->fd, SOL_SOCKET, SO_LINGER, (struct linger *) &nolinger, sizeof(struct linger)); } else { EV_FD_CLR(si->fd, DIR_WR); shutdown(si->fd, SHUT_WR); if (!(si->ib->flags & (BF_SHUTR|BF_DONT_READ))) return; } /* fall through */ case SI_ST_CON: /* we may have to close a pending connection, and mark the * response buffer as shutr */ fd_delete(si->fd); /* fall through */ case SI_ST_CER: case SI_ST_QUE: case SI_ST_TAR: si->state = SI_ST_DIS; default: si->flags &= ~SI_FL_WAIT_ROOM; si->ib->flags |= BF_SHUTR; si->ib->rex = TICK_ETERNITY; si->exp = TICK_ETERNITY; return; } if (si->release) si->release(si); }
/* * Linux unbinds the listen socket after a SHUT_RD, and ignores SHUT_WR. * Solaris refuses either shutdown(). * OpenBSD ignores SHUT_RD but closes upon SHUT_WR and refuses to rebind. * So a common validation path involves SHUT_WR && listen && SHUT_RD. * If disabling at least one listener returns an error, then the proxy * state is set to PR_STERROR because we don't know how to resume from this. */ void pause_proxy(struct proxy *p) { struct listener *l; for (l = p->listen; l != NULL; l = l->next) { if (shutdown(l->fd, SHUT_WR) == 0 && listen(l->fd, p->backlog ? p->backlog : p->maxconn) == 0 && shutdown(l->fd, SHUT_RD) == 0) { EV_FD_CLR(l->fd, DIR_RD); if (p->state != PR_STERROR) p->state = PR_STPAUSED; } else p->state = PR_STERROR; } }
/* This function tries to temporarily disable a listener, depending on the OS * capabilities. Linux unbinds the listen socket after a SHUT_RD, and ignores * SHUT_WR. Solaris refuses either shutdown(). OpenBSD ignores SHUT_RD but * closes upon SHUT_WR and refuses to rebind. So a common validation path * involves SHUT_WR && listen && SHUT_RD. In case of success, the FD's polling * is disabled. It normally returns non-zero, unless an error is reported. */ int pause_listener(struct listener *l) { if (l->state <= LI_PAUSED) return 1; if (shutdown(l->fd, SHUT_WR) != 0) return 0; /* Solaris dies here */ if (listen(l->fd, l->backlog ? l->backlog : l->maxconn) != 0) return 0; /* OpenBSD dies here */ if (shutdown(l->fd, SHUT_RD) != 0) return 0; /* should always be OK */ if (l->state == LI_LIMITED) LIST_DEL(&l->wait_queue); EV_FD_CLR(l->fd, DIR_RD); l->state = LI_PAUSED; return 1; }
/* * This function performs a shutdown-read on a stream interface in a connected or * init state (it does nothing for other states). It either shuts the read side * or closes the file descriptor and marks itself as closed. The buffer flags are * updated to reflect the new state. */ void stream_sock_shutr(struct stream_interface *si) { si->ib->flags &= ~BF_SHUTR_NOW; if (si->ib->flags & BF_SHUTR) return; si->ib->flags |= BF_SHUTR; si->ib->rex = TICK_ETERNITY; si->flags &= ~SI_FL_WAIT_ROOM; if (si->state != SI_ST_EST && si->state != SI_ST_CON) return; if (si->ob->flags & BF_SHUTW) { fd_delete(si->fd); si->state = SI_ST_DIS; si->exp = TICK_ETERNITY; return; } EV_FD_CLR(si->fd, DIR_RD); return; }
/* Returns :
 *   -1 if splice is not possible or not possible anymore and we must switch to
 *      user-land copy (eg: to_forward reached)
 *    0 when we know that polling is required to get more data (EAGAIN)
 *    1 for all other cases (we can safely try again, or if an activity has been
 *      detected (DATA/NULL/ERR))
 * Sets :
 *   BF_READ_NULL
 *   BF_READ_PARTIAL
 *   BF_WRITE_PARTIAL (during copy)
 *   BF_OUT_EMPTY (during copy)
 *   SI_FL_ERR
 *   SI_FL_WAIT_ROOM
 *   (SI_FL_WAIT_RECV)
 *
 * This function automatically allocates a pipe from the pipe pool. It also
 * carefully ensures to clear b->pipe whenever it leaves the pipe empty.
 */
static int stream_sock_splice_in(struct buffer *b, struct stream_interface *si)
{
	/* remembers, process-wide, whether this kernel reports connection
	 * close via a zero return from splice() (>= 2.6.27.13).
	 */
	static int splice_detects_close;
	int fd = si->fd;
	int ret;
	unsigned long max;
	int retval = 1;

	/* splicing is only used for forwarded data; fall back otherwise */
	if (!b->to_forward)
		return -1;

	if (!(b->flags & BF_KERN_SPLICING))
		return -1;

	if (b->l) {
		/* We're embarrassed, there are already data pending in
		 * the buffer and we don't want to have them at two
		 * locations at a time. Let's indicate we need some
		 * place and ask the consumer to hurry.
		 */
		si->flags |= SI_FL_WAIT_ROOM;
		EV_FD_CLR(fd, DIR_RD);
		b->rex = TICK_ETERNITY;
		b->cons->chk_snd(b->cons);
		return 1;
	}

	if (unlikely(b->pipe == NULL)) {
		/* no pipe yet: try to grab one from the pool, respecting the
		 * global pipe budget; give up splicing entirely on failure.
		 */
		if (pipes_used >= global.maxpipes || !(b->pipe = get_pipe())) {
			b->flags &= ~BF_KERN_SPLICING;
			return -1;
		}
	}

	/* At this point, b->pipe is valid */

	while (1) {
		if (b->to_forward == BUF_INFINITE_FORWARD)
			max = MAX_SPLICE_AT_ONCE;
		else
			max = b->to_forward;

		if (!max) {
			/* It looks like the buffer + the pipe already contain
			 * the maximum amount of data to be transferred. Try to
			 * send those data immediately on the other side if it
			 * is currently waiting.
			 */
			retval = -1; /* end of forwarding */
			break;
		}

		ret = splice(fd, NULL, b->pipe->prod, NULL, max,
			     SPLICE_F_MOVE|SPLICE_F_NONBLOCK);

		if (ret <= 0) {
			if (ret == 0) {
				/* connection closed. This is only detected by
				 * recent kernels (>= 2.6.27.13). If we notice
				 * it works, we store the info for later use.
				 */
				splice_detects_close = 1;
				b->flags |= BF_READ_NULL;
				retval = 1; /* no need for further polling */
				break;
			}

			if (errno == EAGAIN) {
				/* there are two reasons for EAGAIN :
				 *   - nothing in the socket buffer (standard)
				 *   - pipe is full
				 *   - the connection is closed (kernel < 2.6.27.13)
				 * Since we don't know if pipe is full, we'll
				 * stop if the pipe is not empty. Anyway, we
				 * will almost always fill/empty the pipe.
				 */
				if (b->pipe->data) {
					si->flags |= SI_FL_WAIT_ROOM;
					retval = 1;
					break;
				}

				/* We don't know if the connection was closed,
				 * but if we know splice detects close, then we
				 * know it for sure.
				 * But if we're called upon POLLIN with an empty
				 * pipe and get EAGAIN, it is suspect enough to
				 * try to fall back to the normal recv scheme
				 * which will be able to deal with the situation.
				 */
				if (splice_detects_close)
					retval = 0; /* we know for sure that it's EAGAIN */
				else
					retval = -1;
				break;
			}

			if (errno == ENOSYS || errno == EINVAL) {
				/* splice not supported on this end, disable it */
				b->flags &= ~BF_KERN_SPLICING;
				si->flags &= ~SI_FL_CAP_SPLICE;
				put_pipe(b->pipe);
				b->pipe = NULL;
				return -1;
			}

			/* here we have another error */
			si->flags |= SI_FL_ERR;
			retval = 1;
			break;
		} /* ret <= 0 */

		/* account the bytes now sitting in the pipe */
		if (b->to_forward != BUF_INFINITE_FORWARD)
			b->to_forward -= ret;
		b->total += ret;
		b->pipe->data += ret;
		b->flags |= BF_READ_PARTIAL;
		b->flags &= ~BF_OUT_EMPTY;

		if (b->pipe->data >= SPLICE_FULL_HINT ||
		    ret >= global.tune.recv_enough) {
			/* We've read enough of it for this time. */
			retval = 1;
			break;
		}
	} /* while */

	/* never keep an empty pipe allocated: return it to the pool */
	if (unlikely(!b->pipe->data)) {
		put_pipe(b->pipe);
		b->pipe = NULL;
	}

	return retval;
}
/*
 * This function is called on a write event from a stream socket.
 * It returns 0 if the caller needs to poll before calling it again, otherwise
 * non-zero.
 */
int stream_sock_write(int fd)
{
	struct stream_interface *si = fdtab[fd].owner;
	struct buffer *b = si->ob;
	int retval = 1;

#ifdef DEBUG_FULL
	fprintf(stderr,"stream_sock_write : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
#endif

	retval = 1;
	if (fdtab[fd].state == FD_STERROR)
		goto out_error;

	/* we might have been called just after an asynchronous shutw */
	if (b->flags & BF_SHUTW)
		goto out_wakeup;

	if (likely(!(b->flags & BF_OUT_EMPTY) || si->send_proxy_ofs)) {
		/* OK there are data waiting to be sent */
		retval = stream_sock_write_loop(si, b);
		if (retval < 0)
			goto out_error;
		else if (retval == 0 && si->send_proxy_ofs)
			goto out_may_wakeup; /* we failed to send the PROXY string */
	}
	else {
		/* may be we have received a connection acknowledgement in TCP mode without data */
		if (likely(fdtab[fd].state == FD_STCONN)) {
			/* We have no data to send to check the connection, and
			 * getsockopt() will not inform us whether the connection
			 * is still pending. So we'll reuse connect() to check the
			 * state of the socket. This has the advantage of giving us
			 * the following info :
			 *  - error
			 *  - connecting (EALREADY, EINPROGRESS)
			 *  - connected (EISCONN, 0)
			 */
			if ((connect(fd, fdinfo[fd].peeraddr, fdinfo[fd].peerlen) == 0))
				errno = 0;

			if (errno == EALREADY || errno == EINPROGRESS) {
				/* still connecting: poll again later */
				retval = 0;
				goto out_may_wakeup;
			}

			if (errno && errno != EISCONN)
				goto out_error;

			/* OK we just need to indicate that we got a connection
			 * and that we wrote nothing.
			 */
			b->flags |= BF_WRITE_NULL;
			fdtab[fd].state = FD_STREADY;
		}

		/* Funny, we were called to write something but there wasn't
		 * anything. We can get there, for example if we were woken up
		 * on a write event to finish the splice, but the send_max is 0
		 * so we cannot write anything from the buffer. Let's disable
		 * the write event and pretend we never came there.
		 */
	}

	if (b->flags & BF_OUT_EMPTY) {
		/* the connection is established but we can't write. Either the
		 * buffer is empty, or we just refrain from sending because the
		 * send_max limit was reached. Maybe we just wrote the last
		 * chunk and need to close.
		 */
		if (((b->flags & (BF_SHUTW|BF_HIJACK|BF_SHUTW_NOW)) == BF_SHUTW_NOW) &&
		    (si->state == SI_ST_EST)) {
			stream_sock_shutw(si);
			goto out_wakeup;
		}

		if ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_FULL|BF_HIJACK)) == 0)
			si->flags |= SI_FL_WAIT_DATA;

		EV_FD_CLR(fd, DIR_WR);
		b->wex = TICK_ETERNITY;
	}

 out_may_wakeup:
	if (b->flags & BF_WRITE_ACTIVITY) {
		/* update timeout if we have written something */
		if ((b->flags & (BF_OUT_EMPTY|BF_SHUTW|BF_WRITE_PARTIAL)) == BF_WRITE_PARTIAL)
			b->wex = tick_add_ifset(now_ms, b->wto);

		/* note: this label is deliberately inside the if-block above;
		 * jumping here from outside skips the timeout refresh but still
		 * performs the producer/task notifications below.
		 */
	out_wakeup:
		if (tick_isset(si->ib->rex) && !(si->flags & SI_FL_INDEP_STR)) {
			/* Note: to prevent the client from expiring read timeouts
			 * during writes, we refresh it. We only do this if the
			 * interface is not configured for "independent streams",
			 * because for some applications it's better not to do this,
			 * for instance when continuously exchanging small amounts
			 * of data which can fill the socket buffers long before a
			 * write timeout is detected.
			 */
			si->ib->rex = tick_add_ifset(now_ms, si->ib->rto);
		}

		/* the producer might be waiting for more room to store data */
		if (likely((b->flags & (BF_SHUTW|BF_WRITE_PARTIAL|BF_FULL|BF_DONT_READ)) == BF_WRITE_PARTIAL &&
			   (b->prod->flags & SI_FL_WAIT_ROOM)))
			b->prod->chk_rcv(b->prod);

		/* we have to wake up if there is a special event or if we don't have
		 * any more data to forward and it's not planned to send any more.
		 */
		if (likely((b->flags & (BF_WRITE_NULL|BF_WRITE_ERROR|BF_SHUTW)) ||
			   ((b->flags & BF_OUT_EMPTY) && !b->to_forward) ||
			   si->state != SI_ST_EST ||
			   b->prod->state != SI_ST_EST))
			task_wakeup(si->owner, TASK_WOKEN_IO);
	}

	fdtab[fd].ev &= ~FD_POLL_OUT;
	return retval;

 out_error:
	/* Write error on the file descriptor. We mark the FD as STERROR so
	 * that we don't use it anymore. The error is reported to the stream
	 * interface which will take proper action. We must not perturbate the
	 * buffer because the stream interface wants to ensure transparent
	 * connection retries.
	 */
	fdtab[fd].state = FD_STERROR;
	fdtab[fd].ev &= ~FD_POLL_STICKY;
	EV_FD_REM(fd);
	si->flags |= SI_FL_ERR;
	task_wakeup(si->owner, TASK_WOKEN_IO);
	return 1;
}
/*
 * This function is called on a read event from a stream socket.
 * It returns 0 if we have a high confidence that we will not be
 * able to read more data without polling first. Returns non-zero
 * otherwise.
 */
int stream_sock_read(int fd)
{
	struct stream_interface *si = fdtab[fd].owner;
	struct buffer *b = si->ib;
	int ret, max, retval, cur_read;
	int read_poll = MAX_READ_POLL_LOOPS;

#ifdef DEBUG_FULL
	fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
#endif

	retval = 1;

	/* stop immediately on errors. Note that we DON'T want to stop on
	 * POLL_ERR, as the poller might report a write error while there
	 * are still data available in the recv buffer. This typically
	 * happens when we send too large a request to a backend server
	 * which rejects it before reading it all.
	 */
	if (fdtab[fd].state == FD_STERROR)
		goto out_error;

	/* stop here if we reached the end of data */
	if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
		goto out_shutdown_r;

	/* maybe we were called immediately after an asynchronous shutr */
	if (b->flags & BF_SHUTR)
		goto out_wakeup;

#if defined(CONFIG_HAP_LINUX_SPLICE)
	if (b->to_forward >= MIN_SPLICE_FORWARD && b->flags & BF_KERN_SPLICING) {

		/* Under Linux, if FD_POLL_HUP is set, we have reached the end.
		 * Since older splice() implementations were buggy and returned
		 * EAGAIN on end of read, let's bypass the call to splice() now.
		 */
		if (fdtab[fd].ev & FD_POLL_HUP)
			goto out_shutdown_r;

		retval = stream_sock_splice_in(b, si);

		if (retval >= 0) {
			if (si->flags & SI_FL_ERR)
				goto out_error;
			if (b->flags & BF_READ_NULL)
				goto out_shutdown_r;
			goto out_wakeup;
		}
		/* splice not possible (anymore), let's go on on standard copy */
	}
#endif
	cur_read = 0;
	while (1) {
		max = buffer_max_len(b) - b->l;

		if (max <= 0) {
			/* no room left in the buffer */
			b->flags |= BF_FULL;
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		/*
		 * 1. compute the maximum block size we can read at once.
		 */
		if (b->l == 0) {
			/* let's realign the buffer to optimize I/O */
			b->r = b->w = b->lr = b->data;
		}
		else if (b->r > b->w) {
			/* remaining space wraps at the end, with a moving limit */
			if (max > b->data + b->size - b->r)
				max = b->data + b->size - b->r;
		}
		/* else max is already OK */

		/*
		 * 2. read the largest possible block
		 */
		ret = recv(fd, b->r, max, 0);

		if (ret > 0) {
			b->r += ret;
			b->l += ret;
			cur_read += ret;

			/* if we're allowed to directly forward data, we must update send_max */
			if (b->to_forward && !(b->flags & (BF_SHUTW|BF_SHUTW_NOW))) {
				unsigned long fwd = ret;
				if (b->to_forward != BUF_INFINITE_FORWARD) {
					if (fwd > b->to_forward)
						fwd = b->to_forward;
					b->to_forward -= fwd;
				}
				b->send_max += fwd;
				b->flags &= ~BF_OUT_EMPTY;
			}

			if (fdtab[fd].state == FD_STCONN)
				fdtab[fd].state = FD_STREADY;

			b->flags |= BF_READ_PARTIAL;

			if (b->r == b->data + b->size) {
				b->r = b->data; /* wrap around the buffer */
			}

			b->total += ret;

			if (b->l >= buffer_max_len(b)) {
				/* The buffer is now full, there's no point in going through
				 * the loop again.
				 */
				if (!(b->flags & BF_STREAMER_FAST) && (cur_read == b->l)) {
					b->xfer_small = 0;
					b->xfer_large++;
					if (b->xfer_large >= 3) {
						/* we call this buffer a fast streamer if it manages
						 * to be filled in one call 3 consecutive times.
						 */
						b->flags |= (BF_STREAMER | BF_STREAMER_FAST);
						//fputc('+', stderr);
					}
				}
				else if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
					 (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 2) {
						/* if the buffer has been at least half full twice,
						 * we receive faster than we send, so at least it
						 * is not a "fast streamer".
						 */
						b->flags &= ~BF_STREAMER_FAST;
						//fputc('-', stderr);
					}
				}
				else {
					b->xfer_small = 0;
					b->xfer_large = 0;
				}

				b->flags |= BF_FULL;
				si->flags |= SI_FL_WAIT_ROOM;
				break;
			}

			/* if too many bytes were missing from last read, it means that
			 * it's pointless trying to read again because the system does
			 * not have them in buffers. BTW, if FD_POLL_HUP was present,
			 * it means that we have reached the end and that the connection
			 * is closed.
			 */
			if (ret < max) {
				if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
				    (cur_read <= b->size / 2)) {
					b->xfer_large = 0;
					b->xfer_small++;
					if (b->xfer_small >= 3) {
						/* we have read less than half of the buffer in
						 * one pass, and this happened at least 3 times.
						 * This is definitely not a streamer.
						 */
						b->flags &= ~(BF_STREAMER | BF_STREAMER_FAST);
						//fputc('!', stderr);
					}
				}

				/* unfortunately, on level-triggered events, POLL_HUP
				 * is generally delivered AFTER the system buffer is
				 * empty, so this one might never match.
				 */
				if (fdtab[fd].ev & FD_POLL_HUP)
					goto out_shutdown_r;

				/* if a streamer has read few data, it may be because we
				 * have exhausted system buffers. It's not worth trying
				 * again.
				 */
				if (b->flags & BF_STREAMER)
					break;

				/* generally if we read something smaller than 1 or 2 MSS,
				 * it means that either we have exhausted the system's
				 * buffers (streamer or question-response protocol) or
				 * that the connection will be closed. Streamers are
				 * easily detected so we return early. For other cases,
				 * it's still better to perform a last read to be sure,
				 * because it may save one complete poll/read/wakeup cycle
				 * in case of shutdown.
				 */
				if (ret < MIN_RET_FOR_READ_LOOP && b->flags & BF_STREAMER)
					break;

				/* if we read a large block smaller than what we requested,
				 * it's almost certain we'll never get anything more.
				 */
				if (ret >= global.tune.recv_enough)
					break;
			}

			if ((b->flags & BF_READ_DONTWAIT) || --read_poll <= 0)
				break;
		}
		else if (ret == 0) {
			/* connection closed */
			goto out_shutdown_r;
		}
		else if (errno == EAGAIN) {
			/* Ignore EAGAIN but inform the poller that there is
			 * nothing to read left if we did not read much, ie
			 * less than what we were still expecting to read.
			 * But we may have done some work justifying to notify
			 * the task.
			 */
			if (cur_read < MIN_RET_FOR_READ_LOOP)
				retval = 0;
			break;
		}
		else {
			goto out_error;
		}
	} /* while (1) */

 out_wakeup:
	/* We might have some data the consumer is waiting for.
	 * We can do fast-forwarding, but we avoid doing this for partial
	 * buffers, because it is very likely that it will be done again
	 * immediately afterwards once the following data is parsed (eg:
	 * HTTP chunking).
	 */
	if (b->pipe || /* always try to send spliced data */
	    (b->send_max == b->l && (b->cons->flags & SI_FL_WAIT_DATA))) {
		int last_len = b->pipe ? b->pipe->data : 0;

		b->cons->chk_snd(b->cons);

		/* check if the consumer has freed some space */
		if (!(b->flags & BF_FULL) &&
		    (!last_len || !b->pipe || b->pipe->data < last_len))
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	if (si->flags & SI_FL_WAIT_ROOM) {
		/* no room: stop polling for reads and disable the read timeout */
		EV_FD_CLR(fd, DIR_RD);
		b->rex = TICK_ETERNITY;
	}
	else if ((b->flags & (BF_SHUTR|BF_READ_PARTIAL|BF_FULL|BF_DONT_READ|BF_READ_NOEXP)) == BF_READ_PARTIAL)
		b->rex = tick_add_ifset(now_ms, b->rto);

	/* we have to wake up if there is a special event or if we don't have
	 * any more data to forward.
	 */
	if ((b->flags & (BF_READ_NULL|BF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((b->flags & BF_READ_PARTIAL) && (!b->to_forward || b->cons->state != SI_ST_EST)))
		task_wakeup(si->owner, TASK_WOKEN_IO);

	if (b->flags & BF_READ_ACTIVITY)
		b->flags &= ~BF_READ_DONTWAIT;

	fdtab[fd].ev &= ~FD_POLL_IN;
	return retval;

 out_shutdown_r:
	/* we received a shutdown */
	fdtab[fd].ev &= ~FD_POLL_HUP;
	b->flags |= BF_READ_NULL;
	if (b->flags & BF_AUTO_CLOSE)
		buffer_shutw_now(b);
	stream_sock_shutr(si);
	goto out_wakeup;

 out_error:
	/* Read error on the file descriptor. We mark the FD as STERROR so
	 * that we don't use it anymore. The error is reported to the stream
	 * interface which will take proper action. We must not perturbate the
	 * buffer because the stream interface wants to ensure transparent
	 * connection retries.
	 */
	fdtab[fd].state = FD_STERROR;
	fdtab[fd].ev &= ~FD_POLL_STICKY;
	EV_FD_REM(fd);
	si->flags |= SI_FL_ERR;
	retval = 1;
	goto out_wakeup;
}
/* Finish a session accept() for a proxy (TCP or HTTP). It returns a negative
 * value in case of a critical failure which must cause the listener to be
 * disabled, a positive value in case of success, or zero if it is a success
 * but the session must be closed ASAP (eg: monitoring).
 */
int frontend_accept(struct session *s)
{
	int cfd = s->si[0].fd;

	/* reset all per-session log accounting */
	tv_zero(&s->logs.tv_request);
	s->logs.t_queue = -1;
	s->logs.t_connect = -1;
	s->logs.t_data = -1;
	s->logs.t_close = 0;
	s->logs.bytes_in = s->logs.bytes_out = 0;
	s->logs.prx_queue_size = 0;  /* we get the number of pending conns before us */
	s->logs.srv_queue_size = 0; /* we will get this number soon */

	/* FIXME: the logs are horribly complicated now, because they are
	 * defined in <p>, <p>, and later <be> and <be>.
	 */
	s->do_log = sess_log;

	/* default error reporting function, may be changed by analysers */
	s->srv_error = default_srv_error;

	/* Adjust some socket options */
	if (s->listener->addr.ss_family == AF_INET || s->listener->addr.ss_family == AF_INET6) {
		/* TCP_NODELAY failure is considered fatal for the session */
		if (setsockopt(cfd, IPPROTO_TCP, TCP_NODELAY,
			       (char *) &one, sizeof(one)) == -1)
			goto out_return;

		if (s->fe->options & PR_O_TCP_CLI_KA)
			setsockopt(cfd, SOL_SOCKET, SO_KEEPALIVE,
				   (char *) &one, sizeof(one));

		if (s->fe->options & PR_O_TCP_NOLING)
			setsockopt(cfd, SOL_SOCKET, SO_LINGER,
				   (struct linger *) &nolinger, sizeof(struct linger));
#if defined(TCP_MAXSEG)
		if (s->listener->maxseg < 0) {
			/* we just want to reduce the current MSS by that value */
			int mss;
			socklen_t mss_len = sizeof(mss);
			if (getsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, &mss_len) == 0) {
				mss += s->listener->maxseg; /* remember, it's < 0 */
				setsockopt(cfd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss));
			}
		}
#endif
	}

	if (global.tune.client_sndbuf)
		setsockopt(cfd, SOL_SOCKET, SO_SNDBUF, &global.tune.client_sndbuf, sizeof(global.tune.client_sndbuf));

	if (global.tune.client_rcvbuf)
		setsockopt(cfd, SOL_SOCKET, SO_RCVBUF, &global.tune.client_rcvbuf, sizeof(global.tune.client_rcvbuf));

	if (s->fe->mode == PR_MODE_HTTP) {
		/* the captures are only used in HTTP frontends */
		if (unlikely(s->fe->nb_req_cap > 0 &&
			     (s->txn.req.cap = pool_alloc2(s->fe->req_cap_pool)) == NULL))
			goto out_return;	/* no memory */

		if (unlikely(s->fe->nb_rsp_cap > 0 &&
			     (s->txn.rsp.cap = pool_alloc2(s->fe->rsp_cap_pool)) == NULL))
			goto out_free_reqcap;	/* no memory */
	}

	if (s->fe->acl_requires & ACL_USE_L7_ANY) {
		/* we have to allocate header indexes only if we know
		 * that we may make use of them. This of course includes
		 * (mode == PR_MODE_HTTP).
		 */
		s->txn.hdr_idx.size = global.tune.max_http_hdr;

		if (unlikely((s->txn.hdr_idx.v = pool_alloc2(pool2_hdr_idx)) == NULL))
			goto out_free_rspcap; /* no memory */

		/* and now initialize the HTTP transaction state */
		http_init_txn(s);
	}

	if ((s->fe->mode == PR_MODE_TCP || s->fe->mode == PR_MODE_HTTP)
	    && (!LIST_ISEMPTY(&s->fe->logsrvs))) {
		if (likely(s->fe->to_log)) {
			/* we have the client ip */
			if (s->logs.logwait & LW_CLIP)
				if (!(s->logs.logwait &= ~LW_CLIP))
					s->do_log(s);
		}
		else {
			/* no advanced logging required: emit a plain connect line */
			char pn[INET6_ADDRSTRLEN], sn[INET6_ADDRSTRLEN];

			if (!(s->flags & SN_FRT_ADDR_SET))
				get_frt_addr(s);

			switch (addr_to_str(&s->req->prod->addr.from, pn, sizeof(pn))) {
			case AF_INET:
			case AF_INET6:
				addr_to_str(&s->req->prod->addr.to, sn, sizeof(sn));
				send_log(s->fe, LOG_INFO, "Connect from %s:%d to %s:%d (%s/%s)\n",
					 pn, get_host_port(&s->req->prod->addr.from),
					 sn, get_host_port(&s->req->prod->addr.to),
					 s->fe->id, (s->fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
				break;
			case AF_UNIX:
				/* UNIX socket, only the destination is known */
				send_log(s->fe, LOG_INFO, "Connect to unix:%d (%s/%s)\n",
					 s->listener->luid,
					 s->fe->id, (s->fe->mode == PR_MODE_HTTP) ? "HTTP" : "TCP");
				break;
			}
		}
	}

	if (unlikely((global.mode & MODE_DEBUG) && (!(global.mode & MODE_QUIET) || (global.mode & MODE_VERBOSE)))) {
		/* debug mode: dump the accept on stdout */
		char pn[INET6_ADDRSTRLEN];
		int len = 0;

		if (!(s->flags & SN_FRT_ADDR_SET))
			get_frt_addr(s);

		switch (addr_to_str(&s->req->prod->addr.from, pn, sizeof(pn))) {
		case AF_INET:
		case AF_INET6:
			len = sprintf(trash, "%08x:%s.accept(%04x)=%04x from [%s:%d]\n",
				      s->uniq_id, s->fe->id, (unsigned short)s->listener->fd, (unsigned short)cfd,
				      pn, get_host_port(&s->req->prod->addr.from));
			break;
		case AF_UNIX:
			/* UNIX socket, only the destination is known */
			len = sprintf(trash, "%08x:%s.accept(%04x)=%04x from [unix:%d]\n",
				      s->uniq_id, s->fe->id, (unsigned short)s->listener->fd, (unsigned short)cfd,
				      s->listener->luid);
			break;
		}

		write(1, trash, len);
	}

	if (s->fe->mode == PR_MODE_HTTP)
		s->req->flags |= BF_READ_DONTWAIT; /* one read is usually enough */

	/* note: this should not happen anymore since there's always at least the switching rules */
	if (!s->req->analysers) {
		buffer_auto_connect(s->req);  /* don't wait to establish connection */
		buffer_auto_close(s->req);    /* let the producer forward close requests */
	}

	/* both directions inherit the frontend's client timeout */
	s->req->rto = s->fe->timeout.client;
	s->rep->wto = s->fe->timeout.client;

	fdtab[cfd].flags = FD_FL_TCP | FD_FL_TCP_NODELAY;
	if (s->fe->options & PR_O_TCP_NOLING)
		fdtab[cfd].flags |= FD_FL_TCP_NOLING;

	if (unlikely((s->fe->mode == PR_MODE_HTTP && (s->flags & SN_MONITOR)) ||
		     (s->fe->mode == PR_MODE_HEALTH && ((s->fe->options2 & PR_O2_CHK_ANY) == PR_O2_HTTP_CHK)))) {
		/* Either we got a request from a monitoring system on an HTTP instance,
		 * or we're in health check mode with the 'httpchk' option enabled. In
		 * both cases, we return a fake "HTTP/1.0 200 OK" response and we exit.
		 */
		struct chunk msg;
		chunk_initstr(&msg, "HTTP/1.0 200 OK\r\n\r\n");
		stream_int_retnclose(&s->si[0], &msg); /* forge a 200 response */
		s->req->analysers = 0;
		s->task->expire = s->rep->wex;
		EV_FD_CLR(cfd, DIR_RD);
	}
	else if (unlikely(s->fe->mode == PR_MODE_HEALTH)) {  /* health check mode, no client reading */
		struct chunk msg;
		chunk_initstr(&msg, "OK\n");
		stream_int_retnclose(&s->si[0], &msg); /* forge an "OK" response */
		s->req->analysers = 0;
		s->task->expire = s->rep->wex;
		EV_FD_CLR(cfd, DIR_RD);
	}

	/* everything's OK, let's go on */
	return 1;

	/* Error unrolling */
 out_free_rspcap:
	pool_free2(s->fe->rsp_cap_pool, s->txn.rsp.cap);
 out_free_reqcap:
	pool_free2(s->fe->req_cap_pool, s->txn.req.cap);
 out_return:
	return -1;
}
/* This function is called on a read event from a listening socket, corresponding
 * to an accept. It tries to accept as many connections as possible, and for each
 * calls the listener's accept handler (generally the frontend's accept handler).
 */
int stream_sock_accept(int fd)
{
	struct listener *l = fdtab[fd].owner;
	struct proxy *p = l->frontend;
	int max_accept = global.tune.maxaccept;
	int cfd;
	int ret;

	/* listener already at its connection limit: stop polling it */
	if (unlikely(l->nbconn >= l->maxconn)) {
		EV_FD_CLR(l->fd, DIR_RD);
		l->state = LI_FULL;
		return 0;
	}

	/* apply the frontend's session-per-second rate limit, if any */
	if (p && p->fe_sps_lim) {
		int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0);
		if (max_accept > max)
			max_accept = max;
	}

	while ((!p || p->feconn < p->maxconn) && actconn < global.maxconn && max_accept--) {
		struct sockaddr_storage addr;
		socklen_t laddr = sizeof(addr);

		cfd = accept(fd, (struct sockaddr *)&addr, &laddr);
		if (unlikely(cfd == -1)) {
			switch (errno) {
			case EAGAIN:
			case EINTR:
			case ECONNABORTED:
				return 0;	    /* nothing more to accept */
			case ENFILE:
				if (p)
					send_log(p, LOG_EMERG,
						 "Proxy %s reached system FD limit at %d. Please check system tunables.\n",
						 p->id, maxfd);
				return 0;
			case EMFILE:
				if (p)
					send_log(p, LOG_EMERG,
						 "Proxy %s reached process FD limit at %d. Please check 'ulimit-n' and restart.\n",
						 p->id, maxfd);
				return 0;
			case ENOBUFS:
			case ENOMEM:
				if (p)
					send_log(p, LOG_EMERG,
						 "Proxy %s reached system memory limit at %d sockets. Please check system tunables.\n",
						 p->id, maxfd);
				return 0;
			default:
				return 0;
			}
		}

		if (unlikely(cfd >= global.maxsock)) {
			/* NOTE(review): unlike the errno branches above, this one
			 * dereferences p->id without checking p for NULL — confirm
			 * that every listener reaching this path has a frontend.
			 */
			send_log(p, LOG_EMERG,
				 "Proxy %s reached the configured maximum connection limit. Please check the global 'maxconn' value.\n",
				 p->id);
			close(cfd);
			return 0;
		}

		/* the connection is accounted for before the handler runs */
		jobs++;
		actconn++;
		totalconn++;
		l->nbconn++;

		if (l->counters) {
			if (l->nbconn > l->counters->conn_max)
				l->counters->conn_max = l->nbconn;
		}

		ret = l->accept(l, cfd, &addr);
		if (unlikely(ret <= 0)) {
			/* The connection was closed by session_accept(). Either
			 * we just have to ignore it (ret == 0) or it's a critical
			 * error due to a resource shortage, and we must stop the
			 * listener (ret < 0).
			 */
			jobs--;
			actconn--;
			l->nbconn--;
			if (ret == 0) /* successful termination */
				continue;

			if (p) {
				disable_listener(l);
				p->state = PR_STIDLE;
			}
			return 0;
		}

		if (l->nbconn >= l->maxconn) {
			/* limit reached: stop accepting until a slot frees up */
			EV_FD_CLR(l->fd, DIR_RD);
			l->state = LI_FULL;
		}
	} /* end of while (p->feconn < p->maxconn) */
	return 0;
}