void applet_run_active(void)
{
	struct appctx *curr;
	struct stream_interface *si;

	if (LIST_ISEMPTY(&applet_active_queue))
		return;

	/* move active queue to run queue */
	applet_active_queue.n->p = &applet_cur_queue;
	applet_active_queue.p->n = &applet_cur_queue;
	applet_cur_queue = applet_active_queue;
	LIST_INIT(&applet_active_queue);

	/* The list is only scanned from the head. This guarantees that if any
	 * applet removes another one, there is no side effect while walking
	 * through the list.
	 */
	while (!LIST_ISEMPTY(&applet_cur_queue)) {
		curr = LIST_ELEM(applet_cur_queue.n, typeof(curr), runq);
		si = curr->owner;

		/* Now we'll try to allocate the input buffer. We wake up the
		 * applet in all cases, so it is the applet's responsibility to
		 * check whether this buffer was allocated or not. This leaves
		 * a chance for applets to do some other processing if needed.
		 */
		if (!channel_alloc_buffer(si_ic(si), &curr->buffer_wait))
			si_applet_cant_put(si);

		/* We always pretend the applet can't get and doesn't want to
		 * put, it's up to it to change this if needed. This ensures
		 * that an applet which ignores any event will not spin.
		 */
		si_applet_cant_get(si);
		si_applet_stop_put(si);

		curr->applet->fct(curr);
		si_applet_wake_cb(si);
		channel_release_buffer(si_ic(si), &curr->buffer_wait);

		if (applet_cur_queue.n == &curr->runq) {
			/* curr was left in the list, move it back to the active list */
			LIST_DEL(&curr->runq);
			LIST_ADDQ(&applet_active_queue, &curr->runq);
		}
	}
}
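/* Illustrative, out-of-tree sketch (not part of the build): the queue swap
 * above relies on an intrusive circular doubly-linked list whose head can be
 * relinked in O(1). The minimal standalone program below reproduces the same
 * "steal the whole active list, then always consume from the head" pattern
 * with hypothetical names (struct node, list_*), showing why removals done
 * by a handler cannot invalidate the iteration.
 */
#include <stdio.h>

struct node { struct node *n, *p; int id; };

static void list_init(struct node *h) { h->n = h->p = h; }
static int  list_empty(struct node *h) { return h->n == h; }
static void list_del(struct node *e) { e->p->n = e->n; e->n->p = e->p; }
static void list_addq(struct node *h, struct node *e)
{
	e->p = h->p; e->n = h; h->p->n = e; h->p = e;
}

int main(void)
{
	struct node active, run, a = { .id = 1 }, b = { .id = 2 };

	list_init(&active); list_init(&run);
	list_addq(&active, &a); list_addq(&active, &b);

	/* steal the whole active list in O(1), as applet_run_active() does */
	active.n->p = &run; active.p->n = &run;
	run = active;
	list_init(&active);

	while (!list_empty(&run)) {	/* always scan from the head */
		struct node *cur = run.n;

		printf("running applet %d\n", cur->id);
		list_del(cur);		/* the handler may requeue it */
		list_addq(&active, cur);
	}
	return 0;
}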
/*
 * This function performs a shutdown-read on a stream interface attached to an
 * applet in a connected or init state (it does nothing for other states). It
 * either shuts the read side or marks itself as closed. The buffer flags are
 * updated to reflect the new state. If the stream interface has SI_FL_NOHALF,
 * we also forward the close to the write side. The owner task is woken up if
 * it exists.
 */
static void stream_int_shutr_applet(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);

	ic->flags &= ~CF_SHUTR_NOW;
	if (ic->flags & CF_SHUTR)
		return;
	ic->flags |= CF_SHUTR;
	ic->rex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_ROOM;

	/* Note: on shutr, we don't call the applet */

	if (si->state != SI_ST_EST && si->state != SI_ST_CON)
		return;

	if (si_oc(si)->flags & CF_SHUTW) {
		si_applet_release(si);
		si->state = SI_ST_DIS;
		si->exp = TICK_ETERNITY;
	}
	else if (si->flags & SI_FL_NOHALF) {
		/* we want to immediately forward this close to the write side */
		return stream_int_shutw_applet(si);
	}
}
/* default chk_snd function for scheduled tasks */
static void stream_int_chk_snd(struct stream_interface *si)
{
	struct channel *oc = si_oc(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, si_ic(si)->flags, oc->flags);

	if (unlikely(si->state != SI_ST_EST || (oc->flags & CF_SHUTW)))
		return;

	if (!(si->flags & SI_FL_WAIT_DATA) ||	/* not waiting for data */
	    channel_is_empty(oc))		/* called with nothing to send ! */
		return;

	/* Otherwise there are remaining data to be sent in the buffer,
	 * so we tell the handler.
	 */
	si->flags &= ~SI_FL_WAIT_DATA;
	if (!tick_isset(oc->wex))
		oc->wex = tick_add_ifset(now_ms, oc->wto);

	if (!(si->flags & SI_FL_DONT_WAKE))
		task_wakeup(si_task(si), TASK_WOKEN_IO);
}
/*
 * This function performs a shutdown-read on a stream interface attached to
 * a connection in a connected or init state (it does nothing for other
 * states). It either shuts the read side or marks itself as closed. The buffer
 * flags are updated to reflect the new state. If the stream interface has
 * SI_FL_NOHALF, we also forward the close to the write side. If a control
 * layer is defined, then it is supposed to be a socket layer and file
 * descriptors are then shut down or closed accordingly. The function
 * automatically disables polling if needed.
 */
static void stream_int_shutr_conn(struct stream_interface *si)
{
	struct connection *conn = __objt_conn(si->end);
	struct channel *ic = si_ic(si);

	ic->flags &= ~CF_SHUTR_NOW;
	if (ic->flags & CF_SHUTR)
		return;
	ic->flags |= CF_SHUTR;
	ic->rex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_ROOM;

	if (si->state != SI_ST_EST && si->state != SI_ST_CON)
		return;

	if (si_oc(si)->flags & CF_SHUTW) {
		conn_full_close(conn);
		si->state = SI_ST_DIS;
		si->exp = TICK_ETERNITY;
	}
	else if (si->flags & SI_FL_NOHALF) {
		/* we want to immediately forward this close to the write side */
		return stream_int_shutw_conn(si);
	}
	else if (conn->ctrl) {
		/* we want the caller to disable polling on this FD */
		conn_data_stop_recv(conn);
	}
}
/*
 * This function performs a shutdown-read on a detached stream interface in a
 * connected or init state (it does nothing for other states). It either shuts
 * the read side or marks itself as closed. The buffer flags are updated to
 * reflect the new state. If the stream interface has SI_FL_NOHALF, we also
 * forward the close to the write side. The owner task is woken up if it exists.
 */
static void stream_int_shutr(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);

	ic->flags &= ~CF_SHUTR_NOW;
	if (ic->flags & CF_SHUTR)
		return;
	ic->flags |= CF_SHUTR;
	ic->rex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_ROOM;

	if (si->state != SI_ST_EST && si->state != SI_ST_CON)
		return;

	if (si_oc(si)->flags & CF_SHUTW) {
		si->state = SI_ST_DIS;
		si->exp = TICK_ETERNITY;
	}
	else if (si->flags & SI_FL_NOHALF) {
		/* we want to immediately forward this close to the write side */
		return stream_int_shutw(si);
	}

	/* note that if the task exists, it must unregister itself once it runs */
	if (!(si->flags & SI_FL_DONT_WAKE))
		task_wakeup(si_task(si), TASK_WOKEN_IO);
}
/* This function is designed to be called from within the stream handler to
 * update the channels' expiration timers and the stream interface's flags
 * based on the channels' flags. It needs to be called only once after the
 * channels' flags have settled down, and before they are cleared, though it
 * doesn't harm to call it as often as desired (it just slightly hurts
 * performance). It must not be called from outside of the stream handler,
 * as what it does will be used to compute the stream task's expiration.
 */
void stream_int_update(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	if (!(ic->flags & CF_SHUTR)) {
		/* Read not closed, update FD status and timeout for reads */
		if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic)) {
			/* stop reading */
			if (!(si->flags & SI_FL_WAIT_ROOM)) {
				if (!(ic->flags & CF_DONT_READ)) /* full */
					si->flags |= SI_FL_WAIT_ROOM;
				ic->rex = TICK_ETERNITY;
			}
		}
		else {
			/* (re)start reading and update timeout. Note: we don't recompute the timeout
			 * every time we get here, otherwise it would risk never to expire. We only
			 * update it if it was not yet set. The stream socket handler will already
			 * have updated it if there has been a completed I/O.
			 */
			si->flags &= ~SI_FL_WAIT_ROOM;
			if (!(ic->flags & (CF_READ_NOEXP|CF_DONT_READ)) && !tick_isset(ic->rex))
				ic->rex = tick_add_ifset(now_ms, ic->rto);
		}
	}

	if (!(oc->flags & CF_SHUTW)) {
		/* Write not closed, update FD status and timeout for writes */
		if (channel_is_empty(oc)) {
			/* stop writing */
			if (!(si->flags & SI_FL_WAIT_DATA)) {
				if ((oc->flags & CF_SHUTW_NOW) == 0)
					si->flags |= SI_FL_WAIT_DATA;
				oc->wex = TICK_ETERNITY;
			}
		}
		else {
			/* (re)start writing and update timeout. Note: we don't recompute the timeout
			 * every time we get here, otherwise it would risk never to expire. We only
			 * update it if it was not yet set. The stream socket handler will already
			 * have updated it if there has been a completed I/O.
			 */
			si->flags &= ~SI_FL_WAIT_DATA;
			if (!tick_isset(oc->wex)) {
				oc->wex = tick_add_ifset(now_ms, oc->wto);
				if (tick_isset(ic->rex) && !(si->flags & SI_FL_INDEP_STR)) {
					/* Note: depending on the protocol, we don't know if we're waiting
					 * for incoming data or not. So in order to prevent the socket from
					 * expiring read timeouts during writes, we refresh the read timeout,
					 * except if it was already infinite or if we have explicitly setup
					 * independent streams.
					 */
					ic->rex = tick_add_ifset(now_ms, ic->rto);
				}
			}
		}
	}
}
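/* Illustrative, out-of-tree sketch (not HAProxy code): the rule above, "only
 * (re)arm a timeout when it is not already set", relies on the tick
 * convention where the value 0 (TICK_ETERNITY) means "no expiration". The
 * hypothetical helpers below mimic tick_isset()/tick_add_ifset() for a plain
 * unsigned millisecond clock, just to show why an already-armed timer must
 * be left alone so that it can actually expire.
 */
#include <stdio.h>

#define ETERNITY 0u

static int tick_is_set(unsigned t) { return t != ETERNITY; }

/* returns now+delay, skipping 0 which is reserved for "eternity";
 * returns ETERNITY when the configured delay itself is unset.
 */
static unsigned tick_add_if_set(unsigned now, unsigned delay)
{
	unsigned t;

	if (!tick_is_set(delay))
		return ETERNITY;
	t = now + delay;
	return t ? t : t + 1;
}

int main(void)
{
	unsigned now = 1000, rto = 500, rex = ETERNITY;

	/* first pass: timer unset, arm it */
	if (!tick_is_set(rex))
		rex = tick_add_if_set(now, rto);
	printf("armed at %u\n", rex);	/* 1500 */

	/* later passes with no completed I/O: leave it alone, otherwise it
	 * would be pushed back forever and never expire.
	 */
	now = 1400;
	if (!tick_is_set(rex))
		rex = tick_add_if_set(now, rto);
	printf("still %u\n", rex);	/* still 1500 */
	return 0;
}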
/* to be called only when in SI_ST_DIS with SI_FL_ERR */
void stream_int_report_error(struct stream_interface *si)
{
	if (!si->err_type)
		si->err_type = SI_ET_DATA_ERR;

	si_oc(si)->flags |= CF_WRITE_ERROR;
	si_ic(si)->flags |= CF_READ_ERROR;
}
/*
 * This function propagates a null read received on a socket-based connection.
 * It updates the stream interface. If the stream interface has SI_FL_NOHALF,
 * the close is also forwarded to the write side as an abort.
 */
void stream_sock_read0(struct stream_interface *si)
{
	struct connection *conn = __objt_conn(si->end);
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	HA_DBG("stream_sock_read0\n");

	ic->flags &= ~CF_SHUTR_NOW;
	if (ic->flags & CF_SHUTR)
		return;
	ic->flags |= CF_SHUTR;
	ic->rex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_ROOM;

	if (si->state != SI_ST_EST && si->state != SI_ST_CON)
		return;

	if (oc->flags & CF_SHUTW)
		goto do_close;

	if (si->flags & SI_FL_NOHALF) {
		/* we want to immediately forward this close to the write side */
		/* force flag on ssl to keep stream in cache */
		conn_data_shutw_hard(conn);
		goto do_close;
	}

	/* otherwise that's just a normal read shutdown */
	__conn_data_stop_recv(conn);
	return;

 do_close:
	/* OK we completely close the socket here just as if we went through si_shut[rw]() */
	conn_full_close(conn);

	ic->flags &= ~CF_SHUTR_NOW;
	ic->flags |= CF_SHUTR;
	ic->rex = TICK_ETERNITY;

	oc->flags &= ~CF_SHUTW_NOW;
	oc->flags |= CF_SHUTW;
	oc->wex = TICK_ETERNITY;

	si->flags &= ~(SI_FL_WAIT_DATA | SI_FL_WAIT_ROOM);

	si->state = SI_ST_DIS;
	si->exp = TICK_ETERNITY;
	return;
}
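/* Illustrative, out-of-tree sketch (not HAProxy code): at the socket level,
 * the "null read" propagated above is simply recv() returning 0. The
 * fragment below shows the two reactions implemented by stream_sock_read0():
 * keep the write side open (half-close) or, when half-open connections are
 * not allowed, tear the whole socket down. handle_read0() and
 * allow_half_open are hypothetical names.
 */
#include <sys/socket.h>
#include <unistd.h>

static void handle_read0(int fd, int allow_half_open)
{
	if (allow_half_open) {
		/* equivalent of the plain shutr path: stop reading only,
		 * we may still have pending data to send on the write side.
		 */
		shutdown(fd, SHUT_RD);
		return;
	}
	/* equivalent of the SI_FL_NOHALF path followed by conn_full_close():
	 * refuse half-open connections and release the FD entirely.
	 */
	close(fd);
}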
/*
 * This function performs a shutdown-write on a stream interface attached to an
 * applet in a connected or init state (it does nothing for other states). It
 * either shuts the write side or marks itself as closed. The buffer flags are
 * updated to reflect the new state. It also closes everything if the SI was
 * marked as being in error state. The owner task is woken up if it exists.
 */
static void stream_int_shutw(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	oc->flags &= ~CF_SHUTW_NOW;
	if (oc->flags & CF_SHUTW)
		return;
	oc->flags |= CF_SHUTW;
	oc->wex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_DATA;

	switch (si->state) {
	case SI_ST_EST:
		/* we have to shut before closing, otherwise some short messages
		 * may never leave the system, especially when there are remaining
		 * unread data in the socket input buffer, or when nolinger is set.
		 * However, if SI_FL_NOLINGER is explicitly set, we know there is
		 * no risk so we close both sides immediately.
		 */
		if (!(si->flags & (SI_FL_ERR | SI_FL_NOLINGER)) &&
		    !(ic->flags & (CF_SHUTR|CF_DONT_READ)))
			return;

		/* fall through */
	case SI_ST_CON:
	case SI_ST_CER:
	case SI_ST_QUE:
	case SI_ST_TAR:
		/* Note that none of these states may happen with applets */
		si->state = SI_ST_DIS;
		si_applet_release(si);
		/* fall through */
	default:
		si->flags &= ~(SI_FL_WAIT_ROOM | SI_FL_NOLINGER);
		ic->flags &= ~CF_SHUTR_NOW;
		ic->flags |= CF_SHUTR;
		ic->rex = TICK_ETERNITY;
		si->exp = TICK_ETERNITY;
	}

	/* note that if the task exists, it must unregister itself once it runs */
	if (!(si->flags & SI_FL_DONT_WAKE))
		task_wakeup(si_task(si), TASK_WOKEN_IO);
}
/* chk_rcv function for applets */
static void stream_int_chk_rcv_applet(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, ic->flags, si_oc(si)->flags);

	if (unlikely(si->state != SI_ST_EST || (ic->flags & (CF_SHUTR|CF_DONT_READ))))
		return;

	/* here we only wake the applet up if it was waiting for some room */
	if (!(si->flags & SI_FL_WAIT_ROOM))
		return;

	if (channel_may_recv(ic) && !ic->pipe) {
		/* (re)start reading */
		appctx_wakeup(si_appctx(si));
	}
}
/*
 * Returns a message to the client; the connection is shut down for read,
 * and the request is cleared so that no server connection can be initiated.
 * The buffer is marked for read shutdown on the other side to protect the
 * message, and the buffer write is enabled. The message is contained in a
 * "chunk". If it is null, then an empty message is used. The reply buffer does
 * not need to be empty before this, and its contents will not be overwritten.
 * The primary goal of this function is to return error messages to a client.
 */
void stream_int_retnclose(struct stream_interface *si, const struct chunk *msg)
{
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	channel_auto_read(ic);
	channel_abort(ic);
	channel_auto_close(ic);
	channel_erase(ic);
	channel_truncate(oc);

	if (likely(msg && msg->len))
		bo_inject(oc, msg->str, msg->len);

	oc->wex = tick_add_ifset(now_ms, oc->wto);
	channel_auto_read(oc);
	channel_auto_close(oc);
	channel_shutr_now(oc);
}
/* default chk_rcv function for scheduled tasks */
static void stream_int_chk_rcv(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, ic->flags, si_oc(si)->flags);

	if (unlikely(si->state != SI_ST_EST || (ic->flags & (CF_SHUTR|CF_DONT_READ))))
		return;

	if (!channel_may_recv(ic) || ic->pipe) {
		/* stop reading */
		si->flags |= SI_FL_WAIT_ROOM;
	}
	else {
		/* (re)start reading */
		si->flags &= ~SI_FL_WAIT_ROOM;
		if (!(si->flags & SI_FL_DONT_WAKE))
			task_wakeup(si_task(si), TASK_WOKEN_IO);
	}
}
/* Callback to be used by connection I/O handlers upon completion. It propagates
 * connection flags to the stream interface, updates the stream (which may or
 * may not take this opportunity to try to forward data), then updates the
 * connection's polling based on the channels' and stream interface's final
 * states. The function always returns 0.
 */
static int si_conn_wake_cb(struct connection *conn)
{
	struct stream_interface *si = conn->owner;
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	/* First step, report to the stream-int what was detected at the
	 * connection layer: errors and connection establishment.
	 */
	if (conn->flags & CO_FL_ERROR)
		si->flags |= SI_FL_ERR;

	if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED)))) {
		si->exp = TICK_ETERNITY;
		oc->flags |= CF_WRITE_NULL;
	}

	/* Second step: update the stream-int and channels, try to forward any
	 * pending data, then possibly wake the stream up based on the new
	 * stream-int status.
	 */
	stream_int_notify(si);

	/* Third step: update the connection's polling status based on what
	 * was done above (eg: maybe some buffers got emptied).
	 */
	if (channel_is_empty(oc))
		__conn_data_stop_send(conn);

	if (si->flags & SI_FL_WAIT_ROOM) {
		__conn_data_stop_recv(conn);
	}
	else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL|CF_DONT_READ)) == CF_READ_PARTIAL &&
		 channel_may_recv(ic)) {
		__conn_data_want_recv(conn);
	}
	return 0;
}
/* This function is used for inter-stream-interface calls. It is called by the
 * consumer to inform the producer side that it may be interested in checking
 * for free space in the buffer. Note that it intentionally does not update
 * timeouts, so that we can still check them later at wake-up. This function is
 * dedicated to connection-based stream interfaces.
 */
static void stream_int_chk_rcv_conn(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);
	struct connection *conn = __objt_conn(si->end);

	if (unlikely(si->state > SI_ST_EST || (ic->flags & CF_SHUTR)))
		return;

	conn_refresh_polling_flags(conn);

	if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic)) {
		/* stop reading */
		if (!(ic->flags & CF_DONT_READ)) /* full */
			si->flags |= SI_FL_WAIT_ROOM;
		__conn_data_stop_recv(conn);
	}
	else {
		/* (re)start reading */
		si->flags &= ~SI_FL_WAIT_ROOM;
		__conn_data_want_recv(conn);
	}
	conn_cond_update_data_polling(conn);
}
/* Callback to be used by applet handlers upon completion. It updates the stream
 * (which may or may not take this opportunity to try to forward data), then
 * may disable the applet based on the channels' and stream interface's final
 * states.
 */
void si_applet_wake_cb(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);

	/* If the applet wants to write and the channel is closed, it's a
	 * broken pipe and it must be reported.
	 */
	if ((si->flags & SI_FL_WANT_PUT) && (ic->flags & CF_SHUTR))
		si->flags |= SI_FL_ERR;

	/* update the stream-int, channels, and possibly wake the stream up */
	stream_int_notify(si);

	/* Get away from the active list if we can't work anymore.
	 * We also do that if the main task is already scheduled, because it
	 * saves a useless wakeup/pause/wakeup cycle causing one useless call
	 * per session on average.
	 */
	if (task_in_rq(si_task(si)) ||
	    (((si->flags & (SI_FL_WANT_PUT|SI_FL_WAIT_ROOM)) != SI_FL_WANT_PUT) &&
	     ((si->flags & (SI_FL_WANT_GET|SI_FL_WAIT_DATA)) != SI_FL_WANT_GET)))
		appctx_pause(si_appctx(si));
}
/* Updates the polling status of a connection outside of the connection handler
 * based on the channels' flags and the stream interface's flags. It needs to be
 * called once after the channels' flags have settled down and the stream has
 * been updated. It is not designed to be called from within the connection
 * handler itself.
 */
void stream_int_update_conn(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);
	struct connection *conn = __objt_conn(si->end);

	if (!(ic->flags & CF_SHUTR)) {
		/* Read not closed */
		if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic))
			__conn_data_stop_recv(conn);
		else
			__conn_data_want_recv(conn);
	}

	if (!(oc->flags & CF_SHUTW)) {
		/* Write not closed */
		if (channel_is_empty(oc))
			__conn_data_stop_send(conn);
		else
			__conn_data_want_send(conn);
	}

	conn_cond_update_data_polling(conn);
}
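/* Illustrative, out-of-tree sketch (not HAProxy code): at the lowest level,
 * the __conn_data_want_recv()/__conn_data_stop_recv() pairs used above boil
 * down to toggling interest in read/write events on the file descriptor.
 * With epoll this is a single epoll_ctl(EPOLL_CTL_MOD) call; epfd and fd are
 * hypothetical, and the fd is assumed to be already registered.
 */
#include <sys/epoll.h>

static int update_fd_interest(int epfd, int fd, int want_recv, int want_send)
{
	struct epoll_event ev = { .events = 0, .data.fd = fd };

	if (want_recv)
		ev.events |= EPOLLIN;
	if (want_send)
		ev.events |= EPOLLOUT;
	return epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &ev);
}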
/* chk_snd function for applets */
static void stream_int_chk_snd_applet(struct stream_interface *si)
{
	struct channel *oc = si_oc(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, si_ic(si)->flags, oc->flags);

	if (unlikely(si->state != SI_ST_EST || (oc->flags & CF_SHUTW)))
		return;

	/* we only wake the applet up if it was waiting for some data */
	if (!(si->flags & SI_FL_WAIT_DATA))
		return;

	if (!tick_isset(oc->wex))
		oc->wex = tick_add_ifset(now_ms, oc->wto);

	if (!channel_is_empty(oc)) {
		/* (re)start sending */
		appctx_wakeup(si_appctx(si));
	}
}
/* default update function for embedded tasks, to be used at the end of the I/O handler */
static void stream_int_update_embedded(struct stream_interface *si)
{
	int old_flags = si->flags;
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, ic->flags, oc->flags);

	if (si->state != SI_ST_EST)
		return;

	if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW &&
	    channel_is_empty(oc))
		si_shutw(si);

	if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0 && channel_may_recv(oc))
		si->flags |= SI_FL_WAIT_DATA;

	/* we're almost sure that we need some space if the buffer is not
	 * empty, even if it's not full, because the applets can't fill it.
	 */
	if ((ic->flags & (CF_SHUTR|CF_DONT_READ)) == 0 && !channel_is_empty(ic))
		si->flags |= SI_FL_WAIT_ROOM;

	if (oc->flags & CF_WRITE_ACTIVITY) {
		if (tick_isset(oc->wex))
			oc->wex = tick_add_ifset(now_ms, oc->wto);
	}

	if (ic->flags & CF_READ_ACTIVITY ||
	    ((oc->flags & CF_WRITE_ACTIVITY) && !(si->flags & SI_FL_INDEP_STR))) {
		if (tick_isset(ic->rex))
			ic->rex = tick_add_ifset(now_ms, ic->rto);
	}

	/* save flags to detect changes */
	old_flags = si->flags;
	if (likely((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL|CF_DONT_READ)) == CF_WRITE_PARTIAL &&
		   channel_may_recv(oc) &&
		   (si_opposite(si)->flags & SI_FL_WAIT_ROOM)))
		si_chk_rcv(si_opposite(si));

	if (((ic->flags & CF_READ_PARTIAL) && !channel_is_empty(ic)) &&
	    (ic->pipe /* always try to send spliced data */ ||
	     (ic->buf->i == 0 && (si_opposite(si)->flags & SI_FL_WAIT_DATA)))) {
		si_chk_snd(si_opposite(si));

		/* check if the consumer has freed some space */
		if (channel_may_recv(ic) && !ic->pipe)
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	/* Note that we're trying to wake up in two conditions here :
	 * - special event, which needs the holder task attention
	 * - status indicating that the applet can go on working. This
	 *   is rather hard because we might be blocking on output and
	 *   don't want to wake up on input and vice-versa. The idea is
	 *   to only rely on the changes the chk_* might have performed.
	 */
	if (/* check stream interface changes */
	    ((old_flags & ~si->flags) & (SI_FL_WAIT_ROOM|SI_FL_WAIT_DATA)) ||

	    /* changes on the production side */
	    (ic->flags & (CF_READ_NULL|CF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((ic->flags & CF_READ_PARTIAL) &&
	     (!ic->to_forward || si_opposite(si)->state != SI_ST_EST)) ||

	    /* changes on the consumption side */
	    (oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR)) ||
	    ((oc->flags & CF_WRITE_ACTIVITY) &&
	     ((oc->flags & CF_SHUTW) ||
	      ((oc->flags & CF_WAKE_WRITE) &&
	       (si_opposite(si)->state != SI_ST_EST ||
		(channel_is_empty(oc) && !oc->to_forward)))))) {
		if (!(si->flags & SI_FL_DONT_WAKE))
			task_wakeup(si_task(si), TASK_WOKEN_IO);
	}
	if (ic->flags & CF_READ_ACTIVITY)
		ic->flags &= ~CF_READ_DONTWAIT;
}
/*
 * IO Handler to handle message exchange with a peer
 */
static void peer_io_handler(struct appctx *appctx)
{
	struct stream_interface *si = appctx->owner;
	struct stream *s = si_strm(si);
	struct peers *curpeers = (struct peers *)strm_fe(s)->parent;
	int reql = 0;
	int repl = 0;

	while (1) {
switchstate:
		switch (appctx->st0) {
		case PEER_SESS_ST_ACCEPT:
			appctx->ctx.peers.ptr = NULL;
			appctx->st0 = PEER_SESS_ST_GETVERSION;
			/* fall through */
		case PEER_SESS_ST_GETVERSION:
			reql = bo_getline(si_oc(si), trash.str, trash.size);
			if (reql <= 0) { /* closed or EOL not found */
				if (reql == 0)
					goto out;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			if (trash.str[reql-1] != '\n') {
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			else if (reql > 1 && (trash.str[reql-2] == '\r'))
				trash.str[reql-2] = 0;
			else
				trash.str[reql-1] = 0;

			bo_skip(si_oc(si), reql);

			/* test version */
			if (strcmp(PEER_SESSION_PROTO_NAME " 1.0", trash.str) != 0) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRVERSION;
				/* test protocol */
				if (strncmp(PEER_SESSION_PROTO_NAME " ", trash.str, strlen(PEER_SESSION_PROTO_NAME)+1) != 0)
					appctx->st1 = PEER_SESS_SC_ERRPROTO;
				goto switchstate;
			}

			appctx->st0 = PEER_SESS_ST_GETHOST;
			/* fall through */
		case PEER_SESS_ST_GETHOST:
			reql = bo_getline(si_oc(si), trash.str, trash.size);
			if (reql <= 0) { /* closed or EOL not found */
				if (reql == 0)
					goto out;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			if (trash.str[reql-1] != '\n') {
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			else if (reql > 1 && (trash.str[reql-2] == '\r'))
				trash.str[reql-2] = 0;
			else
				trash.str[reql-1] = 0;

			bo_skip(si_oc(si), reql);

			/* test hostname match */
			if (strcmp(localpeer, trash.str) != 0) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRHOST;
				goto switchstate;
			}

			appctx->st0 = PEER_SESS_ST_GETPEER;
			/* fall through */
		case PEER_SESS_ST_GETPEER: {
			struct peer *curpeer;
			char *p;

			reql = bo_getline(si_oc(si), trash.str, trash.size);
			if (reql <= 0) { /* closed or EOL not found */
				if (reql == 0)
					goto out;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			if (trash.str[reql-1] != '\n') {
				/* Incomplete line, we quit */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			else if (reql > 1 && (trash.str[reql-2] == '\r'))
				trash.str[reql-2] = 0;
			else
				trash.str[reql-1] = 0;

			bo_skip(si_oc(si), reql);

			/* parse line "<peer name> <pid>" */
			p = strchr(trash.str, ' ');
			if (!p) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRPROTO;
				goto switchstate;
			}
			*p = 0;

			/* lookup known peer */
			for (curpeer = curpeers->remote; curpeer; curpeer = curpeer->next) {
				if (strcmp(curpeer->id, trash.str) == 0)
					break;
			}

			/* if unknown peer */
			if (!curpeer) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRPEER;
				goto switchstate;
			}

			appctx->ctx.peers.ptr = curpeer;
			appctx->st0 = PEER_SESS_ST_GETTABLE;
			/* fall through */
		}
		case PEER_SESS_ST_GETTABLE: {
			struct peer *curpeer = (struct peer *)appctx->ctx.peers.ptr;
			struct shared_table *st;
			struct peer_session *ps = NULL;
			unsigned long key_type;
			size_t key_size;
			char *p;

			reql = bo_getline(si_oc(si), trash.str, trash.size);
			if (reql <= 0) { /* closed or EOL not found */
				if (reql == 0)
					goto out;
				appctx->ctx.peers.ptr = NULL;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			/* Re-init appctx->ctx.peers.ptr to NULL to correctly handle a release case */
			appctx->ctx.peers.ptr = NULL;

			if (trash.str[reql-1] != '\n') {
				/* Incomplete line, we quit */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			else if (reql > 1 && (trash.str[reql-2] == '\r'))
				trash.str[reql-2] = 0;
			else
				trash.str[reql-1] = 0;

			bo_skip(si_oc(si), reql);

			/* Parse line "<table name> <type> <size>" */
			p = strchr(trash.str, ' ');
			if (!p) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRPROTO;
				goto switchstate;
			}
			*p = 0;
			key_type = (unsigned long)atol(p+1);

			p = strchr(p+1, ' ');
			if (!p) {
				appctx->ctx.peers.ptr = NULL;
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRPROTO;
				goto switchstate;
			}
			key_size = (size_t)atoi(p);

			for (st = curpeers->tables; st; st = st->next) {
				/* If table name matches */
				if (strcmp(st->table->id, trash.str) == 0) {
					/* Check key size mismatches, except for strings
					 * which may be truncated as long as they fit in
					 * a buffer.
					 */
					if (key_size != st->table->key_size &&
					    (key_type != STKTABLE_TYPE_STRING ||
					     1 + 4 + 4 + key_size - 1 >= trash.size)) {
						appctx->st0 = PEER_SESS_ST_EXIT;
						appctx->st1 = PEER_SESS_SC_ERRSIZE;
						goto switchstate;
					}

					/* If key type mismatches */
					if (key_type != st->table->type) {
						appctx->st0 = PEER_SESS_ST_EXIT;
						appctx->st1 = PEER_SESS_SC_ERRTYPE;
						goto switchstate;
					}

					/* lookup peer stream of current peer */
					for (ps = st->sessions; ps; ps = ps->next) {
						if (ps->peer == curpeer) {
							/* If a stream is already active, replace it with the new one */
							if (ps->stream && ps->stream != s) {
								if (ps->peer->local) {
									/* Local connection, reply a retry */
									appctx->st0 = PEER_SESS_ST_EXIT;
									appctx->st1 = PEER_SESS_SC_TRYAGAIN;
									goto switchstate;
								}
								peer_session_forceshutdown(ps->stream);
							}
							ps->stream = s;
							ps->appctx = appctx;
							break;
						}
					}
					break;
				}
			}

			/* If table not found */
			if (!st) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRTABLE;
				goto switchstate;
			}

			/* If no peer session for current peer */
			if (!ps) {
				appctx->st0 = PEER_SESS_ST_EXIT;
				appctx->st1 = PEER_SESS_SC_ERRPEER;
				goto switchstate;
			}

			appctx->ctx.peers.ptr = ps;
			appctx->st0 = PEER_SESS_ST_SENDSUCCESS;
			/* fall through */
		}
		case PEER_SESS_ST_SENDSUCCESS: {
			struct peer_session *ps = (struct peer_session *)appctx->ctx.peers.ptr;

			repl = snprintf(trash.str, trash.size, "%d\n", PEER_SESS_SC_SUCCESSCODE);
			repl = bi_putblk(si_ic(si), trash.str, repl);
			if (repl <= 0) {
				if (repl == -1)
					goto full;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}

			/* Register status code */
			ps->statuscode = PEER_SESS_SC_SUCCESSCODE;

			/* Awake main task */
			task_wakeup(ps->table->sync_task, TASK_WOKEN_MSG);

			/* Init cursors */
			ps->teaching_origin = ps->lastpush = ps->lastack = ps->pushack = 0;
			ps->pushed = ps->update;

			/* Init confirm counter */
			ps->confirm = 0;

			/* reset teaching and learning flags to 0 */
			ps->flags &= PEER_TEACH_RESET;
			ps->flags &= PEER_LEARN_RESET;

			/* if current peer is local */
			if (ps->peer->local) {
				/* if the table needs a resync from local and no process is assigned */
				if ((ps->table->flags & SHTABLE_RESYNC_STATEMASK) == SHTABLE_RESYNC_FROMLOCAL &&
				    !(ps->table->flags & SHTABLE_F_RESYNC_ASSIGN)) {
					/* assign local peer for a lesson, consider lesson already requested */
					ps->flags |= PEER_F_LEARN_ASSIGN;
					ps->table->flags |= (SHTABLE_F_RESYNC_ASSIGN|SHTABLE_F_RESYNC_PROCESS);
				}
			}
			else if ((ps->table->flags & SHTABLE_RESYNC_STATEMASK) == SHTABLE_RESYNC_FROMREMOTE &&
				 !(ps->table->flags & SHTABLE_F_RESYNC_ASSIGN)) {
				/* assign peer for a lesson */
				ps->flags |= PEER_F_LEARN_ASSIGN;
				ps->table->flags |= SHTABLE_F_RESYNC_ASSIGN;
			}

			/* switch to waiting message state */
			appctx->st0 = PEER_SESS_ST_WAITMSG;
			goto switchstate;
		}
		case PEER_SESS_ST_CONNECT: {
			struct peer_session *ps = (struct peer_session *)appctx->ctx.peers.ptr;

			/* Send headers */
			repl = snprintf(trash.str, trash.size,
					PEER_SESSION_PROTO_NAME " 1.0\n%s\n%s %d\n%s %lu %d\n",
					ps->peer->id,
					localpeer,
					(int)getpid(),
					ps->table->table->id,
					ps->table->table->type,
					(int)ps->table->table->key_size);
			if (repl >= trash.size) {
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			repl = bi_putblk(si_ic(si), trash.str, repl);
			if (repl <= 0) {
				if (repl == -1)
					goto full;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}

			/* switch to the waiting statuscode state */
			appctx->st0 = PEER_SESS_ST_GETSTATUS;
			/* fall through */
		}
		case PEER_SESS_ST_GETSTATUS: {
			struct peer_session *ps = (struct peer_session *)appctx->ctx.peers.ptr;

			if (si_ic(si)->flags & CF_WRITE_PARTIAL)
				ps->statuscode = PEER_SESS_SC_CONNECTEDCODE;

			reql = bo_getline(si_oc(si), trash.str, trash.size);
			if (reql <= 0) { /* closed or EOL not found */
				if (reql == 0)
					goto out;
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			if (trash.str[reql-1] != '\n') {
				/* Incomplete line, we quit */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			else if (reql > 1 && (trash.str[reql-2] == '\r'))
				trash.str[reql-2] = 0;
			else
				trash.str[reql-1] = 0;

			bo_skip(si_oc(si), reql);

			/* Register status code */
			ps->statuscode = atoi(trash.str);

			/* Awake main task */
			task_wakeup(ps->table->sync_task, TASK_WOKEN_MSG);

			/* If status code is success */
			if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE) {
				/* Init cursors */
				ps->teaching_origin = ps->lastpush = ps->lastack = ps->pushack = 0;
				ps->pushed = ps->update;

				/* Init confirm counter */
				ps->confirm = 0;

				/* reset teaching and learning flags to 0 */
				ps->flags &= PEER_TEACH_RESET;
				ps->flags &= PEER_LEARN_RESET;

				/* If current peer is local */
				if (ps->peer->local) {
					/* Init cursors to push a resync */
					ps->teaching_origin = ps->pushed = ps->table->table->update;
					/* flag to start to teach lesson */
					ps->flags |= PEER_F_TEACH_PROCESS;
				}
				else if ((ps->table->flags & SHTABLE_RESYNC_STATEMASK) == SHTABLE_RESYNC_FROMREMOTE &&
					 !(ps->table->flags & SHTABLE_F_RESYNC_ASSIGN)) {
					/* If the peer is remote, a resync from remote is needed,
					 * and no peer is currently assigned */

					/* assign peer for a lesson */
					ps->flags |= PEER_F_LEARN_ASSIGN;
					ps->table->flags |= SHTABLE_F_RESYNC_ASSIGN;
				}
			}
			else {
				/* Status code is not success, abort */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}
			appctx->st0 = PEER_SESS_ST_WAITMSG;
			/* fall through */
		}
		case PEER_SESS_ST_WAITMSG: {
			struct peer_session *ps = (struct peer_session *)appctx->ctx.peers.ptr;
			struct stksess *ts, *newts = NULL;
			char c;
			int totl = 0;

			reql = bo_getblk(si_oc(si), (char *)&c, sizeof(c), totl);
			if (reql <= 0) /* closed or EOL not found */
				goto incomplete;
			totl += reql;

			if ((c & 0x80) || (c == 'D')) {
				/* Here we have a data message */
				unsigned int pushack;
				int srvid;
				uint32_t netinteger;

				/* Compute update remote version */
				if (c & 0x80) {
					pushack = ps->pushack + (unsigned int)(c & 0x7F);
				}
				else {
					reql = bo_getblk(si_oc(si), (char *)&netinteger, sizeof(netinteger), totl);
					if (reql <= 0) /* closed or EOL not found */
						goto incomplete;
					totl += reql;
					pushack = ntohl(netinteger);
				}

				/* Read key. The string keys are read in two steps, the first step
				 * consists in reading whatever fits into the table directly into
				 * the pre-allocated key. The second step consists in simply
				 * draining all exceeding data. This can happen for example after a
				 * config reload with a smaller key size for the stick table than
				 * what was previously set, or when facing the impossibility to
				 * allocate a new stksess (for example when the table is full with
				 * "nopurge").
				 */
				if (ps->table->table->type == STKTABLE_TYPE_STRING) {
					unsigned int to_read, to_store;

					/* read size first */
					reql = bo_getblk(si_oc(si), (char *)&netinteger, sizeof(netinteger), totl);
					if (reql <= 0) /* closed or EOL not found */
						goto incomplete;
					totl += reql;

					to_store = 0;
					to_read = ntohl(netinteger);

					if (to_read + totl > si_ob(si)->size) {
						/* impossible to read a key this large, abort */
						reql = -1;
						goto incomplete;
					}

					newts = stksess_new(ps->table->table, NULL);
					if (newts)
						to_store = MIN(to_read, ps->table->table->key_size - 1);

					/* we read up to two blocks, the first one goes into the key,
					 * the rest is drained into the trash.
					 */
					if (to_store) {
						reql = bo_getblk(si_oc(si), (char *)newts->key.key, to_store, totl);
						if (reql <= 0) /* closed or incomplete */
							goto incomplete;
						newts->key.key[reql] = 0;
						totl += reql;
						to_read -= reql;
					}
					if (to_read) {
						reql = bo_getblk(si_oc(si), trash.str, to_read, totl);
						if (reql <= 0) /* closed or incomplete */
							goto incomplete;
						totl += reql;
					}
				}
				else if (ps->table->table->type == STKTABLE_TYPE_INTEGER) {
					reql = bo_getblk(si_oc(si), (char *)&netinteger, sizeof(netinteger), totl);
					if (reql <= 0) /* closed or EOL not found */
						goto incomplete;
					newts = stksess_new(ps->table->table, NULL);
					if (newts) {
						netinteger = ntohl(netinteger);
						memcpy(newts->key.key, &netinteger, sizeof(netinteger));
					}
					totl += reql;
				}
				else {
					/* type ip or binary */
					newts = stksess_new(ps->table->table, NULL);
					reql = bo_getblk(si_oc(si), newts ? (char *)newts->key.key : trash.str,
							 ps->table->table->key_size, totl);
					if (reql <= 0) /* closed or EOL not found */
						goto incomplete;
					totl += reql;
				}

				/* read server id */
				reql = bo_getblk(si_oc(si), (char *)&netinteger, sizeof(netinteger), totl);
				if (reql <= 0) /* closed or EOL not found */
					goto incomplete;
				totl += reql;
				srvid = ntohl(netinteger);

				/* update entry */
				if (newts) {
					/* lookup for existing entry */
					ts = stktable_lookup(ps->table->table, newts);
					if (ts) {
						/* the entry already exists, we can free ours */
						stktable_touch(ps->table->table, ts, 0);
						stksess_free(ps->table->table, newts);
						newts = NULL;
					}
					else {
						struct eb32_node *eb;

						/* create new entry */
						ts = stktable_store(ps->table->table, newts, 0);
						newts = NULL; /* don't reuse it */

						ts->upd.key = (++ps->table->table->update)+(2^31);
						eb = eb32_insert(&ps->table->table->updates, &ts->upd);
						if (eb != &ts->upd) {
							eb32_delete(eb);
							eb32_insert(&ps->table->table->updates, &ts->upd);
						}
					}

					/* update entry */
					if (srvid && stktable_data_ptr(ps->table->table, ts, STKTABLE_DT_SERVER_ID))
						stktable_data_cast(stktable_data_ptr(ps->table->table, ts, STKTABLE_DT_SERVER_ID), server_id) = srvid;
					ps->pushack = pushack;
				}
			}
			else if (c == 'R') {
				/* Reset message: remote needs resync */

				/* reinit counters for a resync */
				ps->lastpush = 0;
				ps->teaching_origin = ps->pushed = ps->table->table->update;

				/* reset teaching flags to 0 */
				ps->flags &= PEER_TEACH_RESET;

				/* flag to start to teach lesson */
				ps->flags |= PEER_F_TEACH_PROCESS;
			}
			else if (c == 'F') {
				/* Finish message, all known updates have been pushed by remote */
				/* and remote is up to date */

				/* If resync is in progress with remote peer */
				if (ps->flags & PEER_F_LEARN_ASSIGN) {
					/* unassign current peer for learning */
					ps->flags &= ~PEER_F_LEARN_ASSIGN;
					ps->table->flags &= ~(SHTABLE_F_RESYNC_ASSIGN|SHTABLE_F_RESYNC_PROCESS);

					/* Consider the table now up to date; a resync is no longer
					 * needed from local nor remote */
					ps->table->flags |= (SHTABLE_F_RESYNC_LOCAL|SHTABLE_F_RESYNC_REMOTE);
				}
				/* Increase confirm counter to launch a confirm message */
				ps->confirm++;
			}
			else if (c == 'c') {
				/* confirm message, remote peer is now up to date with us */

				/* If stopping state */
				if (stopping) {
					/* Close session, push resync no longer needed */
					ps->flags |= PEER_F_TEACH_COMPLETE;
					appctx->st0 = PEER_SESS_ST_END;
					goto switchstate;
				}

				/* reset teaching flags to 0 */
				ps->flags &= PEER_TEACH_RESET;
			}
			else if (c == 'C') {
				/* Continue message, all known updates have been pushed by remote */
				/* but remote is not up to date */

				/* If resync is in progress with current peer */
				if (ps->flags & PEER_F_LEARN_ASSIGN) {
					/* unassign current peer */
					ps->flags &= ~PEER_F_LEARN_ASSIGN;
					ps->table->flags &= ~(SHTABLE_F_RESYNC_ASSIGN|SHTABLE_F_RESYNC_PROCESS);

					/* flag that the current peer is not up to date to try another one */
					ps->flags |= PEER_F_LEARN_NOTUP2DATE;

					/* reschedule a resync */
					ps->table->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000));
					task_wakeup(ps->table->sync_task, TASK_WOKEN_MSG);
				}
				ps->confirm++;
			}
			else if (c == 'A') {
				/* ack message */
				uint32_t netinteger;

				reql = bo_getblk(si_oc(si), (char *)&netinteger, sizeof(netinteger), totl);
				if (reql <= 0) /* closed or EOL not found */
					goto incomplete;
				totl += reql;

				/* Consider remote up to date with the "acked" version */
				ps->update = ntohl(netinteger);
			}
			else {
				/* Unknown message */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}

			/* skip consumed message */
			bo_skip(si_oc(si), totl);

			/* loop on that state to peek next message */
			goto switchstate;

incomplete:
			/* we get here when a bo_getblk() returns <= 0 in reql */

			/* first, we may have to release newts */
			if (newts) {
				stksess_free(ps->table->table, newts);
				newts = NULL;
			}

			if (reql < 0) {
				/* there was an error */
				appctx->st0 = PEER_SESS_ST_END;
				goto switchstate;
			}

			/* Nothing to read, now we start to write */

			/* Confirm finished or partial messages */
			while (ps->confirm) {
				/* There is a confirm message to send */
				repl = bi_putchr(si_ic(si), 'c');
				if (repl <= 0) {
					/* no more write possible */
					if (repl == -1)
						goto full;
					appctx->st0 = PEER_SESS_ST_END;
					goto switchstate;
				}
				ps->confirm--;
			}

			/* Need to request a resync */
			if ((ps->flags & PEER_F_LEARN_ASSIGN) &&
			    (ps->table->flags & SHTABLE_F_RESYNC_ASSIGN) &&
			    !(ps->table->flags & SHTABLE_F_RESYNC_PROCESS)) {
				/* Current peer was elected to request a resync */
				repl = bi_putchr(si_ic(si), 'R');
				if (repl <= 0) {
					/* no more write possible */
					if (repl == -1)
						goto full;
					appctx->st0 = PEER_SESS_ST_END;
					goto switchstate;
				}
				ps->table->flags |= SHTABLE_F_RESYNC_PROCESS;
			}

			/* Some updates remain to be acked */
			if (ps->pushack != ps->lastack) {
				uint32_t netinteger;

				trash.str[0] = 'A';
				netinteger = htonl(ps->pushack);
				memcpy(&trash.str[1], &netinteger, sizeof(netinteger));
				repl = bi_putblk(si_ic(si), trash.str, 1+sizeof(netinteger));
				if (repl <= 0) {
					/* no more write possible */
					if (repl == -1)
						goto full;
					appctx->st0 = PEER_SESS_ST_END;
					goto switchstate;
				}
				ps->lastack = ps->pushack;
			}

			if (ps->flags & PEER_F_TEACH_PROCESS) {
				/* current peer was requested for a lesson */
				if (!(ps->flags & PEER_F_TEACH_STAGE1)) {
					/* lesson stage 1 not complete */
					struct eb32_node *eb;

					eb = eb32_lookup_ge(&ps->table->table->updates, ps->pushed+1);
					while (1) {
						int msglen;
						struct stksess *ts;

						if (!eb) {
							/* flag lesson stage1 complete */
							ps->flags |= PEER_F_TEACH_STAGE1;
							eb = eb32_first(&ps->table->table->updates);
							if (eb)
								ps->pushed = eb->key - 1;
							break;
						}

						ts = eb32_entry(eb, struct stksess, upd);
						msglen = peer_prepare_datamsg(ts, ps, trash.str, trash.size);
						if (msglen) {
							/* message to buffer */
							repl = bi_putblk(si_ic(si), trash.str, msglen);
							if (repl <= 0) {
								/* no more write possible */
								if (repl == -1)
									goto full;
								appctx->st0 = PEER_SESS_ST_END;
								goto switchstate;
							}
							ps->lastpush = ps->pushed = ts->upd.key;
						}
						eb = eb32_next(eb);
					}
				} /* !TEACH_STAGE1 */

				if (!(ps->flags & PEER_F_TEACH_STAGE2)) {
					/* lesson stage 2 not complete */
					struct eb32_node *eb;

					eb = eb32_lookup_ge(&ps->table->table->updates, ps->pushed+1);
					while (1) {
						int msglen;
						struct stksess *ts;

						if (!eb || eb->key > ps->teaching_origin) {
							/* flag lesson stage2 complete */
							ps->flags |= PEER_F_TEACH_STAGE2;
							ps->pushed = ps->teaching_origin;
							break;
						}

						ts = eb32_entry(eb, struct stksess, upd);
						msglen = peer_prepare_datamsg(ts, ps, trash.str, trash.size);
						if (msglen) {
							/* message to buffer */
							repl = bi_putblk(si_ic(si), trash.str, msglen);
							if (repl <= 0) {
								/* no more write possible */
								if (repl == -1)
									goto full;
								appctx->st0 = PEER_SESS_ST_END;
								goto switchstate;
							}
							ps->lastpush = ps->pushed = ts->upd.key;
						}
						eb = eb32_next(eb);
					}
				} /* !TEACH_STAGE2 */

				if (!(ps->flags & PEER_F_TEACH_FINISHED)) {
					/* process final lesson message */
					repl = bi_putchr(si_ic(si),
							 ((ps->table->flags & SHTABLE_RESYNC_STATEMASK) == SHTABLE_RESYNC_FINISHED) ? 'F' : 'C');
					if (repl <= 0) {
						/* no more write possible */
						if (repl == -1)
							goto full;
						appctx->st0 = PEER_SESS_ST_END;
						goto switchstate;
					}

					/* flag finished message sent */
					ps->flags |= PEER_F_TEACH_FINISHED;
				} /* !TEACH_FINISHED */
			} /* TEACH_PROCESS */

			if (!(ps->flags & PEER_F_LEARN_ASSIGN) &&
			    (int)(ps->pushed - ps->table->table->localupdate) < 0) {
				/* Push local updates, only if no learning in progress (to avoid ping-pong effects) */
				struct eb32_node *eb;

				eb = eb32_lookup_ge(&ps->table->table->updates, ps->pushed+1);
				while (1) {
					int msglen;
					struct stksess *ts;

					/* push local updates */
					if (!eb) {
						eb = eb32_first(&ps->table->table->updates);
						if (!eb || ((int)(eb->key - ps->pushed) <= 0)) {
							ps->pushed = ps->table->table->localupdate;
							break;
						}
					}

					if ((int)(eb->key - ps->table->table->localupdate) > 0) {
						ps->pushed = ps->table->table->localupdate;
						break;
					}

					ts = eb32_entry(eb, struct stksess, upd);
					msglen = peer_prepare_datamsg(ts, ps, trash.str, trash.size);
					if (msglen) {
						/* message to buffer */
						repl = bi_putblk(si_ic(si), trash.str, msglen);
						if (repl <= 0) {
							/* no more write possible */
							if (repl == -1)
								goto full;
							appctx->st0 = PEER_SESS_ST_END;
							goto switchstate;
						}
						ps->lastpush = ps->pushed = ts->upd.key;
					}
					eb = eb32_next(eb);
				}
			} /* ! LEARN_ASSIGN */

			/* nothing more to do */
			goto out;
		}
		case PEER_SESS_ST_EXIT:
			repl = snprintf(trash.str, trash.size, "%d\n", appctx->st1);
			if (bi_putblk(si_ic(si), trash.str, repl) == -1)
				goto full;
			appctx->st0 = PEER_SESS_ST_END;
			/* fall through */
		case PEER_SESS_ST_END: {
			si_shutw(si);
			si_shutr(si);
			si_ic(si)->flags |= CF_READ_NULL;
			goto out;
		}
		}
	}
out:
	si_oc(si)->flags |= CF_READ_DONTWAIT;
	return;
full:
	si_applet_cant_put(si);
	goto out;
}
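/* Illustrative, out-of-tree sketch (not HAProxy code): the handshake above is
 * line-oriented text, and the GETTABLE state splits "<table name> <type>
 * <size>" with strchr() and atol()/atoi(). The hypothetical standalone helper
 * below (parse_table_line) reproduces that parsing so the wire format can be
 * exercised in isolation; the sample values are made up.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* splits "<table name> <type> <size>" in place; returns 0 on success */
static int parse_table_line(char *line, char **name,
			    unsigned long *key_type, size_t *key_size)
{
	char *p = strchr(line, ' ');

	if (!p)
		return -1;
	*p = 0;
	*name = line;
	*key_type = (unsigned long)atol(p + 1);

	p = strchr(p + 1, ' ');
	if (!p)
		return -1;
	*key_size = (size_t)atoi(p);	/* atoi() skips the leading space */
	return 0;
}

int main(void)
{
	char line[] = "stick_http 2 32";
	char *name;
	unsigned long type;
	size_t size;

	if (parse_table_line(line, &name, &type, &size) == 0)
		printf("table=%s type=%lu key_size=%zu\n", name, type, size);
	return 0;
}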
/* This function is used for inter-stream-interface calls. It is called by the
 * producer to inform the consumer side that it may be interested in checking
 * for data in the buffer. Note that it intentionally does not update timeouts,
 * so that we can still check them later at wake-up.
 */
static void stream_int_chk_snd_conn(struct stream_interface *si)
{
	struct channel *oc = si_oc(si);
	struct connection *conn = __objt_conn(si->end);

	if (unlikely(si->state > SI_ST_EST || (oc->flags & CF_SHUTW)))
		return;

	if (unlikely(channel_is_empty(oc)))	/* called with nothing to send ! */
		return;

	if (!oc->pipe &&			/* spliced data wants to be forwarded ASAP */
	    !(si->flags & SI_FL_WAIT_DATA))	/* not waiting for data */
		return;

	if (conn->flags & (CO_FL_DATA_WR_ENA|CO_FL_CURR_WR_ENA)) {
		/* already subscribed to write notifications, will be called
		 * anyway, so let's avoid calling it especially if the reader
		 * is not ready.
		 */
		return;
	}

	/* Before calling the data-level operations, we have to prepare
	 * the polling flags to ensure we properly detect changes.
	 */
	conn_refresh_polling_flags(conn);
	__conn_data_want_send(conn);

	if (!(conn->flags & (CO_FL_HANDSHAKE|CO_FL_WAIT_L4_CONN|CO_FL_WAIT_L6_CONN))) {
		si_conn_send(conn);
		if (conn->flags & CO_FL_ERROR) {
			/* Write error on the file descriptor */
			__conn_data_stop_both(conn);
			si->flags |= SI_FL_ERR;
			goto out_wakeup;
		}
	}

	/* OK, so now we know that some data might have been sent, and that we may
	 * have to poll first. We have to do that too if the buffer is not empty.
	 */
	if (channel_is_empty(oc)) {
		/* the connection is established but we can't write. Either the
		 * buffer is empty, or we just refrain from sending because the
		 * ->o limit was reached. Maybe we just wrote the last chunk and
		 * need to close.
		 */
		__conn_data_stop_send(conn);
		if (((oc->flags & (CF_SHUTW|CF_AUTO_CLOSE|CF_SHUTW_NOW)) ==
		     (CF_AUTO_CLOSE|CF_SHUTW_NOW)) &&
		    (si->state == SI_ST_EST)) {
			si_shutw(si);
			goto out_wakeup;
		}

		if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0)
			si->flags |= SI_FL_WAIT_DATA;
		oc->wex = TICK_ETERNITY;
	}
	else {
		/* Otherwise there are remaining data to be sent in the buffer,
		 * which means we have to poll before doing so.
		 */
		__conn_data_want_send(conn);
		si->flags &= ~SI_FL_WAIT_DATA;
		if (!tick_isset(oc->wex))
			oc->wex = tick_add_ifset(now_ms, oc->wto);
	}

	if (likely(oc->flags & CF_WRITE_ACTIVITY)) {
		struct channel *ic = si_ic(si);

		/* update timeout if we have written something */
		if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
		    !channel_is_empty(oc))
			oc->wex = tick_add_ifset(now_ms, oc->wto);

		if (tick_isset(ic->rex) && !(si->flags & SI_FL_INDEP_STR)) {
			/* Note: to prevent the client from expiring read timeouts
			 * during writes, we refresh it. We only do this if the
			 * interface is not configured for "independent streams",
			 * because for some applications it's better not to do this,
			 * for instance when continuously exchanging small amounts
			 * of data which can fill the socket buffers long before a
			 * write timeout is detected.
			 */
			ic->rex = tick_add_ifset(now_ms, ic->rto);
		}
	}

	/* in case of special condition (error, shutdown, end of write...), we
	 * have to notify the task.
	 */
	if (likely((oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR|CF_SHUTW)) ||
		   ((oc->flags & CF_WAKE_WRITE) &&
		    ((channel_is_empty(oc) && !oc->to_forward) ||
		     si->state != SI_ST_EST)))) {
	out_wakeup:
		if (!(si->flags & SI_FL_DONT_WAKE))
			task_wakeup(si_task(si), TASK_WOKEN_IO);
	}

	/* commit possible polling changes */
	conn_cond_update_polling(conn);
}
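/* Illustrative, out-of-tree sketch (not HAProxy code): the "send first, poll
 * only if needed" policy above is the classic non-blocking write pattern:
 * attempt the write opportunistically and subscribe to write-readiness events
 * only when the kernel socket buffer fills up. try_send() and need_pollout
 * are hypothetical names.
 */
#include <errno.h>
#include <sys/socket.h>

/* returns bytes sent, 0 if we must wait for POLLOUT, -1 on error */
static ssize_t try_send(int fd, const void *buf, size_t len, int *need_pollout)
{
	ssize_t ret = send(fd, buf, len, MSG_DONTWAIT | MSG_NOSIGNAL);

	*need_pollout = 0;
	if (ret >= 0)
		return ret;
	if (errno == EAGAIN || errno == EWOULDBLOCK) {
		/* socket buffer full: this is the only case where polling
		 * for write readiness is worth the extra syscalls.
		 */
		*need_pollout = 1;
		return 0;
	}
	return -1;
}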
/*
 * This function performs a shutdown-write on a stream interface attached to
 * a connection in a connected or init state (it does nothing for other
 * states). It either shuts the write side or marks itself as closed. The
 * buffer flags are updated to reflect the new state. It also closes
 * everything if the SI was marked as being in error state. If a data-layer
 * shutdown operation is defined, it is called.
 */
static void stream_int_shutw_conn(struct stream_interface *si)
{
	struct connection *conn = __objt_conn(si->end);
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	oc->flags &= ~CF_SHUTW_NOW;
	if (oc->flags & CF_SHUTW)
		return;
	oc->flags |= CF_SHUTW;
	oc->wex = TICK_ETERNITY;
	si->flags &= ~SI_FL_WAIT_DATA;

	switch (si->state) {
	case SI_ST_EST:
		/* we have to shut before closing, otherwise some short messages
		 * may never leave the system, especially when there are remaining
		 * unread data in the socket input buffer, or when nolinger is set.
		 * However, if SI_FL_NOLINGER is explicitly set, we know there is
		 * no risk so we close both sides immediately.
		 */
		if (si->flags & SI_FL_ERR) {
			/* quick close, the socket is already shut anyway */
		}
		else if (si->flags & SI_FL_NOLINGER) {
			/* unclean data-layer shutdown */
			conn_data_shutw_hard(conn);
		}
		else {
			/* clean data-layer shutdown */
			conn_data_shutw(conn);

			/* If the stream interface is configured to disable half-open
			 * connections, we'll skip the shutdown(), but only if the
			 * read side is already closed. Otherwise we can't support
			 * closed write with pending read (eg: abortonclose while
			 * waiting for the server).
			 */
			if (!(si->flags & SI_FL_NOHALF) || !(ic->flags & (CF_SHUTR|CF_DONT_READ))) {
				/* We shutdown transport layer */
				conn_sock_shutw(conn);

				if (!(ic->flags & (CF_SHUTR|CF_DONT_READ))) {
					/* OK just a shutw, but we want the caller
					 * to disable polling on this FD if it exists.
					 */
					conn_cond_update_polling(conn);
					return;
				}
			}
		}
		/* fall through */
	case SI_ST_CON:
		/* we may have to close a pending connection, and mark the
		 * response buffer as shutr
		 */
		conn_full_close(conn);
		/* fall through */
	case SI_ST_CER:
	case SI_ST_QUE:
	case SI_ST_TAR:
		si->state = SI_ST_DIS;
		/* fall through */
	default:
		si->flags &= ~(SI_FL_WAIT_ROOM | SI_FL_NOLINGER);
		ic->flags &= ~CF_SHUTR_NOW;
		ic->flags |= CF_SHUTR;
		ic->rex = TICK_ETERNITY;
		si->exp = TICK_ETERNITY;
	}
}
/* Callback to be used by connection I/O handlers upon completion. It differs from
 * the update function in that it is designed to be called by lower layers after I/O
 * events have been completed. It will also try to wake the associated task up if
 * an important event requires special handling. It relies on the connection handler
 * to commit any polling updates. The function always returns 0.
 */
static int si_conn_wake_cb(struct connection *conn)
{
	struct stream_interface *si = conn->owner;
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
		__FUNCTION__, si, si->state, ic->flags, oc->flags);

	if (conn->flags & CO_FL_ERROR)
		si->flags |= SI_FL_ERR;

	/* check for recent connection establishment */
	if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED)))) {
		si->exp = TICK_ETERNITY;
		oc->flags |= CF_WRITE_NULL;
	}

	/* process consumer side */
	if (channel_is_empty(oc)) {
		if (((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW) &&
		    (si->state == SI_ST_EST))
			stream_int_shutw_conn(si);
		__conn_data_stop_send(conn);
		oc->wex = TICK_ETERNITY;
	}

	if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0 && channel_may_recv(oc))
		si->flags |= SI_FL_WAIT_DATA;

	if (oc->flags & CF_WRITE_ACTIVITY) {
		/* update timeouts if we have written something */
		if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
		    !channel_is_empty(oc))
			if (tick_isset(oc->wex))
				oc->wex = tick_add_ifset(now_ms, oc->wto);

		if (!(si->flags & SI_FL_INDEP_STR))
			if (tick_isset(ic->rex))
				ic->rex = tick_add_ifset(now_ms, ic->rto);

		if (likely((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL|CF_DONT_READ)) == CF_WRITE_PARTIAL &&
			   channel_may_recv(oc) &&
			   (si_opposite(si)->flags & SI_FL_WAIT_ROOM)))
			si_chk_rcv(si_opposite(si));
	}

	/* process producer side.
	 * We might have some data the consumer is waiting for.
	 * We can do fast-forwarding, but we avoid doing this for partial
	 * buffers, because it is very likely that it will be done again
	 * immediately afterwards once the following data is parsed (eg:
	 * HTTP chunking).
	 */
	if (((ic->flags & CF_READ_PARTIAL) && !channel_is_empty(ic)) &&
	    (ic->pipe /* always try to send spliced data */ ||
	     (si_ib(si)->i == 0 && (si_opposite(si)->flags & SI_FL_WAIT_DATA)))) {
		int last_len = ic->pipe ? ic->pipe->data : 0;

		si_chk_snd(si_opposite(si));

		/* check if the consumer has freed some space either in the
		 * buffer or in the pipe.
		 */
		if (channel_may_recv(ic) &&
		    (!last_len || !ic->pipe || ic->pipe->data < last_len))
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	if (si->flags & SI_FL_WAIT_ROOM) {
		__conn_data_stop_recv(conn);
		ic->rex = TICK_ETERNITY;
	}
	else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL|CF_DONT_READ)) == CF_READ_PARTIAL &&
		 channel_may_recv(ic)) {
		/* we must re-enable reading if si_chk_snd() has freed some space */
		__conn_data_want_recv(conn);
		if (!(ic->flags & CF_READ_NOEXP) && tick_isset(ic->rex))
			ic->rex = tick_add_ifset(now_ms, ic->rto);
	}

	/* wake the task up only when needed */
	if (/* changes on the production side */
	    (ic->flags & (CF_READ_NULL|CF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((ic->flags & CF_READ_PARTIAL) &&
	     (!ic->to_forward || si_opposite(si)->state != SI_ST_EST)) ||

	    /* changes on the consumption side */
	    (oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR)) ||
	    ((oc->flags & CF_WRITE_ACTIVITY) &&
	     ((oc->flags & CF_SHUTW) ||
	      ((oc->flags & CF_WAKE_WRITE) &&
	       (si_opposite(si)->state != SI_ST_EST ||
		(channel_is_empty(oc) && !oc->to_forward)))))) {
		task_wakeup(si_task(si), TASK_WOKEN_IO);
	}
	if (ic->flags & CF_READ_ACTIVITY)
		ic->flags &= ~CF_READ_DONTWAIT;

	session_release_buffers(si_sess(si));
	return 0;
}
/*
 * This is the callback which is called by the connection layer to receive data
 * into the buffer from the connection. It iterates over the transport layer's
 * rcv_buf function.
 */
static void si_conn_recv_cb(struct connection *conn)
{
	struct stream_interface *si = conn->owner;
	struct channel *ic = si_ic(si);
	int ret, max, cur_read;
	int read_poll = MAX_READ_POLL_LOOPS;

	/* stop immediately on errors. Note that we DON'T want to stop on
	 * POLL_ERR, as the poller might report a write error while there
	 * are still data available in the recv buffer. This typically
	 * happens when we send too large a request to a backend server
	 * which rejects it before reading it all.
	 */
	if (conn->flags & CO_FL_ERROR)
		return;

	/* stop here if we reached the end of data */
	if (conn_data_read0_pending(conn))
		goto out_shutdown_r;

	/* maybe we were called immediately after an asynchronous shutr */
	if (ic->flags & CF_SHUTR)
		return;

	cur_read = 0;

	if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !ic->buf->o &&
	    global.tune.idle_timer &&
	    (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) {
		/* The buffer was empty and nothing was transferred for more
		 * than one second. This was caused by a pause and not by
		 * congestion. Reset any streaming mode to reduce latency.
		 */
		ic->xfer_small = 0;
		ic->xfer_large = 0;
		ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
	}

	/* First, let's see if we may splice data across the channel without
	 * using a buffer.
	 */
	if (conn->xprt->rcv_pipe &&
	    (ic->pipe || ic->to_forward >= MIN_SPLICE_FORWARD) &&
	    ic->flags & CF_KERN_SPLICING) {
		if (buffer_not_empty(ic->buf)) {
			/* We're embarrassed, there are already data pending in
			 * the buffer and we don't want to have them at two
			 * locations at a time. Let's indicate we need some
			 * place and ask the consumer to hurry.
			 */
			goto abort_splice;
		}

		if (unlikely(ic->pipe == NULL)) {
			if (pipes_used >= global.maxpipes || !(ic->pipe = get_pipe())) {
				ic->flags &= ~CF_KERN_SPLICING;
				goto abort_splice;
			}
		}

		ret = conn->xprt->rcv_pipe(conn, ic->pipe, ic->to_forward);
		if (ret < 0) {
			/* splice not supported on this end, let's disable it */
			ic->flags &= ~CF_KERN_SPLICING;
			goto abort_splice;
		}

		if (ret > 0) {
			if (ic->to_forward != CHN_INFINITE_FORWARD)
				ic->to_forward -= ret;
			ic->total += ret;
			cur_read += ret;
			ic->flags |= CF_READ_PARTIAL;
		}

		if (conn_data_read0_pending(conn))
			goto out_shutdown_r;

		if (conn->flags & CO_FL_ERROR)
			return;

		if (conn->flags & CO_FL_WAIT_ROOM) {
			/* the pipe is full or we have read enough data that it
			 * could soon be full. Let's stop before needing to poll.
			 */
			si->flags |= SI_FL_WAIT_ROOM;
			__conn_data_stop_recv(conn);
		}

		/* splice not possible (anymore), let's go on with standard copy */
	}

 abort_splice:
	if (ic->pipe && unlikely(!ic->pipe->data)) {
		put_pipe(ic->pipe);
		ic->pipe = NULL;
	}

	/* now we'll need a buffer */
	if (!stream_alloc_recv_buffer(ic)) {
		si->flags |= SI_FL_WAIT_ROOM;
		goto end_recv;
	}

	/* Important note: if we're called with POLL_IN|POLL_HUP, it means the read polling
	 * was enabled, which implies that the recv buffer was not full. So we have a guarantee
	 * that if such an event is not handled above in splice, it will be handled here by
	 * recv().
	 */
	while (!(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH | CO_FL_WAIT_ROOM | CO_FL_HANDSHAKE))) {
		max = channel_recv_max(ic);

		if (!max) {
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		ret = conn->xprt->rcv_buf(conn, ic->buf, max);
		if (ret <= 0)
			break;

		cur_read += ret;

		/* if we're allowed to directly forward data, we must update ->o */
		if (ic->to_forward && !(ic->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
			unsigned long fwd = ret;
			if (ic->to_forward != CHN_INFINITE_FORWARD) {
				if (fwd > ic->to_forward)
					fwd = ic->to_forward;
				ic->to_forward -= fwd;
			}
			b_adv(ic->buf, fwd);
		}

		ic->flags |= CF_READ_PARTIAL;
		ic->total += ret;

		if (!channel_may_recv(ic)) {
			si->flags |= SI_FL_WAIT_ROOM;
			break;
		}

		if ((ic->flags & CF_READ_DONTWAIT) || --read_poll <= 0) {
			si->flags |= SI_FL_WAIT_ROOM;
			__conn_data_stop_recv(conn);
			break;
		}

		/* if too many bytes were missing from last read, it means that
		 * it's pointless trying to read again because the system does
		 * not have them in buffers.
		 */
		if (ret < max) {
			/* if a streamer has read few data, it may be because we
			 * have exhausted system buffers. It's not worth trying
			 * again.
			 */
			if (ic->flags & CF_STREAMER)
				break;

			/* if we read a large block smaller than what we requested,
			 * it's almost certain we'll never get anything more.
			 */
			if (ret >= global.tune.recv_enough)
				break;
		}
	} /* while !flags */

	if (cur_read) {
		if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) &&
		    (cur_read <= ic->buf->size / 2)) {
			ic->xfer_large = 0;
			ic->xfer_small++;
			if (ic->xfer_small >= 3) {
				/* we have read less than half of the buffer in
				 * one pass, and this happened at least 3 times.
				 * This is definitely not a streamer.
				 */
				ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
			}
			else if (ic->xfer_small >= 2) {
				/* if the buffer has been at least half full twice,
				 * we receive faster than we send, so at least it
				 * is not a "fast streamer".
				 */
				ic->flags &= ~CF_STREAMER_FAST;
			}
		}
		else if (!(ic->flags & CF_STREAMER_FAST) &&
			 (cur_read >= ic->buf->size - global.tune.maxrewrite)) {
			/* we read a full buffer at once */
			ic->xfer_small = 0;
			ic->xfer_large++;
			if (ic->xfer_large >= 3) {
				/* we call this buffer a fast streamer if it manages
				 * to be filled in one call 3 consecutive times.
				 */
				ic->flags |= (CF_STREAMER | CF_STREAMER_FAST);
			}
		}
		else {
			ic->xfer_small = 0;
			ic->xfer_large = 0;
		}
		ic->last_read = now_ms;
	}

 end_recv:
	if (conn->flags & CO_FL_ERROR)
		return;

	if (conn_data_read0_pending(conn))
		/* connection closed */
		goto out_shutdown_r;

	return;

 out_shutdown_r:
	/* we received a shutdown */
	ic->flags |= CF_READ_NULL;
	if (ic->flags & CF_AUTO_CLOSE)
		channel_shutw_now(ic);
	stream_sock_read0(si);
	conn_data_read0(conn);
	return;
}
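/* Illustrative, out-of-tree sketch (not HAProxy code): the rcv_pipe path
 * above relies on Linux splice(2) to move bytes from a socket into a pipe
 * without copying them through userland. A minimal forwarder under that
 * assumption (Linux-only; src, dst and the byte count are hypothetical)
 * looks like this:
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* moves up to cnt bytes from src to dst through an anonymous pipe;
 * returns bytes forwarded, or -1 when the pipe cannot be created.
 */
static ssize_t splice_forward(int src, int dst, size_t cnt)
{
	int p[2];
	ssize_t in, out, total = 0;

	if (pipe(p) < 0)
		return -1;

	while (cnt) {
		in = splice(src, NULL, p[1], NULL, cnt,
			    SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
		if (in <= 0)
			break;

		/* drain the pipe completely before reading more */
		while (in > 0) {
			out = splice(p[0], NULL, dst, NULL, in, SPLICE_F_MOVE);
			if (out <= 0)
				goto done;
			in -= out;
			total += out;
			cnt -= out;
		}
	}
 done:
	close(p[0]);
	close(p[1]);
	return total;
}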
/* This function is the equivalent to stream_int_update() except that it's
 * designed to be called from outside the stream handlers, typically the lower
 * layers (applets, connections) after I/O completion. After updating the stream
 * interface and timeouts, it will try to forward what can be forwarded, then to
 * wake the associated task up if an important event requires special handling.
 * It should not be called from within the stream itself, stream_int_update()
 * is designed for this.
 */
void stream_int_notify(struct stream_interface *si)
{
	struct channel *ic = si_ic(si);
	struct channel *oc = si_oc(si);

	/* process consumer side */
	if (channel_is_empty(oc)) {
		if (((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW) &&
		    (si->state == SI_ST_EST))
			si_shutw(si);
		oc->wex = TICK_ETERNITY;
	}

	/* indicate that we may be waiting for data from the output channel */
	if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0 && channel_may_recv(oc))
		si->flags |= SI_FL_WAIT_DATA;

	/* update OC timeouts and wake the other side up if it's waiting for room */
	if (oc->flags & CF_WRITE_ACTIVITY) {
		if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
		    !channel_is_empty(oc))
			if (tick_isset(oc->wex))
				oc->wex = tick_add_ifset(now_ms, oc->wto);

		if (!(si->flags & SI_FL_INDEP_STR))
			if (tick_isset(ic->rex))
				ic->rex = tick_add_ifset(now_ms, ic->rto);

		if (likely((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL|CF_DONT_READ)) == CF_WRITE_PARTIAL &&
			   channel_may_recv(oc) &&
			   (si_opposite(si)->flags & SI_FL_WAIT_ROOM)))
			si_chk_rcv(si_opposite(si));
	}

	/* Notify the other side when we've injected data into the IC that
	 * needs to be forwarded. We can do fast-forwarding as soon as there
	 * are output data, but we avoid doing this if some of the data are
	 * not yet scheduled for being forwarded, because it is very likely
	 * that it will be done again immediately afterwards once the following
	 * data are parsed (eg: HTTP chunking). We only clear SI_FL_WAIT_ROOM
	 * once we've emptied *some* of the output buffer, and not just when
	 * there is available room, because applets are often forced to stop
	 * before the buffer is full. We must not stop based on input data
	 * alone because an HTTP parser might need more data to complete the
	 * parsing.
	 */
	if (!channel_is_empty(ic) &&
	    (si_opposite(si)->flags & SI_FL_WAIT_DATA) &&
	    (ic->buf->i == 0 || ic->pipe)) {
		int new_len, last_len;

		last_len = ic->buf->o;
		if (ic->pipe)
			last_len += ic->pipe->data;

		si_chk_snd(si_opposite(si));

		new_len = ic->buf->o;
		if (ic->pipe)
			new_len += ic->pipe->data;

		/* check if the consumer has freed some space either in the
		 * buffer or in the pipe.
		 */
		if (channel_may_recv(ic) && new_len < last_len)
			si->flags &= ~SI_FL_WAIT_ROOM;
	}

	if (si->flags & SI_FL_WAIT_ROOM) {
		ic->rex = TICK_ETERNITY;
	}
	else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL|CF_DONT_READ)) == CF_READ_PARTIAL &&
		 channel_may_recv(ic)) {
		/* we must re-enable reading if si_chk_snd() has freed some space */
		if (!(ic->flags & CF_READ_NOEXP) && tick_isset(ic->rex))
			ic->rex = tick_add_ifset(now_ms, ic->rto);
	}

	/* wake the task up only when needed */
	if (/* changes on the production side */
	    (ic->flags & (CF_READ_NULL|CF_READ_ERROR)) ||
	    si->state != SI_ST_EST ||
	    (si->flags & SI_FL_ERR) ||
	    ((ic->flags & CF_READ_PARTIAL) &&
	     (!ic->to_forward || si_opposite(si)->state != SI_ST_EST)) ||

	    /* changes on the consumption side */
	    (oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR)) ||
	    ((oc->flags & CF_WRITE_ACTIVITY) &&
	     ((oc->flags & CF_SHUTW) ||
	      ((oc->flags & CF_WAKE_WRITE) &&
	       (si_opposite(si)->state != SI_ST_EST ||
		(channel_is_empty(oc) && !oc->to_forward)))))) {
		task_wakeup(si_task(si), TASK_WOKEN_IO);
	}
	if (ic->flags & CF_READ_ACTIVITY)
		ic->flags &= ~CF_READ_DONTWAIT;

	stream_release_buffers(si_strm(si));
}