static void vca_eev(const struct epoll_event *ep) { struct sess *ss[NEEV], *sp; int i, j; AN(ep->data.ptr); if (ep->data.ptr == vca_pipes) { if (ep->events & EPOLLIN || ep->events & EPOLLPRI) { j = 0; i = read(vca_pipes[0], ss, sizeof ss); if (i == -1 && errno == EAGAIN) return; while (i >= sizeof ss[0]) { CHECK_OBJ_NOTNULL(ss[j], SESS_MAGIC); assert(ss[j]->fd >= 0); AZ(ss[j]->obj); VTAILQ_INSERT_TAIL(&sesshead, ss[j], list); vca_cond_modadd(ss[j]->fd, ss[j]); j++; i -= sizeof ss[0]; } assert(i == 0); } } else { CAST_OBJ_NOTNULL(sp, ep->data.ptr, SESS_MAGIC); if (ep->events & EPOLLIN || ep->events & EPOLLPRI) { i = HTC_Rx(sp->htc); if (i == 0) { vca_modadd(sp->fd, sp, EPOLL_CTL_MOD); return; /* more needed */ } VTAILQ_REMOVE(&sesshead, sp, list); vca_handover(sp, i); } else if (ep->events & EPOLLERR) { VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "ERR"); SES_Delete(sp); } else if (ep->events & EPOLLHUP) { VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "HUP"); SES_Delete(sp); } else if (ep->events & EPOLLRDHUP) { VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "RHUP"); SES_Delete(sp); } } }
static void * vca_kqueue_main(void *arg) { struct kevent ke[NKEV], *kp; int j, n, dotimer; double deadline; struct sess *sp; THR_SetName("cache-kqueue"); (void)arg; kq = kqueue(); assert(kq >= 0); j = 0; EV_SET(&ke[j], 0, EVFILT_TIMER, EV_ADD, 0, 100, NULL); j++; EV_SET(&ke[j], vca_pipes[0], EVFILT_READ, EV_ADD, 0, 0, vca_pipes); j++; AZ(kevent(kq, ke, j, NULL, 0, NULL)); nki = 0; while (1) { dotimer = 0; n = kevent(kq, ki, nki, ke, NKEV, NULL); assert(n >= 1 && n <= NKEV); nki = 0; for (kp = ke, j = 0; j < n; j++, kp++) { if (kp->filter == EVFILT_TIMER) { dotimer = 1; continue; } assert(kp->filter == EVFILT_READ); vca_kev(kp); } if (!dotimer) continue; /* * Make sure we have no pending changes for the fd's * we are about to close, in case the accept(2) in the * other thread creates new fd's betwen our close and * the kevent(2) at the top of this loop, the kernel * would not know we meant "the old fd of this number". */ vca_kq_flush(); deadline = TIM_real() - params->sess_timeout; for (;;) { sp = VTAILQ_FIRST(&sesshead); if (sp == NULL) break; if (sp->t_open > deadline) break; VTAILQ_REMOVE(&sesshead, sp, list); // XXX: not yet (void)TCP_linger(sp->fd, 0); vca_close_session(sp, "timeout"); SES_Delete(sp); } } }
static int cnt_wait(struct sess *sp) { int i; struct pollfd pfd[1]; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); AZ(sp->vcl); AZ(sp->obj); assert(sp->xid == 0); i = HTC_Complete(sp->htc); while (i == 0) { if (params->session_linger > 0) { pfd[0].fd = sp->fd; pfd[0].events = POLLIN; pfd[0].revents = 0; i = poll(pfd, 1, params->session_linger); if (i == 0) { WSL(sp->wrk, SLT_Debug, sp->fd, "herding"); VSL_stats->sess_herd++; sp->wrk = NULL; vca_return_session(sp); return (1); } } i = HTC_Rx(sp->htc); } if (i == 1) { sp->step = STP_START; } else { if (i == -2) vca_close_session(sp, "overflow"); else if (i == -1 && Tlen(sp->htc->rxbuf) == 0 && (errno == 0 || errno == ECONNRESET)) vca_close_session(sp, "EOF"); else vca_close_session(sp, "error"); sp->step = STP_DONE; } return (0); }
static void vca_kev(const struct kevent *kp) { int i, j; struct sess *sp; struct sess *ss[NKEV]; AN(kp->udata); if (kp->udata == vca_pipes) { j = 0; i = read(vca_pipes[0], ss, sizeof ss); if (i == -1 && errno == EAGAIN) return; while (i >= sizeof ss[0]) { CHECK_OBJ_NOTNULL(ss[j], SESS_MAGIC); assert(ss[j]->sp_fd >= 0); AZ(ss[j]->obj); VTAILQ_INSERT_TAIL(&sesshead, ss[j], list); vca_kq_sess(ss[j], EV_ADD | EV_ONESHOT); j++; i -= sizeof ss[0]; } assert(i == 0); return; } CAST_OBJ_NOTNULL(sp, kp->udata, SESS_MAGIC); DSL(0x04, SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s", sp, (unsigned long)kp->data, kp->flags, (kp->flags & EV_EOF) ? " EOF" : ""); assert(sp->id == kp->ident); assert(sp->sp_fd == sp->id); if (kp->data > 0) { i = HTC_Rx(sp->htc); if (i == 0) { vca_kq_sess(sp, EV_ADD | EV_ONESHOT); return; /* more needed */ } VTAILQ_REMOVE(&sesshead, sp, list); vca_handover(sp, i); return; } else if (kp->flags & EV_EOF) { VTAILQ_REMOVE(&sesshead, sp, list); vca_close_session(sp, "EOF"); SES_Delete(sp); return; } else { VSL(SLT_Debug, sp->id, "KQ: sp %p kev data %lu flags 0x%x%s", sp, (unsigned long)kp->data, kp->flags, (kp->flags & EV_EOF) ? " EOF" : ""); } }
/*
 * Post a session to the event port as a user event via port_send().
 *
 * If port_send() fails with EAGAIN the overflow counter is bumped and
 * the session is closed and deleted.  Any other non-zero result trips
 * the AZ() assertion.
 */
static void
vca_ports_pass(struct sess *sp)
{
	const int rc = port_send(solaris_dport, 0, TRUST_ME(sp));

	if (rc != -1 || errno != EAGAIN) {
		AZ(rc);
		return;
	}

	/* The port would not take the event: drop the session. */
	VSC_C_main->sess_pipe_overflow++;
	vca_close_session(sp, "session pipe overflow");
	SES_Delete(sp);
}
/*
 * Handle one event retrieved from the Solaris event port.
 *
 * PORT_SOURCE_USER events carry a session that is appended to sesshead
 * and passed to vca_add().  PORT_SOURCE_FD events are for a session fd:
 * POLLERR closes and deletes the session; otherwise HTC_Rx() is run and
 * the session is either passed to vca_add() again (result 0) or
 * dissociated, unlinked and handed over.
 */
static inline void
vca_port_ev(port_event_t *ev)
{
	struct sess *sp;
	int rx;

	if (ev->portev_source == PORT_SOURCE_USER) {
		CAST_OBJ_NOTNULL(sp, ev->portev_user, SESS_MAGIC);
		assert(sp->fd >= 0);
		AZ(sp->obj);
		VTAILQ_INSERT_TAIL(&sesshead, sp, list);
		vca_add(sp->fd, sp);
		return;
	}

	assert(ev->portev_source == PORT_SOURCE_FD);
	CAST_OBJ_NOTNULL(sp, ev->portev_user, SESS_MAGIC);
	assert(sp->fd >= 0);

	if (ev->portev_events & POLLERR) {
		vca_del(sp->fd);
		VTAILQ_REMOVE(&sesshead, sp, list);
		vca_close_session(sp, "EOF");
		SES_Delete(sp);
		return;
	}

	rx = HTC_Rx(sp->htc);
	if (rx == 0) {
		/* incomplete header, wait for more data */
		vca_add(sp->fd, sp);
		return;
	}

	/*
	 * Note: the original man page for port_associate(3C) states:
	 *
	 *    When an event for a PORT_SOURCE_FD object is retrieved,
	 *    the object no longer has an association with the port.
	 *
	 * This can be read as sparing the port_dissociate after
	 * port_getn(), but in fact port_dissociate should be used.
	 *
	 * Ref: http://opensolaris.org/jive/thread.jspa?threadID=129476&tstart=0
	 */
	vca_del(sp->fd);
	VTAILQ_REMOVE(&sesshead, sp, list);

	/* vca_handover will also handle errors */
	vca_handover(sp, rx);
}
static void * vca_main(void *arg) { struct epoll_event ev[NEEV], *ep; struct sess *sp; char junk; double deadline; int dotimer, i, n; THR_SetName("cache-epoll"); (void)arg; epfd = epoll_create(1); assert(epfd >= 0); vca_modadd(vca_pipes[0], vca_pipes, EPOLL_CTL_ADD); vca_modadd(dotimer_pipe[0], dotimer_pipe, EPOLL_CTL_ADD); while (1) { dotimer = 0; n = epoll_wait(epfd, ev, NEEV, -1); for (ep = ev, i = 0; i < n; i++, ep++) { if (ep->data.ptr == dotimer_pipe && (ep->events == EPOLLIN || ep->events == EPOLLPRI)) { assert(read(dotimer_pipe[0], &junk, 1)); dotimer = 1; } else vca_eev(ep); } if (!dotimer) continue; /* check for timeouts */ deadline = TIM_real() - params->sess_timeout; for (;;) { sp = VTAILQ_FIRST(&sesshead); if (sp == NULL) break; if (sp->t_open > deadline) break; VTAILQ_REMOVE(&sesshead, sp, list); // XXX: not yet VTCP_linger(sp->fd, 0); vca_close_session(sp, "timeout"); SES_Delete(sp); } } return NULL; }
static void * vca_main(void *arg) { struct sess *sp; /* * timeouts: * * min_ts : Minimum timeout for port_getn * min_t : ^ equivalent in floating point representation * * max_ts : Maximum timeout for port_getn * max_t : ^ equivalent in floating point representation * * with (nevents == 1), we should always choose the correct port_getn * timeout to check session timeouts, so max is just a safety measure * (if this implementation is correct, it could be set to an "infinte" * value) * * with (nevents > 1), min and max define the acceptable range for * - additional latency of keep-alive connections and * - additional tolerance for handling session timeouts * */ static struct timespec min_ts = {0L, 100L /*ms*/ * 1000L /*us*/ * 1000L /*ns*/}; static double min_t = 0.1; /* 100 ms*/ static struct timespec max_ts = {1L, 0L}; /* 1 second */ static double max_t = 1.0; /* 1 second */ struct timespec ts; struct timespec *timeout; (void)arg; solaris_dport = port_create(); assert(solaris_dport >= 0); timeout = &max_ts; while (1) { port_event_t ev[MAX_EVENTS]; int nevents, ei, ret; double now, deadline; /* * XXX Do we want to scale this up dynamically to increase * efficiency in high throughput situations? - would need to * start with one to keep latency low at any rate * * Note: when increasing nevents, we must lower min_ts * and max_ts */ nevents = 1; /* * see disucssion in * - https://issues.apache.org/bugzilla/show_bug.cgi?id=47645 * - http://mail.opensolaris.org/pipermail/networking-discuss/2009-August/011979.html * * comment from apr/poll/unix/port.c : * * This confusing API can return an event at the same time * that it reports EINTR or ETIME. 
* */ ret = port_getn(solaris_dport, ev, MAX_EVENTS, &nevents, timeout); if (ret < 0) assert((errno == EINTR) || (errno == ETIME)); for (ei=0; ei<nevents; ei++) { vca_port_ev(ev + ei); } /* check for timeouts */ now = TIM_real(); deadline = now - params->sess_timeout; /* * This loop assumes that the oldest sessions are always at the * beginning of the list (which is the case if we guarantee to * enqueue at the tail only * */ for (;;) { sp = VTAILQ_FIRST(&sesshead); if (sp == NULL) break; if (sp->t_open > deadline) { break; } VTAILQ_REMOVE(&sesshead, sp, list); if(sp->fd != -1) { vca_del(sp->fd); } vca_close_session(sp, "timeout"); SES_Delete(sp); } /* * Calculate the timeout for the next get_portn */ if (sp) { double tmo = (sp->t_open + params->sess_timeout) - now; /* we should have removed all sps whose timeout has passed */ assert(tmo > 0.0); if (tmo < min_t) { timeout = &min_ts; } else if (tmo > max_t) { timeout = &max_ts; } else { /* TIM_t2ts() ? see #630 */ ts.tv_sec = (int)floor(tmo); ts.tv_nsec = 1e9 * (tmo - ts.tv_sec); timeout = &ts; } } else { timeout = &max_ts; } } }
static int cnt_done(struct sess *sp) { double dh, dp, da; int i; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); CHECK_OBJ_ORNULL(sp->vcl, VCL_CONF_MAGIC); AZ(sp->obj); AZ(sp->vbe); sp->director = NULL; sp->restarts = 0; if (sp->vcl != NULL && sp->esis == 0) { if (sp->wrk->vcl != NULL) VCL_Rel(&sp->wrk->vcl); sp->wrk->vcl = sp->vcl; sp->vcl = NULL; } sp->t_end = TIM_real(); sp->wrk->lastused = sp->t_end; if (sp->xid == 0) { sp->t_req = sp->t_end; sp->t_resp = sp->t_end; } dp = sp->t_resp - sp->t_req; da = sp->t_end - sp->t_resp; dh = sp->t_req - sp->t_open; WSL(sp->wrk, SLT_ReqEnd, sp->id, "%u %.9f %.9f %.9f %.9f %.9f", sp->xid, sp->t_req, sp->t_end, dh, dp, da); sp->xid = 0; sp->t_open = sp->t_end; sp->t_resp = NAN; WSL_Flush(sp->wrk, 0); /* If we did an ESI include, don't mess up our state */ if (sp->esis > 0) return (1); sp->t_req = NAN; if (sp->fd >= 0 && sp->doclose != NULL) { /* * This is an orderly close of the connection; ditch nolinger * before we close, to get queued data transmitted. */ TCP_linger(sp->fd, 0); vca_close_session(sp, sp->doclose); } if (sp->fd < 0) { SES_Charge(sp); VSL_stats->sess_closed++; sp->wrk = NULL; SES_Delete(sp); return (1); } /* Reset the workspace to the session-watermark */ WS_Reset(sp->ws, sp->ws_ses); i = HTC_Reinit(sp->htc); if (i == 1) { VSL_stats->sess_pipeline++; sp->step = STP_START; return (0); } if (Tlen(sp->htc->rxbuf)) { VSL_stats->sess_readahead++; sp->step = STP_WAIT; return (0); } if (params->session_linger > 0) { VSL_stats->sess_linger++; sp->step = STP_WAIT; return (0); } VSL_stats->sess_herd++; SES_Charge(sp); sp->wrk = NULL; vca_return_session(sp); return (1); }
void PipeSession(struct sess *sp) { struct vbe_conn *vc; struct worker *w; struct pollfd fds[2]; int i; CHECK_OBJ_NOTNULL(sp, SESS_MAGIC); CHECK_OBJ_NOTNULL(sp->wrk, WORKER_MAGIC); w = sp->wrk; sp->vbe = VBE_GetFd(NULL, sp); if (sp->vbe == NULL) return; vc = sp->vbe; (void)TCP_blocking(vc->fd); WRW_Reserve(w, &vc->fd); sp->acct_req.hdrbytes += http_Write(w, sp->wrk->bereq, 0); if (sp->htc->pipeline.b != NULL) sp->acct_req.bodybytes += WRW_Write(w, sp->htc->pipeline.b, Tlen(sp->htc->pipeline)); i = WRW_FlushRelease(w); if (i) { vca_close_session(sp, "pipe"); VBE_ClosedFd(sp); return; } sp->t_resp = TIM_real(); memset(fds, 0, sizeof fds); // XXX: not yet (void)TCP_linger(vc->fd, 0); fds[0].fd = vc->fd; fds[0].events = POLLIN | POLLERR; // XXX: not yet (void)TCP_linger(sp->fd, 0); fds[1].fd = sp->fd; fds[1].events = POLLIN | POLLERR; while (fds[0].fd > -1 || fds[1].fd > -1) { fds[0].revents = 0; fds[1].revents = 0; i = poll(fds, 2, params->pipe_timeout * 1000); if (i < 1) break; if (fds[0].revents && rdf(vc->fd, sp->fd)) { (void)shutdown(vc->fd, SHUT_RD); (void)shutdown(sp->fd, SHUT_WR); fds[0].events = 0; fds[0].fd = -1; } if (fds[1].revents && rdf(sp->fd, vc->fd)) { (void)shutdown(sp->fd, SHUT_RD); (void)shutdown(vc->fd, SHUT_WR); fds[1].events = 0; fds[1].fd = -1; } } if (fds[0].fd >= 0) { (void)shutdown(vc->fd, SHUT_RD); (void)shutdown(sp->fd, SHUT_WR); } if (fds[1].fd >= 0) { (void)shutdown(sp->fd, SHUT_RD); (void)shutdown(vc->fd, SHUT_WR); } vca_close_session(sp, "pipe"); VBE_ClosedFd(sp); }