/* add a socket to be listened on */ static int engine_add(struct Socket* sock) { assert(0 != sock); assert(0 == sockList[s_fd(sock)]); /* bounds-check... */ if (s_fd(sock) >= FD_SETSIZE) { log_write(LS_SYSTEM, L_ERROR, 0, "Attempt to add socket %d (> %d) to event engine", s_fd(sock), FD_SETSIZE); return 0; } sockList[s_fd(sock)] = sock; /* add to list */ if (s_fd(sock) >= highest_fd) /* update highest_fd */ highest_fd = s_fd(sock); Debug((DEBUG_ENGINE, "select: Adding socket %d to engine [%p], state %s", s_fd(sock), sock, state_to_name(s_state(sock)))); /* set the fd set bits */ set_or_clear(s_fd(sock), 0, state_to_events(s_state(sock), s_events(sock))); return 1; /* success */ }
/** Remove a socket from the epoll event engine.
 *
 * Asks the kernel to drop the socket's descriptor from the epoll set.
 * A failure is logged as a warning; nothing is reported to the caller.
 *
 * @param[in] sock Socket being removed.
 */
static void
engine_delete(struct Socket *sock)
{
  int rc;

  assert(0 != sock);

  Debug((DEBUG_ENGINE, "epoll: Deleting socket %d [%p], state %s",
         s_fd(sock), sock, state_to_name(s_state(sock))));

  rc = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, s_fd(sock), NULL);
  if (rc >= 0)
    return; /* removed cleanly */

  log_write(LS_SOCKET, L_WARNING, 0,
            "Unable to delete epoll item for socket %d", s_fd(sock));
}
static void engine_set_state(struct Socket *sock, enum SocketState new_state) { struct epoll_event evt; assert(0 != sock); Debug((DEBUG_ENGINE, "epoll: Changing state for socket %p to %s", sock, state_to_name(new_state))); set_events(sock, new_state, s_events(sock), &evt); if (epoll_ctl(epoll_fd, EPOLL_CTL_MOD, s_fd(sock), &evt) < 0) event_generate(ET_ERROR, sock, errno); }
/** Switch a socket to a new state in the select() engine.
 *
 * Passes set_or_clear() the event mask derived from the socket's current
 * state and the mask derived from the new state, so the fd_set bits can
 * be brought in line with the new state.
 *
 * @param[in] sock Socket whose state is changing; must be registered in
 * sockList[].
 * @param[in] new_state State the socket is switching to.
 */
static void
engine_state(struct Socket* sock, enum SocketState new_state)
{
  int fd;

  assert(0 != sock);
  fd = s_fd(sock);
  assert(sock == sockList[fd]);

  Debug((DEBUG_ENGINE, "select: Changing state for socket %p to %s", sock,
         state_to_name(new_state)));

  /* arguments: descriptor, mask for the old state, mask for the new state */
  set_or_clear(fd,
               state_to_events(s_state(sock), s_events(sock)),
               state_to_events(new_state, s_events(sock)));
}
/** Add a socket to the epoll event engine.
 *
 * Builds an epoll_event via set_events() from the socket's current state
 * and event mask and registers the descriptor with EPOLL_CTL_ADD.
 *
 * @param[in] sock Socket to add to the engine.
 * @return Non-zero on success; zero if epoll_ctl() failed, in which case
 * an ET_ERROR event carrying errno has been generated for the socket.
 */
static int
engine_add(struct Socket *sock)
{
  struct epoll_event ev;
  int rc;

  assert(0 != sock);

  Debug((DEBUG_ENGINE, "epoll: Adding socket %d [%p], state %s, to engine",
         s_fd(sock), sock, state_to_name(s_state(sock))));

  set_events(sock, s_state(sock), s_events(sock), &ev);

  rc = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, s_fd(sock), &ev);
  if (rc >= 0)
    return 1; /* registered */

  event_generate(ET_ERROR, sock, errno);
  return 0;
}
/** Remove a socket from the kqueue event engine.
 *
 * Only the sockList[] entry is cleared. Nothing is removed from the
 * kernel queue: the descriptor may already be closed, and the kernel
 * drops closed descriptors from the kqueue on its own.
 *
 * @param[in] sock Socket being destroyed; must be registered in sockList[].
 */
static void
engine_delete(struct Socket* sock)
{
  int fd;

  assert(0 != sock);
  fd = s_fd(sock);
  assert(sock == sockList[fd]);

  Debug((DEBUG_ENGINE, "kqueue: Deleting socket %d [%p], state %s",
         fd, sock, state_to_name(s_state(sock))));

  sockList[fd] = 0; /* forget the socket */
}
/** Remove a socket from the select() event engine.
 *
 * Clears the descriptor's bits in both global fd sets, empties its
 * sockList[] slot, and then lowers highest_fd past any trailing empty
 * slots.
 *
 * @param[in] sock Socket going away; must be registered in sockList[].
 */
static void
engine_delete(struct Socket* sock)
{
  int fd;

  assert(0 != sock);
  fd = s_fd(sock);
  assert(sock == sockList[fd]);

  Debug((DEBUG_ENGINE, "select: Deleting socket %d [%p], state %s", fd,
         sock, state_to_name(s_state(sock))));

  /* stop watching the descriptor for both reads and writes */
  FD_CLR(fd, &global_read_set);
  FD_CLR(fd, &global_write_set);

  sockList[fd] = 0; /* release the table slot */

  /* walk highest_fd down to the last occupied slot (or -1 if none) */
  for (; highest_fd > -1 && sockList[highest_fd] == 0; highest_fd--)
    ;
}
/** Remove a socket from the kqueue event engine.
 *
 * Clears the socket's sockList[] slot and discards every entry in the
 * pending events[] array whose ident equals the socket's descriptor, so
 * that no stale event is processed for it later.
 *
 * NOTE(review): matching is done on ident alone.  If events[] can also
 * hold non-socket entries (e.g. signal events, whose ident is a signal
 * number), an entry with a numerically equal ident would be dropped too;
 * confirm against the code that fills events[].
 *
 * @param[in] sock Socket being destroyed; must be registered in sockList[].
 */
static void
engine_delete(struct Socket* sock)
{
  int ii;

  assert(0 != sock);
  assert(sock == sockList[s_fd(sock)]);

  Debug((DEBUG_ENGINE, "kqueue: Deleting socket %d [%p], state %s",
         s_fd(sock), sock, state_to_name(s_state(sock))));

  sockList[s_fd(sock)] = 0;

  /* Drop any unprocessed events citing this socket.  A matching entry is
   * overwritten with the last entry of the array; the index is advanced
   * only when the current slot does NOT match, so that the entry just
   * moved into the slot is examined as well (it may cite the same
   * socket, e.g. a read and a write event for one descriptor).
   */
  ii = 0;
  while (ii < events_used) {
    if (events[ii].ident == s_fd(sock))
      events[ii] = events[--events_used];
    else
      ii++;
  }
}
/** Add a socket to the event engine. * @param[in] sock Socket to add to engine. * @return Non-zero on success, or zero on error. */ static int engine_add(struct Socket* sock) { assert(0 != sock); assert(0 == sockList[s_fd(sock)]); /* bounds-check... */ if (sock->s_fd >= kqueue_max) { log_write(LS_SYSTEM, L_ERROR, 0, "Attempt to add socket %d (> %d) to event engine", s_fd(sock), kqueue_max); return 0; } sockList[s_fd(sock)] = sock; /* add to list */ Debug((DEBUG_ENGINE, "kqueue: Adding socket %d [%p], state %s, to engine", s_fd(sock), sock, state_to_name(s_state(sock)))); /* Add socket to queue */ set_or_clear(sock, 0, state_to_events(s_state(sock), s_events(sock))); return 1; /* success */ }
/** Run engine event loop. * @param[in] gen Lists of generators of various types. */ static void engine_loop(struct Generators* gen) { struct kevent *events; int events_count; struct Socket* sock; struct timespec wait; int nevs; int i; int errcode; size_t codesize; if ((events_count = feature_int(FEAT_POLLS_PER_LOOP)) < 20) events_count = 20; events = (struct kevent *)MyMalloc(sizeof(struct kevent) * events_count); while (running) { if ((i = feature_int(FEAT_POLLS_PER_LOOP)) >= 20 && i != events_count) { events = (struct kevent *)MyRealloc(events, sizeof(struct kevent) * i); events_count = i; } /* set up the sleep time */ wait.tv_sec = timer_next(gen) ? (timer_next(gen) - CurrentTime) : -1; wait.tv_nsec = 0; Debug((DEBUG_INFO, "kqueue: delay: %Tu (%Tu) %Tu", timer_next(gen), CurrentTime, wait.tv_sec)); /* check for active events */ nevs = kevent(kqueue_id, 0, 0, events, events_count, wait.tv_sec < 0 ? 0 : &wait); CurrentTime = time(0); /* set current time... */ if (nevs < 0) { if (errno != EINTR) { /* ignore kevent interrupts */ /* Log the kqueue error */ log_write(LS_SOCKET, L_ERROR, 0, "kevent() error: %m"); if (!errors++) timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC, ERROR_EXPIRE_TIME); else if (errors > KQUEUE_ERROR_THRESHOLD) /* too many errors... 
*/ exit_schedule(1, 0, 0, "too many kevent errors"); } /* old code did a sleep(1) here; with usage these days, * that may be too expensive */ continue; } for (i = 0; i < nevs; i++) { if (events[i].filter == EVFILT_SIGNAL) { /* it's a signal; deal appropriately */ event_generate(ET_SIGNAL, events[i].udata, events[i].ident); continue; /* skip socket processing loop */ } assert(events[i].filter == EVFILT_READ || events[i].filter == EVFILT_WRITE); sock = sockList[events[i].ident]; if (!sock) /* slots may become empty while processing events */ continue; assert(s_fd(sock) == events[i].ident); gen_ref_inc(sock); /* can't have it going away on us */ Debug((DEBUG_ENGINE, "kqueue: Checking socket %p (fd %d) state %s, " "events %s", sock, s_fd(sock), state_to_name(s_state(sock)), sock_flags(s_events(sock)))); if (s_state(sock) != SS_NOTSOCK) { errcode = 0; /* check for errors on socket */ codesize = sizeof(errcode); if (getsockopt(s_fd(sock), SOL_SOCKET, SO_ERROR, &errcode, &codesize) < 0) errcode = errno; /* work around Solaris implementation */ if (errcode) { /* an error occurred; generate an event */ Debug((DEBUG_ENGINE, "kqueue: Error %d on fd %d, socket %p", errcode, s_fd(sock), sock)); event_generate(ET_ERROR, sock, errcode); gen_ref_dec(sock); /* careful not to leak reference counts */ continue; } } switch (s_state(sock)) { case SS_CONNECTING: if (events[i].filter == EVFILT_WRITE) { /* connection completed */ Debug((DEBUG_ENGINE, "kqueue: Connection completed")); event_generate(ET_CONNECT, sock, 0); } break; case SS_LISTENING: if (events[i].filter == EVFILT_READ) { /* connect. to be accept. */ Debug((DEBUG_ENGINE, "kqueue: Ready for accept")); event_generate(ET_ACCEPT, sock, 0); } break; case SS_NOTSOCK: /* doing nothing socket-specific */ case SS_CONNECTED: if (events[i].filter == EVFILT_READ) { /* data on socket */ Debug((DEBUG_ENGINE, "kqueue: EOF or data to be read")); event_generate(events[i].flags & EV_EOF ? 
ET_EOF : ET_READ, sock, 0); } if (events[i].filter == EVFILT_WRITE) { /* socket writable */ Debug((DEBUG_ENGINE, "kqueue: Data can be written")); event_generate(ET_WRITE, sock, 0); } break; case SS_DATAGRAM: case SS_CONNECTDG: if (events[i].filter == EVFILT_READ) { /* socket readable */ Debug((DEBUG_ENGINE, "kqueue: Datagram to be read")); event_generate(ET_READ, sock, 0); } if (events[i].filter == EVFILT_WRITE) { /* socket writable */ Debug((DEBUG_ENGINE, "kqueue: Datagram can be written")); event_generate(ET_WRITE, sock, 0); } break; } gen_ref_dec(sock); /* we're done with it */ } timer_run(); /* execute any pending timers */ } }
/* engine event loop */ static void engine_loop(struct Generators* gen) { struct timeval wait; fd_set read_set; fd_set write_set; int nfds; int i; int errcode; size_t codesize; struct Socket *sock; while (running) { read_set = global_read_set; /* all hail structure copy!! */ write_set = global_write_set; /* set up the sleep time */ wait.tv_sec = timer_next(gen) ? (timer_next(gen) - CurrentTime) : -1; wait.tv_usec = 0; Debug((DEBUG_INFO, "select: delay: %Tu (%Tu) %Tu", timer_next(gen), CurrentTime, wait.tv_sec)); /* check for active files */ nfds = select(highest_fd + 1, &read_set, &write_set, 0, wait.tv_sec < 0 ? 0 : &wait); CurrentTime = time(0); /* set current time... */ if (nfds < 0) { if (errno != EINTR) { /* ignore select interrupts */ /* Log the select error */ log_write(LS_SOCKET, L_ERROR, 0, "select() error: %m"); if (!errors++) timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC, ERROR_EXPIRE_TIME); else if (errors > SELECT_ERROR_THRESHOLD) /* too many errors... */ server_restart("too many select errors"); } /* old code did a sleep(1) here; with usage these days, * that may be too expensive */ continue; } for (i = 0; nfds && i <= highest_fd; i++) { if (!(sock = sockList[i])) /* skip empty socket elements */ continue; assert(s_fd(sock) == i); gen_ref_inc(sock); /* can't have it going away on us */ Debug((DEBUG_ENGINE, "select: Checking socket %p (fd %d) state %s, " "events %s", sock, i, state_to_name(s_state(sock)), sock_flags(s_events(sock)))); if (s_state(sock) != SS_NOTSOCK) { errcode = 0; /* check for errors on socket */ codesize = sizeof(errcode); if (getsockopt(i, SOL_SOCKET, SO_ERROR, &errcode, &codesize) < 0) errcode = errno; /* work around Solaris implementation */ if (errcode) { /* an error occurred; generate an event */ Debug((DEBUG_ENGINE, "select: Error %d on fd %d, socket %p", errcode, i, sock)); event_generate(ET_ERROR, sock, errcode); gen_ref_dec(sock); /* careful not to leak reference counts */ continue; } } switch 
(s_state(sock)) { case SS_CONNECTING: if (FD_ISSET(i, &write_set)) { /* connection completed */ Debug((DEBUG_ENGINE, "select: Connection completed")); event_generate(ET_CONNECT, sock, 0); nfds--; continue; } break; case SS_LISTENING: if (FD_ISSET(i, &read_set)) { /* connection to be accepted */ Debug((DEBUG_ENGINE, "select: Ready for accept")); event_generate(ET_ACCEPT, sock, 0); nfds--; } break; case SS_NOTSOCK: if (FD_ISSET(i, &read_set)) { /* data on socket */ /* can't peek; it's not a socket */ Debug((DEBUG_ENGINE, "select: non-socket readable")); event_generate(ET_READ, sock, 0); nfds--; } break; case SS_CONNECTED: if (FD_ISSET(i, &read_set)) { /* data to be read from socket */ char c; switch (recv(i, &c, 1, MSG_PEEK)) { /* check for EOF */ case -1: /* error occurred?!? */ if (errno == EAGAIN) { Debug((DEBUG_ENGINE, "select: Resource temporarily " "unavailable?")); continue; } Debug((DEBUG_ENGINE, "select: Uncaught error!")); event_generate(ET_ERROR, sock, errno); break; case 0: /* EOF from client */ Debug((DEBUG_ENGINE, "select: EOF from client")); event_generate(ET_EOF, sock, 0); break; default: /* some data can be read */ Debug((DEBUG_ENGINE, "select: Data to be read")); event_generate(ET_READ, sock, 0); break; } } if (FD_ISSET(i, &write_set)) { /* data can be written to socket */ Debug((DEBUG_ENGINE, "select: Data can be written")); event_generate(ET_WRITE, sock, 0); } if (FD_ISSET(i, &read_set) || FD_ISSET(i, &write_set)) nfds--; break; case SS_DATAGRAM: case SS_CONNECTDG: if (FD_ISSET(i, &read_set)) { /* data to be read from socket */ Debug((DEBUG_ENGINE, "select: Datagram to be read")); event_generate(ET_READ, sock, 0); } if (FD_ISSET(i, &write_set)) { /* data can be written to socket */ Debug((DEBUG_ENGINE, "select: Datagram can be written")); event_generate(ET_WRITE, sock, 0); } if (FD_ISSET(i, &read_set) || FD_ISSET(i, &write_set)) nfds--; break; } assert(s_fd(sock) == i); gen_ref_dec(sock); /* we're done with it */ } timer_run(); /* execute any 
pending timers */ } }
static void engine_loop(struct Generators *gen) { struct epoll_event *events; struct Socket *sock; size_t codesize; int events_count, i, wait, nevs, errcode; if ((events_count = feature_int(FEAT_POLLS_PER_LOOP)) < 20) events_count = 20; events = MyMalloc(sizeof(events[0]) * events_count); while (running) { if ((i = feature_int(FEAT_POLLS_PER_LOOP)) >= 20 && i != events_count) { events = MyRealloc(events, sizeof(events[0]) * i); events_count = i; } wait = timer_next(gen) ? (timer_next(gen) - CurrentTime) * 1000 : -1; Debug((DEBUG_INFO, "epoll: delay: %d (%d) %d", timer_next(gen), CurrentTime, wait)); nevs = epoll_wait(epoll_fd, events, events_count, wait); CurrentTime = time(0); if (nevs < 0) { if (errno != EINTR) { log_write(LS_SOCKET, L_ERROR, 0, "epoll() error: %m"); if (!errors++) timer_add(timer_init(&clear_error), error_clear, 0, TT_PERIODIC, ERROR_EXPIRE_TIME); else if (errors > EPOLL_ERROR_THRESHOLD) server_restart("too many epoll errors"); } continue; } for (i = 0; i < nevs; i++) { if (!(sock = events[i].data.ptr)) continue; gen_ref_inc(sock); Debug((DEBUG_ENGINE, "epoll: Checking socket %p (fd %d) state %s, events %s", sock, s_fd(sock), state_to_name(s_state(sock)), sock_flags(s_events(sock)))); if (events[i].events & EPOLLERR) { errcode = 0; codesize = sizeof(errcode); if (getsockopt(s_fd(sock), SOL_SOCKET, SO_ERROR, &errcode, &codesize) < 0) errcode = errno; if (errcode) { event_generate(ET_ERROR, sock, errcode); gen_ref_dec(sock); continue; } } switch (s_state(sock)) { case SS_CONNECTING: if (events[i].events & EPOLLOUT) /* connection completed */ event_generate(ET_CONNECT, sock, 0); break; case SS_LISTENING: if (events[i].events & EPOLLIN) /* incoming connection */ event_generate(ET_ACCEPT, sock, 0); break; case SS_NOTSOCK: case SS_CONNECTED: if (events[i].events & EPOLLIN) event_generate((events[i].events & EPOLLHUP) ? 
ET_EOF : ET_READ, sock, 0); if (events[i].events & EPOLLOUT) event_generate(ET_WRITE, sock, 0); break; case SS_DATAGRAM: case SS_CONNECTDG: if (events[i].events & EPOLLIN) event_generate(ET_READ, sock, 0); if (events[i].events & EPOLLOUT) event_generate(ET_WRITE, sock, 0); break; } gen_ref_dec(sock); } timer_run(); } }