ST_HIDDEN int _st_epoll_pollset_add(struct pollfd *pds, int npds) { struct epoll_event ev; int i, fd; int old_events, events, op; /* Do as many checks as possible up front */ for (i = 0; i < npds; i++) { fd = pds[i].fd; if (fd < 0 || !pds[i].events || (pds[i].events & ~(POLLIN | POLLOUT | POLLPRI))) { errno = EINVAL; return -1; } if (fd >= _st_epoll_data->fd_data_size && _st_epoll_fd_data_expand(fd) < 0) { return -1; } } for (i = 0; i < npds; i++) { fd = pds[i].fd; old_events = _ST_EPOLL_EVENTS(fd); if (pds[i].events & POLLIN) { _ST_EPOLL_READ_CNT(fd)++; } if (pds[i].events & POLLOUT) { _ST_EPOLL_WRITE_CNT(fd)++; } if (pds[i].events & POLLPRI) { _ST_EPOLL_EXCEP_CNT(fd)++; } events = _ST_EPOLL_EVENTS(fd); if (events != old_events) { op = old_events ? EPOLL_CTL_MOD : EPOLL_CTL_ADD; ev.events = events; ev.data.fd = fd; if (epoll_ctl(_st_epoll_data->epfd, op, fd, &ev) < 0 && (op != EPOLL_CTL_ADD || errno != EEXIST)) { break; } if (op == EPOLL_CTL_ADD) { _st_epoll_data->evtlist_cnt++; if (_st_epoll_data->evtlist_cnt > _st_epoll_data->evtlist_size) { _st_epoll_evtlist_expand(); } } } } if (i < npds) { /* Error */ int err = errno; /* Unroll the state */ _st_epoll_pollset_del(pds, i + 1); errno = err; return -1; } return 0; }
ST_HIDDEN void _st_epoll_pollset_del(struct pollfd *pds, int npds) { struct epoll_event ev; struct pollfd *pd; struct pollfd *epd = pds + npds; int old_events, events, op; /* * It's more or less OK if deleting fails because a descriptor * will either be closed or deleted in dispatch function after * it fires. */ for (pd = pds; pd < epd; pd++) { old_events = _ST_EPOLL_EVENTS(pd->fd); if (pd->events & POLLIN) { _ST_EPOLL_READ_CNT(pd->fd)--; } if (pd->events & POLLOUT) { _ST_EPOLL_WRITE_CNT(pd->fd)--; } if (pd->events & POLLPRI) { _ST_EPOLL_EXCEP_CNT(pd->fd)--; } events = _ST_EPOLL_EVENTS(pd->fd); /* * The _ST_EPOLL_REVENTS check below is needed so we can use * this function inside dispatch(). Outside of dispatch() * _ST_EPOLL_REVENTS is always zero for all descriptors. */ if (events != old_events && _ST_EPOLL_REVENTS(pd->fd) == 0) { op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; ev.events = events; ev.data.fd = pd->fd; if (epoll_ctl(_st_epoll_data->epfd, op, pd->fd, &ev) == 0 && op == EPOLL_CTL_DEL) { _st_epoll_data->evtlist_cnt--; } } } }
ST_HIDDEN void _st_epoll_dispatch(void) { st_utime_t min_timeout; _st_clist_t *q; _st_pollq_t *pq; struct pollfd *pds, *epds; struct epoll_event ev; int timeout, nfd, i, osfd, notify; int events, op; short revents; if (_ST_SLEEPQ == NULL) { timeout = -1; } else { min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : (_ST_SLEEPQ->due - _ST_LAST_CLOCK); timeout = (int) (min_timeout / 1000); } if (_st_epoll_data->pid != getpid()) { /* We probably forked, reinitialize epoll set */ close(_st_epoll_data->epfd); _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint); if (_st_epoll_data->epfd < 0) { /* There is nothing we can do here, will retry later */ return; } fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); _st_epoll_data->pid = getpid(); /* Put all descriptors on ioq into new epoll set */ memset(_st_epoll_data->fd_data, 0, _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t)); _st_epoll_data->evtlist_cnt = 0; for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { pq = _ST_POLLQUEUE_PTR(q); _st_epoll_pollset_add(pq->pds, pq->npds); } } /* Check for I/O operations */ nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist, _st_epoll_data->evtlist_size, timeout); if (nfd > 0) { for (i = 0; i < nfd; i++) { osfd = _st_epoll_data->evtlist[i].data.fd; _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events; if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) { /* Also set I/O bits on error */ _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd); } } for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { pq = _ST_POLLQUEUE_PTR(q); notify = 0; epds = pq->pds + pq->npds; for (pds = pq->pds; pds < epds; pds++) { if (_ST_EPOLL_REVENTS(pds->fd) == 0) { pds->revents = 0; continue; } osfd = pds->fd; events = pds->events; revents = 0; if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN)) revents |= POLLIN; if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT)) revents |= POLLOUT; if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI)) revents |= POLLPRI; if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR) revents |= POLLERR; if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP) revents |= POLLHUP; pds->revents = revents; if (revents) { notify = 1; } } if (notify) { ST_REMOVE_LINK(&pq->links); pq->on_ioq = 0; /* * Here we will only delete/modify descriptors that * didn't fire (see comments in _st_epoll_pollset_del()). */ _st_epoll_pollset_del(pq->pds, pq->npds); if (pq->thread->flags & _ST_FL_ON_SLEEPQ) _ST_DEL_SLEEPQ(pq->thread); pq->thread->state = _ST_ST_RUNNABLE; _ST_ADD_RUNQ(pq->thread); } } for (i = 0; i < nfd; i++) { /* Delete/modify descriptors that fired */ osfd = _st_epoll_data->evtlist[i].data.fd; _ST_EPOLL_REVENTS(osfd) = 0; events = _ST_EPOLL_EVENTS(osfd); op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; ev.events = events; ev.data.fd = osfd; if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 && op == EPOLL_CTL_DEL) { _st_epoll_data->evtlist_cnt--; } } } }
// epoll 事件分发 ST_HIDDEN void _st_epoll_dispatch(void) { st_utime_t min_timeout; _st_clist_t *q; _st_pollq_t *pq; struct pollfd *pds, *epds; struct epoll_event ev; int timeout, nfd, i, osfd, notify; int events, op; short revents; if (_ST_SLEEPQ == NULL) { timeout = -1; } else { // 获取最早睡眠的线程还要睡多久(用于epoll超时) min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 : (_ST_SLEEPQ->due - _ST_LAST_CLOCK); timeout = (int) (min_timeout / 1000); } // fork 出的子进程,则重新获取 epoll fd if (_st_epoll_data->pid != getpid()) { /* We probably forked, reinitialize epoll set */ close(_st_epoll_data->epfd); _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint); if (_st_epoll_data->epfd < 0) { /* There is nothing we can do here, will retry later */ return; } // exec 调用时关闭 fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC); _st_epoll_data->pid = getpid(); /* Put all descriptors on ioq into new epoll set */ memset(_st_epoll_data->fd_data, 0, _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t)); _st_epoll_data->evtlist_cnt = 0; // 将 io 队列的 net fd 都加入事件系统 for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { pq = _ST_POLLQUEUE_PTR(q); _st_epoll_pollset_add(pq->pds, pq->npds); } } /* Check for I/O operations */ nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist, _st_epoll_data->evtlist_size, timeout); if (nfd > 0) { for (i = 0; i < nfd; i++) { osfd = _st_epoll_data->evtlist[i].data.fd; _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events; if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) { /* Also set I/O bits on error */ _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd); } } // #### // 依次从 io 队列取出每个线程等待 io 的 pollfd,pq 是一个线程中加入的所有文件描述符 for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) { pq = _ST_POLLQUEUE_PTR(q); notify = 0; epds = pq->pds + pq->npds; // 遍历每个线程 pollfd ,获取对应的事件,然后从 io 队列中移除;如果 pollfd 上 // 有事件发生,则将其从 io 队列移除,没有事件的继续在 io 队列等待 for (pds = pq->pds; pds < epds; pds++) { if (_ST_EPOLL_REVENTS(pds->fd) == 0) { pds->revents = 0; continue; } osfd = pds->fd; events = pds->events; revents = 0; if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN)) revents |= POLLIN; if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT)) revents |= POLLOUT; if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI)) revents |= POLLPRI; if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR) revents |= POLLERR; if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP) revents |= POLLHUP; pds->revents = revents; // 判断是不是有事件发生 if (revents) { notify = 1; } } if (notify) { // 将有时间发生的描述符从 io 队列移除 ST_REMOVE_LINK(&pq->links); pq->on_ioq = 0; /* * Here we will only delete/modify descriptors that * didn't fire (see comments in _st_epoll_pollset_del()). */ // 将当前线程已处理完事件的文件描述符从事件系统中移除 _st_epoll_pollset_del(pq->pds, pq->npds); // 如果当前线程处于睡眠则从睡眠队列移除,然后将当前线程加入运行队列 if (pq->thread->flags & _ST_FL_ON_SLEEPQ) _ST_DEL_SLEEPQ(pq->thread); pq->thread->state = _ST_ST_RUNNABLE; _ST_ADD_RUNQ(pq->thread); } } // 修改剩下还有等待事件的描述符 for (i = 0; i < nfd; i++) { /* Delete/modify descriptors that fired */ osfd = _st_epoll_data->evtlist[i].data.fd; _ST_EPOLL_REVENTS(osfd) = 0; events = _ST_EPOLL_EVENTS(osfd); op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL; ev.events = events; ev.data.fd = osfd; if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 && op == EPOLL_CTL_DEL) { _st_epoll_data->evtlist_cnt--; } } } }