int st_mutex_unlock(_st_mutex_t *lock)
{
    _st_thread_t *thread;
    _st_clist_t *q;

    if (lock->owner != _ST_CURRENT_THREAD()) {
        errno = EPERM;
        return -1;
    }

    for (q = lock->wait_q.next; q != &lock->wait_q; q = q->next) {
        thread = _ST_THREAD_WAITQ_PTR(q);
        if (thread->state == _ST_ST_LOCK_WAIT) {
            lock->owner = thread;
            /* Make thread runnable */
            thread->state = _ST_ST_RUNNABLE;
            _ST_ADD_RUNQ(thread);
            return 0;
        }
    }

    /* No threads waiting on this mutex */
    lock->owner = NULL;
    return 0;
}
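A minimal usage sketch (not from the library source; the worker/main split and the printed message are illustrative, only the st_* calls are the real public API from st.h) showing the ownership rule enforced above: a thread that does not own the mutex gets -1 with errno set to EPERM.

#include <errno.h>
#include <stdio.h>
#include "st.h"

static st_mutex_t m;                  /* hypothetical example, not library code */

static void *worker(void *arg)
{
    /* This thread never locked m, so the unlock is refused with EPERM. */
    if (st_mutex_unlock(m) < 0 && errno == EPERM)
        printf("worker: not the owner, unlock refused\n");
    return NULL;
}

int main(void)
{
    st_init();
    m = st_mutex_new();
    st_mutex_lock(m);                            /* main becomes the owner */
    st_thread_t t = st_thread_create(worker, NULL, 1, 0);
    st_thread_join(t, NULL);
    st_mutex_unlock(m);                          /* owner unlock succeeds */
    st_mutex_destroy(m);
    return 0;
}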
void _st_vp_check_clock(void)
{
    _st_thread_t *thread;
    st_utime_t elapsed, now;

    now = st_utime();
    elapsed = now - _ST_LAST_CLOCK;
    _ST_LAST_CLOCK = now;

    if (_st_curr_time && now - _st_last_tset > 999000) {
        _st_curr_time = time(NULL);
        _st_last_tset = now;
    }

    while (_ST_SLEEPQ != NULL) {
        thread = _ST_SLEEPQ;
        ST_ASSERT(thread->flags & _ST_FL_ON_SLEEPQ);
        if (thread->due > now)
            break;
        _ST_DEL_SLEEPQ(thread);

        /* If thread is waiting on condition variable, set the time out flag */
        if (thread->state == _ST_ST_COND_WAIT)
            thread->flags |= _ST_FL_TIMEDOUT;

        /* Make thread runnable */
        ST_ASSERT(!(thread->flags & _ST_FL_IDLE_THREAD));
        thread->state = _ST_ST_RUNNABLE;
        _ST_ADD_RUNQ(thread);
    }
}
void st_thread_interrupt(_st_thread_t *thread)
{
    /* If thread is already dead */
    if (thread->state == _ST_ST_ZOMBIE)
        return;

    thread->flags |= _ST_FL_INTERRUPT;

    if (thread->state == _ST_ST_RUNNING || thread->state == _ST_ST_RUNNABLE)
        return;

    if (thread->flags & _ST_FL_ON_SLEEPQ)
        _ST_DEL_SLEEPQ(thread);

    /* Make thread runnable */
    thread->state = _ST_ST_RUNNABLE;
    _ST_ADD_RUNQ(thread);
}
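A hedged sketch of what an interrupt looks like from the interrupted thread's side (sleeper is an illustrative name): st_thread_interrupt() pulls a sleeping thread off the sleep queue, and its pending st_sleep()/st_usleep() returns -1 with errno set to EINTR.

#include <errno.h>
#include "st.h"

static void *sleeper(void *arg)
{
    /* _ST_FL_INTERRUPT set by st_thread_interrupt() makes this return early */
    if (st_sleep(30) < 0 && errno == EINTR) {
        /* interrupted: clean up and fall through to exit */
    }
    return NULL;
}

/* From some other st thread (sketch):
 *     st_thread_t t = st_thread_create(sleeper, NULL, 0, 0);
 *     ...
 *     st_thread_interrupt(t);   // removes t from the sleep queue, makes it runnable
 */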
static int _st_cond_signal(_st_cond_t *cvar, int broadcast)
{
    _st_thread_t *thread;
    _st_clist_t *q;

    for (q = cvar->wait_q.next; q != &cvar->wait_q; q = q->next) {
        thread = _ST_THREAD_WAITQ_PTR(q);
        if (thread->state == _ST_ST_COND_WAIT) {
            if (thread->flags & _ST_FL_ON_SLEEPQ)
                _ST_DEL_SLEEPQ(thread);

            /* Make thread runnable */
            thread->state = _ST_ST_RUNNABLE;
            _ST_ADD_RUNQ(thread);
            if (!broadcast)
                break;
        }
    }

    return 0;
}
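For context, the public entry points are thin wrappers over this helper (shown as in the usual sync.c layout), plus an illustrative waiter: st_cond_wait() parks the caller in _ST_ST_COND_WAIT on cvar->wait_q, which is exactly the state the loop above looks for.

int st_cond_signal(_st_cond_t *cvar)
{
    return _st_cond_signal(cvar, 0);    /* wake at most one waiter */
}

int st_cond_broadcast(_st_cond_t *cvar)
{
    return _st_cond_signal(cvar, 1);    /* wake every waiter */
}

/* Illustrative waiter (not library code) */
static void *waiter(void *arg)
{
    st_cond_t done = (st_cond_t) arg;
    st_cond_wait(done);                 /* parked until signal/broadcast/timeout */
    return NULL;
}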
int st_thread_join(_st_thread_t *thread, void **retvalp)
{
    _st_cond_t *term = thread->term;

    /* Can't join a non-joinable thread */
    if (term == NULL) {
        errno = EINVAL;
        return -1;
    }
    if (_ST_CURRENT_THREAD() == thread) {
        errno = EDEADLK;
        return -1;
    }

    /* Multiple threads can't wait on the same joinable thread */
    if (term->wait_q.next != &term->wait_q) {
        errno = EINVAL;
        return -1;
    }

    while (thread->state != _ST_ST_ZOMBIE) {
        if (st_cond_timedwait(term, ST_UTIME_NO_TIMEOUT) != 0)
            return -1;
    }

    if (retvalp)
        *retvalp = thread->retval;

    /*
     * Remove target thread from the zombie queue and make it runnable.
     * When it gets scheduled later, it will do the clean up.
     */
    thread->state = _ST_ST_RUNNABLE;
    _ST_DEL_ZOMBIEQ(thread);
    _ST_ADD_RUNQ(thread);

    return 0;
}
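A usage sketch assuming only the public API from st.h (worker and the return value 42 are illustrative): create a joinable thread, then collect its return value through the term condition variable the code above waits on.

#include <stdio.h>
#include "st.h"

static void *worker(void *arg)
{
    st_sleep(1);                  /* yields; the joiner blocks on thread->term */
    return (void *) 42;
}

int main(void)
{
    void *rv;

    st_init();
    /* joinable = 1, so st_thread_create() allocates thread->term */
    st_thread_t t = st_thread_create(worker, NULL, 1, 0);
    if (st_thread_join(t, &rv) == 0)
        printf("worker returned %ld\n", (long) rv);
    return 0;
}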
void _st_vp_check_clock(void)
{
    st_thread_t *thread;
    st_utime_t elapsed, now;

    now = st_utime();
    elapsed = now - _st_this_vp.last_clock;
    _st_this_vp.last_clock = now;

    if (_st_curr_time && now - _st_last_tset > 999000) {
        _st_curr_time = time(NULL);
        _st_last_tset = now;
    }

    while (_ST_SLEEPQ.next != &_ST_SLEEPQ) {
        thread = _ST_THREAD_PTR(_ST_SLEEPQ.next);
        ST_ASSERT(thread->flags & _ST_FL_ON_SLEEPQ);
        if (elapsed < thread->sleep) {
            thread->sleep -= elapsed;
            _ST_SLEEPQMAX -= elapsed;
            break;
        }
        _ST_DEL_SLEEPQ(thread, 1);
        elapsed -= thread->sleep;

        /* If thread is waiting on condition variable, set the time out flag */
        if (thread->state == _ST_ST_COND_WAIT)
            thread->flags |= _ST_FL_TIMEDOUT;

        /* Make thread runnable */
        ST_ASSERT(!(thread->flags & _ST_FL_IDLE_THREAD));
        thread->state = _ST_ST_RUNNABLE;
        _ST_ADD_RUNQ(thread);
    }
}
_st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg,
                               int joinable, int stk_size)
{
    _st_thread_t *thread;
    _st_stack_t *stack;
    void **ptds;
    char *sp;
#ifdef __ia64__
    char *bsp;
#endif

    /* Adjust stack size */
    if (stk_size == 0)
        stk_size = ST_DEFAULT_STACK_SIZE;
    stk_size = ((stk_size + _ST_PAGE_SIZE - 1) / _ST_PAGE_SIZE) * _ST_PAGE_SIZE;
    stack = _st_stack_new(stk_size);
    if (!stack)
        return NULL;

    /* Allocate thread object and per-thread data off the stack */
#if defined (MD_STACK_GROWS_DOWN)
    sp = stack->stk_top;
#ifdef __ia64__
    /*
     * The stack segment is split in the middle. The upper half is used
     * as backing store for the register stack which grows upward.
     * The lower half is used for the traditional memory stack which
     * grows downward. Both stacks start in the middle and grow outward
     * from each other.
     */
    sp -= (stk_size >> 1);
    bsp = sp;
    /* Make register stack 64-byte aligned */
    if ((unsigned long)bsp & 0x3f)
        bsp = bsp + (0x40 - ((unsigned long)bsp & 0x3f));
    stack->bsp = bsp + _ST_STACK_PAD_SIZE;
#endif
    sp = sp - (ST_KEYS_MAX * sizeof(void *));
    ptds = (void **) sp;
    sp = sp - sizeof(_st_thread_t);
    thread = (_st_thread_t *) sp;

    /* Make stack 64-byte aligned */
    if ((unsigned long)sp & 0x3f)
        sp = sp - ((unsigned long)sp & 0x3f);
    stack->sp = sp - _ST_STACK_PAD_SIZE;
#elif defined (MD_STACK_GROWS_UP)
    sp = stack->stk_bottom;
    thread = (_st_thread_t *) sp;
    sp = sp + sizeof(_st_thread_t);
    ptds = (void **) sp;
    sp = sp + (ST_KEYS_MAX * sizeof(void *));

    /* Make stack 64-byte aligned */
    if ((unsigned long)sp & 0x3f)
        sp = sp + (0x40 - ((unsigned long)sp & 0x3f));
    stack->sp = sp + _ST_STACK_PAD_SIZE;
#else
#error Unknown OS
#endif

    memset(thread, 0, sizeof(_st_thread_t));
    memset(ptds, 0, ST_KEYS_MAX * sizeof(void *));

    /* Initialize thread */
    thread->private_data = ptds;
    thread->stack = stack;
    thread->start = start;
    thread->arg = arg;
#ifndef __ia64__
    _ST_INIT_CONTEXT(thread, stack->sp, _st_thread_main);
#else
    _ST_INIT_CONTEXT(thread, stack->sp, stack->bsp, _st_thread_main);
#endif

    /* If thread is joinable, allocate a termination condition variable */
    if (joinable) {
        thread->term = st_cond_new();
        if (thread->term == NULL) {
            _st_stack_free(thread->stack);
            return NULL;
        }
    }

    /* Make thread runnable */
    thread->state = _ST_ST_RUNNABLE;
    _st_active_count++;
    _ST_ADD_RUNQ(thread);
#ifdef DEBUG
    _ST_ADD_THREADQ(thread);
#endif

    return thread;
}
extern "C" st_thread_t *st_thread_create(void *(*start)(void *arg), void *arg, int joinable, int stk_size) { st_thread_t *thread; st_stack_t *stack; void **ptds; char *sp; /* Adjust stack size */ if (stk_size == 0) stk_size = ST_DEFAULT_STACK_SIZE; stk_size = ((stk_size + _ST_PAGE_SIZE - 1) / _ST_PAGE_SIZE) * _ST_PAGE_SIZE; stack = _st_stack_new(stk_size); if (!stack) return NULL; /* Allocate thread object and per-thread data off the stack */ #if defined (MD_STACK_GROWS_DOWN) sp = stack->stk_top; sp = sp - (ST_KEYS_MAX * sizeof(void *)); ptds = (void **) sp; sp = sp - sizeof(st_thread_t); thread = (st_thread_t *) sp; /* Make stack 64-byte aligned */ if ((unsigned long)sp & 0x3f) sp = sp - ((unsigned long)sp & 0x3f); stack->sp = sp - _ST_STACK_PAD_SIZE; #elif defined (MD_STACK_GROWS_UP) sp = stack->stk_bottom; thread = (st_thread_t *)sp; sp = sp + sizeof(st_thread_t); ptds = (void **)sp; sp = sp + (ST_KEYS_MAX * sizeof(void *)); /* Make stack 64-byte aligned */ if ((unsigned long)sp & 0x3f) sp = sp + (0x40 - ((unsigned long)sp & 0x3f)); stack->sp = sp + _ST_STACK_PAD_SIZE; #else #error Unknown OS #endif memset(thread, 0, sizeof(st_thread_t)); memset(ptds, 0, ST_KEYS_MAX * sizeof(void *)); /* Initialize thread */ thread->private_data = ptds; thread->stack = stack; thread->start = start; thread->arg = arg; _ST_INIT_CONTEXT(thread, stack->sp, _st_thread_main); /* If thread is joinable, allocate a termination condition variable */ if (joinable) { thread->term = st_cond_new(); if (thread->term == NULL) { _st_stack_free(thread->stack); return NULL; } } if (!thread->context_switch) { thread->context_switch = (st_context_switch_t*)calloc(1, sizeof(st_context_switch_t)); thread->context_switch->thread = thread; } /* Make thread runnable */ thread->state = _ST_ST_RUNNABLE; _st_active_count++; _ST_ADD_RUNQ(thread); return thread; }
void _st_vp_idle(void)
{
    struct timeval timeout, *tvp = 0;
    st_utime_t min_timeout;
    st_thread_t *thread = 0;
    DWORD ret = 0;

    /* tvp is computed here but this Windows variant waits on an event instead */
    if (ST_CLIST_IS_EMPTY(&_ST_SLEEPQ)) {
        tvp = NULL;
    } else {
        min_timeout = (_ST_THREAD_PTR(_ST_SLEEPQ.next))->sleep;
        timeout.tv_sec  = (int) (min_timeout / 1000000);
        timeout.tv_usec = (int) (min_timeout % 1000000);
        tvp = &timeout;
    }

    ret = WaitForSingleObject(_st_notify_event, 1000);
    if (ret == WAIT_OBJECT_0) {
        /* Drain threads woken from other OS threads via the lock-free queue */
        while (_st_lock_free_dequeue(_st_lock_free_queue, &thread)) {
            if (thread->flags & _ST_FL_ON_SLEEPQ)
                _ST_DEL_SLEEPQ(thread, 0);
            thread->state = _ST_ST_RUNNABLE;
            _ST_ADD_RUNQ(thread);
        }
        /* Reference: http://wenku.baidu.com/view/44ff811455270722192ef7fb.html */
    }
    ResetEvent(_st_notify_event);
}
ST_HIDDEN void _st_select_find_bad_fd(void)
{
    _st_clist_t *q;
    _st_pollq_t *pq;
    int notify;
    struct pollfd *pds, *epds;
    int pq_max_osfd, osfd;
    short events;
    unsigned long noBlock = 0;

    _ST_SELECT_MAX_OSFD = -1;

    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
        pq = _ST_POLLQUEUE_PTR(q);
        notify = 0;
        epds = pq->pds + pq->npds;
        pq_max_osfd = -1;

        for (pds = pq->pds; pds < epds; pds++) {
            osfd = pds->fd;
            pds->revents = 0;
            if (pds->events == 0)
                continue;
            if (ioctlsocket(fds[osfd], FIONBIO, &noBlock) < 0) {
                pds->revents = POLLNVAL;
                notify = 1;
            }
            if (osfd > pq_max_osfd) {
                pq_max_osfd = osfd;
            }
        }

        if (notify) {
            ST_REMOVE_LINK(&pq->links);
            pq->on_ioq = 0;
            /*
             * Decrement the count of descriptors for each descriptor/event
             * because this I/O request is being removed from the ioq
             */
            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                if (events & POLLIN) {
                    if (--_ST_SELECT_READ_CNT(osfd) == 0) {
                        FD_CLR(fds[osfd], &_ST_SELECT_READ_SET);
                    }
                }
                if (events & POLLOUT) {
                    if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
                        FD_CLR(fds[osfd], &_ST_SELECT_WRITE_SET);
                    }
                }
                if (events & POLLPRI) {
                    if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
                        FD_CLR(fds[osfd], &_ST_SELECT_EXCEP_SET);
                    }
                }
            }

            if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                _ST_DEL_SLEEPQ(pq->thread);
            pq->thread->state = _ST_ST_RUNNABLE;
            _ST_ADD_RUNQ(pq->thread);
        } else {
            if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
                _ST_SELECT_MAX_OSFD = pq_max_osfd;
        }
    }
}
ST_HIDDEN void _st_select_dispatch(void)
{
    struct timeval timeout, *tvp;
    fd_set r, w, e;
    fd_set *rp, *wp, *ep;
    int nfd, pq_max_osfd, osfd;
    _st_clist_t *q;
    st_utime_t min_timeout;
    _st_pollq_t *pq;
    int notify;
    struct pollfd *pds, *epds;
    short events, revents;

    /*
     * Assignment of fd_sets
     */
    r = _ST_SELECT_READ_SET;
    w = _ST_SELECT_WRITE_SET;
    e = _ST_SELECT_EXCEP_SET;

    rp = &r;
    wp = &w;
    ep = &e;

    if (_ST_SLEEPQ == NULL) {
        tvp = NULL;
    } else {
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout.tv_sec  = (int) (min_timeout / 1000000);
        timeout.tv_usec = (int) (min_timeout % 1000000);
        tvp = &timeout;
    }

    /* Check for I/O operations */
    nfd = select(_ST_SELECT_MAX_OSFD + 1, rp, wp, ep, tvp);

    /* Notify threads that are associated with the selected descriptors */
    if (nfd > 0) {
        _ST_SELECT_MAX_OSFD = -1;
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;
            pq_max_osfd = -1;

            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                revents = 0;
                if ((events & POLLIN) && FD_ISSET(fds[osfd], rp)) {
                    revents |= POLLIN;
                }
                if ((events & POLLOUT) && FD_ISSET(fds[osfd], wp)) {
                    revents |= POLLOUT;
                }
                if ((events & POLLPRI) && FD_ISSET(fds[osfd], ep)) {
                    revents |= POLLPRI;
                }
                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
                if (osfd > pq_max_osfd) {
                    pq_max_osfd = osfd;
                }
            }

            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                /*
                 * Decrement the count of descriptors for each descriptor/event
                 * because this I/O request is being removed from the ioq
                 */
                for (pds = pq->pds; pds < epds; pds++) {
                    osfd = pds->fd;
                    events = pds->events;
                    if (events & POLLIN) {
                        if (--_ST_SELECT_READ_CNT(osfd) == 0) {
                            FD_CLR(fds[osfd], &_ST_SELECT_READ_SET);
                        }
                    }
                    if (events & POLLOUT) {
                        if (--_ST_SELECT_WRITE_CNT(osfd) == 0) {
                            FD_CLR(fds[osfd], &_ST_SELECT_WRITE_SET);
                        }
                    }
                    if (events & POLLPRI) {
                        if (--_ST_SELECT_EXCEP_CNT(osfd) == 0) {
                            FD_CLR(fds[osfd], &_ST_SELECT_EXCEP_SET);
                        }
                    }
                }

                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            } else {
                if (_ST_SELECT_MAX_OSFD < pq_max_osfd)
                    _ST_SELECT_MAX_OSFD = pq_max_osfd;
            }
        }
    } else if (nfd < 0) {
        /*
         * It can happen when a thread closes file descriptor
         * that is being used by some other thread -- BAD!
         */
        if (errno == EBADF)
            _st_select_find_bad_fd();
    }
}
ST_HIDDEN void _st_kq_dispatch(void)
{
    struct timespec timeout, *tsp;
    struct kevent kev;
    st_utime_t min_timeout;
    _st_clist_t *q;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds;
    int nfd, i, osfd, notify, filter;
    short events, revents;

    if (_ST_SLEEPQ == NULL) {
        tsp = NULL;
    } else {
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout.tv_sec  = (time_t) (min_timeout / 1000000);
        timeout.tv_nsec = (long) ((min_timeout % 1000000) * 1000);
        tsp = &timeout;
    }

 retry_kevent:
    /* Check for I/O operations */
    nfd = kevent(_st_kq_data->kq,
                 _st_kq_data->addlist, _st_kq_data->addlist_cnt,
                 _st_kq_data->evtlist, _st_kq_data->evtlist_size, tsp);

    _st_kq_data->addlist_cnt = 0;

    if (nfd > 0) {
        for (i = 0; i < nfd; i++) {
            osfd = _st_kq_data->evtlist[i].ident;
            filter = _st_kq_data->evtlist[i].filter;

            if (filter == EVFILT_READ) {
                _ST_KQ_REVENTS(osfd) |= POLLIN;
            } else if (filter == EVFILT_WRITE) {
                _ST_KQ_REVENTS(osfd) |= POLLOUT;
            }
            if (_st_kq_data->evtlist[i].flags & EV_ERROR) {
                if (_st_kq_data->evtlist[i].data == EBADF) {
                    _ST_KQ_REVENTS(osfd) |= POLLNVAL;
                } else {
                    _ST_KQ_REVENTS(osfd) |= POLLERR;
                }
            }
        }

        _st_kq_data->dellist_cnt = 0;

        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;

            for (pds = pq->pds; pds < epds; pds++) {
                osfd = pds->fd;
                events = pds->events;
                revents = (short)(_ST_KQ_REVENTS(osfd) & ~(POLLIN | POLLOUT));
                if ((events & POLLIN) && (_ST_KQ_REVENTS(osfd) & POLLIN)) {
                    revents |= POLLIN;
                }
                if ((events & POLLOUT) && (_ST_KQ_REVENTS(osfd) & POLLOUT)) {
                    revents |= POLLOUT;
                }
                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
            }

            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;

                for (pds = pq->pds; pds < epds; pds++) {
                    osfd = pds->fd;
                    events = pds->events;
                    /*
                     * We set EV_ONESHOT flag so we only need to delete
                     * descriptor if it didn't fire.
                     */
                    if ((events & POLLIN) && (--_ST_KQ_READ_CNT(osfd) == 0) &&
                        ((_ST_KQ_REVENTS(osfd) & POLLIN) == 0)) {
                        memset(&kev, 0, sizeof(kev));
                        kev.ident = osfd;
                        kev.filter = EVFILT_READ;
                        kev.flags = EV_DELETE;
                        _st_kq_dellist_add(&kev);
                    }
                    if ((events & POLLOUT) && (--_ST_KQ_WRITE_CNT(osfd) == 0) &&
                        ((_ST_KQ_REVENTS(osfd) & POLLOUT) == 0)) {
                        memset(&kev, 0, sizeof(kev));
                        kev.ident = osfd;
                        kev.filter = EVFILT_WRITE;
                        kev.flags = EV_DELETE;
                        _st_kq_dellist_add(&kev);
                    }
                }

                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            }
        }

        if (_st_kq_data->dellist_cnt > 0) {
            int rv;
            do {
                /* This kevent() won't block since result list size is 0 */
                rv = kevent(_st_kq_data->kq,
                            _st_kq_data->dellist, _st_kq_data->dellist_cnt,
                            NULL, 0, NULL);
            } while (rv < 0 && errno == EINTR);
        }

        for (i = 0; i < nfd; i++) {
            osfd = _st_kq_data->evtlist[i].ident;
            _ST_KQ_REVENTS(osfd) = 0;
        }

    } else if (nfd < 0) {
        if (errno == EBADF && _st_kq_data->pid != getpid()) {
            /* We probably forked, reinitialize kqueue */
            if ((_st_kq_data->kq = kqueue()) < 0) {
                /* There is nothing we can do here, will retry later */
                return;
            }
            fcntl(_st_kq_data->kq, F_SETFD, FD_CLOEXEC);
            _st_kq_data->pid = getpid();
            /* Re-register all descriptors on ioq with new kqueue */
            memset(_st_kq_data->fd_data, 0,
                   _st_kq_data->fd_data_size * sizeof(_kq_fd_data_t));
            for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
                pq = _ST_POLLQUEUE_PTR(q);
                _st_kq_pollset_add(pq->pds, pq->npds);
            }
            goto retry_kevent;
        }
    }
}
ST_HIDDEN void _st_poll_dispatch(void)
{
    int timeout, nfd;
    _st_clist_t *q;
    st_utime_t min_timeout;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds, *pollfds;

    /*
     * Build up the array of struct pollfd to wait on.
     * If existing array is not big enough, release it and allocate a new one.
     */
    ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
    if (_ST_POLL_OSFD_CNT > _ST_POLLFDS_SIZE) {
        free(_ST_POLLFDS);
        _ST_POLLFDS = (struct pollfd *) malloc((_ST_POLL_OSFD_CNT + 10) *
                                               sizeof(struct pollfd));
        ST_ASSERT(_ST_POLLFDS != NULL);
        _ST_POLLFDS_SIZE = _ST_POLL_OSFD_CNT + 10;
    }
    pollfds = _ST_POLLFDS;

    /* Gather all descriptors into one array */
    for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
        pq = _ST_POLLQUEUE_PTR(q);
        memcpy(pollfds, pq->pds, sizeof(struct pollfd) * pq->npds);
        pollfds += pq->npds;
    }
    ST_ASSERT(pollfds <= _ST_POLLFDS + _ST_POLLFDS_SIZE);

    if (_ST_SLEEPQ == NULL) {
        timeout = -1;
    } else {
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout = (int) (min_timeout / 1000);
    }

    /* Check for I/O operations */
    nfd = poll(_ST_POLLFDS, _ST_POLL_OSFD_CNT, timeout);

    /* Notify threads that are associated with the selected descriptors */
    if (nfd > 0) {
        pollfds = _ST_POLLFDS;
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            epds = pollfds + pq->npds;
            for (pds = pollfds; pds < epds; pds++) {
                if (pds->revents)
                    break;
            }
            if (pds < epds) {
                memcpy(pq->pds, pollfds, sizeof(struct pollfd) * pq->npds);
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;

                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);

                _ST_POLL_OSFD_CNT -= pq->npds;
                ST_ASSERT(_ST_POLL_OSFD_CNT >= 0);
            }
            pollfds = epds;
        }
    }
}
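A hedged sketch of the caller side: st_poll() is the public call that queues a pollq entry on the ioq that the dispatcher above scans (wait_readable and its arguments are illustrative, not part of the library).

#include <poll.h>
#include "st.h"

/* Illustrative helper, not library code */
int wait_readable(int osfd, st_utime_t usec)
{
    struct pollfd pd;

    pd.fd = osfd;
    pd.events = POLLIN;
    pd.revents = 0;

    /* Blocks only this st thread; the VP keeps scheduling others, and the
     * dispatcher wakes us once poll() reports the descriptor (or we time out). */
    if (st_poll(&pd, 1, usec) < 1)
        return -1;
    return (pd.revents & POLLIN) ? 0 : -1;
}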
ST_HIDDEN void _st_epoll_dispatch(void)
{
    st_utime_t min_timeout;
    _st_clist_t *q;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds;
    struct epoll_event ev;
    int timeout, nfd, i, osfd, notify;
    int events, op;
    short revents;

    if (_ST_SLEEPQ == NULL) {
        timeout = -1;
    } else {
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout = (int) (min_timeout / 1000);
    }

    if (_st_epoll_data->pid != getpid()) {
        /* We probably forked, reinitialize epoll set */
        close(_st_epoll_data->epfd);
        _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint);
        if (_st_epoll_data->epfd < 0) {
            /* There is nothing we can do here, will retry later */
            return;
        }
        fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
        _st_epoll_data->pid = getpid();

        /* Put all descriptors on ioq into new epoll set */
        memset(_st_epoll_data->fd_data, 0,
               _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t));
        _st_epoll_data->evtlist_cnt = 0;
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            _st_epoll_pollset_add(pq->pds, pq->npds);
        }
    }

    /* Check for I/O operations */
    nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist,
                     _st_epoll_data->evtlist_size, timeout);

    if (nfd > 0) {
        for (i = 0; i < nfd; i++) {
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events;
            if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) {
                /* Also set I/O bits on error */
                _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd);
            }
        }

        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;

            for (pds = pq->pds; pds < epds; pds++) {
                if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
                    pds->revents = 0;
                    continue;
                }
                osfd = pds->fd;
                events = pds->events;
                revents = 0;
                if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
                    revents |= POLLIN;
                if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
                    revents |= POLLOUT;
                if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
                    revents |= POLLPRI;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
                    revents |= POLLERR;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
                    revents |= POLLHUP;

                pds->revents = revents;
                if (revents) {
                    notify = 1;
                }
            }

            if (notify) {
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                /*
                 * Here we will only delete/modify descriptors that
                 * didn't fire (see comments in _st_epoll_pollset_del()).
                 */
                _st_epoll_pollset_del(pq->pds, pq->npds);

                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            }
        }

        for (i = 0; i < nfd; i++) {
            /* Delete/modify descriptors that fired */
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = 0;
            events = _ST_EPOLL_EVENTS(osfd);
            op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
            ev.events = events;
            ev.data.fd = osfd;
            if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 &&
                op == EPOLL_CTL_DEL) {
                _st_epoll_data->evtlist_cnt--;
            }
        }
    }
}
// epoll event dispatching
ST_HIDDEN void _st_epoll_dispatch(void)
{
    st_utime_t min_timeout;
    _st_clist_t *q;
    _st_pollq_t *pq;
    struct pollfd *pds, *epds;
    struct epoll_event ev;
    int timeout, nfd, i, osfd, notify;
    int events, op;
    short revents;

    if (_ST_SLEEPQ == NULL) {
        timeout = -1;
    } else {
        // How much longer the earliest sleeper still has to sleep
        // (used as the epoll_wait timeout)
        min_timeout = (_ST_SLEEPQ->due <= _ST_LAST_CLOCK) ? 0 :
            (_ST_SLEEPQ->due - _ST_LAST_CLOCK);
        timeout = (int) (min_timeout / 1000);
    }

    // In a forked child process, re-create the epoll fd
    if (_st_epoll_data->pid != getpid()) {
        /* We probably forked, reinitialize epoll set */
        close(_st_epoll_data->epfd);
        _st_epoll_data->epfd = epoll_create(_st_epoll_data->fd_hint);
        if (_st_epoll_data->epfd < 0) {
            /* There is nothing we can do here, will retry later */
            return;
        }
        // Close the epoll fd across exec
        fcntl(_st_epoll_data->epfd, F_SETFD, FD_CLOEXEC);
        _st_epoll_data->pid = getpid();

        /* Put all descriptors on ioq into new epoll set */
        memset(_st_epoll_data->fd_data, 0,
               _st_epoll_data->fd_data_size * sizeof(_epoll_fd_data_t));
        _st_epoll_data->evtlist_cnt = 0;
        // Add every net fd on the ioq back into the event system
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            _st_epoll_pollset_add(pq->pds, pq->npds);
        }
    }

    /* Check for I/O operations */
    nfd = epoll_wait(_st_epoll_data->epfd, _st_epoll_data->evtlist,
                     _st_epoll_data->evtlist_size, timeout);

    if (nfd > 0) {
        for (i = 0; i < nfd; i++) {
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = _st_epoll_data->evtlist[i].events;
            if (_ST_EPOLL_REVENTS(osfd) & (EPOLLERR | EPOLLHUP)) {
                /* Also set I/O bits on error */
                _ST_EPOLL_REVENTS(osfd) |= _ST_EPOLL_EVENTS(osfd);
            }
        }

        // Walk the ioq: each pq holds all pollfds one thread is waiting on
        for (q = _ST_IOQ.next; q != &_ST_IOQ; q = q->next) {
            pq = _ST_POLLQUEUE_PTR(q);
            notify = 0;
            epds = pq->pds + pq->npds;

            // Scan this thread's pollfds and collect the events that fired;
            // if any fired, the whole request is removed from the ioq below,
            // otherwise it stays on the ioq and keeps waiting
            for (pds = pq->pds; pds < epds; pds++) {
                if (_ST_EPOLL_REVENTS(pds->fd) == 0) {
                    pds->revents = 0;
                    continue;
                }
                osfd = pds->fd;
                events = pds->events;
                revents = 0;
                if ((events & POLLIN) && (_ST_EPOLL_REVENTS(osfd) & EPOLLIN))
                    revents |= POLLIN;
                if ((events & POLLOUT) && (_ST_EPOLL_REVENTS(osfd) & EPOLLOUT))
                    revents |= POLLOUT;
                if ((events & POLLPRI) && (_ST_EPOLL_REVENTS(osfd) & EPOLLPRI))
                    revents |= POLLPRI;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLERR)
                    revents |= POLLERR;
                if (_ST_EPOLL_REVENTS(osfd) & EPOLLHUP)
                    revents |= POLLHUP;

                pds->revents = revents;
                // Did any event fire?
                if (revents) {
                    notify = 1;
                }
            }

            if (notify) {
                // Remove the request whose descriptors fired from the ioq
                ST_REMOVE_LINK(&pq->links);
                pq->on_ioq = 0;
                /*
                 * Here we will only delete/modify descriptors that
                 * didn't fire (see comments in _st_epoll_pollset_del()).
                 */
                // Drop this thread's remaining descriptors from the epoll set
                _st_epoll_pollset_del(pq->pds, pq->npds);

                // If the thread is on the sleep queue, remove it,
                // then put it on the run queue
                if (pq->thread->flags & _ST_FL_ON_SLEEPQ)
                    _ST_DEL_SLEEPQ(pq->thread);
                pq->thread->state = _ST_ST_RUNNABLE;
                _ST_ADD_RUNQ(pq->thread);
            }
        }

        // Delete or modify the descriptors that fired but still have
        // other events registered
        for (i = 0; i < nfd; i++) {
            /* Delete/modify descriptors that fired */
            osfd = _st_epoll_data->evtlist[i].data.fd;
            _ST_EPOLL_REVENTS(osfd) = 0;
            events = _ST_EPOLL_EVENTS(osfd);
            op = events ? EPOLL_CTL_MOD : EPOLL_CTL_DEL;
            ev.events = events;
            ev.data.fd = osfd;
            if (epoll_ctl(_st_epoll_data->epfd, op, osfd, &ev) == 0 &&
                op == EPOLL_CTL_DEL) {
                _st_epoll_data->evtlist_cnt--;
            }
        }
    }
}