/* * pj_ioqueue_poll() * */ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) { int i, count, processed; int msec; //struct epoll_event *events = ioqueue->events; //struct queue *queue = ioqueue->queue; struct epoll_event events[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; struct queue queue[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; pj_timestamp t1, t2; PJ_CHECK_STACK(); msec = timeout ? PJ_TIME_VAL_MSEC(*timeout) : 9000; TRACE_((THIS_FILE, "start os_epoll_wait, msec=%d", msec)); pj_get_timestamp(&t1); //count = os_epoll_wait( ioqueue->epfd, events, ioqueue->max, msec); count = os_epoll_wait( ioqueue->epfd, events, PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL, msec); if (count == 0) { #if PJ_IOQUEUE_HAS_SAFE_UNREG /* Check the closing keys only when there's no activity and when there are * pending closing keys. */ if (count == 0 && !pj_list_empty(&ioqueue->closing_list)) { pj_lock_acquire(ioqueue->lock); scan_closing_keys(ioqueue); pj_lock_release(ioqueue->lock); } #endif TRACE_((THIS_FILE, "os_epoll_wait timed out")); return count; } else if (count < 0) { TRACE_((THIS_FILE, "os_epoll_wait error")); return -pj_get_netos_error(); } pj_get_timestamp(&t2); TRACE_((THIS_FILE, "os_epoll_wait returns %d, time=%d usec", count, pj_elapsed_usec(&t1, &t2))); /* Lock ioqueue. */ pj_lock_acquire(ioqueue->lock); for (processed=0, i=0; i<count; ++i) { pj_ioqueue_key_t *h = (pj_ioqueue_key_t*)(epoll_data_type) events[i].epoll_data; TRACE_((THIS_FILE, "event %d: events=%d", i, events[i].events)); /* * Check readability. */ if ((events[i].events & EPOLLIN) && (key_has_pending_read(h) || key_has_pending_accept(h)) && !IS_CLOSING(h) ) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = READABLE_EVENT; ++processed; continue; } /* * Check for writeability. */ if ((events[i].events & EPOLLOUT) && key_has_pending_write(h) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = WRITEABLE_EVENT; ++processed; continue; } #if PJ_HAS_TCP /* * Check for completion of connect() operation. */ if ((events[i].events & EPOLLOUT) && (h->connecting) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = WRITEABLE_EVENT; ++processed; continue; } #endif /* PJ_HAS_TCP */ /* * Check for error condition. */ if ((events[i].events & EPOLLERR) && !IS_CLOSING(h)) { /* * We need to handle this exception event. If it's related to us * connecting, report it as such. If not, just report it as a * read event and the higher layers will handle it. */ if (h->connecting) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = EXCEPTION_EVENT; ++processed; } else if (key_has_pending_read(h) || key_has_pending_accept(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = READABLE_EVENT; ++processed; } continue; } } for (i=0; i<processed; ++i) { if (queue[i].key->grp_lock) pj_grp_lock_add_ref_dbg(queue[i].key->grp_lock, "ioqueue", 0); } PJ_RACE_ME(5); pj_lock_release(ioqueue->lock); PJ_RACE_ME(5); /* Now process the events. */ for (i=0; i<processed; ++i) { switch (queue[i].event_type) { case READABLE_EVENT: ioqueue_dispatch_read_event(ioqueue, queue[i].key); break; case WRITEABLE_EVENT: ioqueue_dispatch_write_event(ioqueue, queue[i].key); break; case EXCEPTION_EVENT: ioqueue_dispatch_exception_event(ioqueue, queue[i].key); break; case NO_EVENT: pj_assert(!"Invalid event!"); break; } #if PJ_IOQUEUE_HAS_SAFE_UNREG decrement_counter(queue[i].key); #endif if (queue[i].key->grp_lock) pj_grp_lock_dec_ref_dbg(queue[i].key->grp_lock, "ioqueue", 0); } /* Special case: * When epoll returns > 0 but no descriptors are actually set! */ if (count > 0 && !processed && msec > 0) { pj_thread_sleep(msec); } pj_get_timestamp(&t1); TRACE_((THIS_FILE, "ioqueue_poll() returns %d, time=%d usec", processed, pj_elapsed_usec(&t2, &t1))); return processed; }
/* * pj_ioqueue_poll() * * Few things worth written: * * - we used to do only one callback called per poll, but it didn't go * very well. The reason is because on some situation, the write * callback gets called all the time, thus doesn't give the read * callback to get called. This happens, for example, when user * submit write operation inside the write callback. * As the result, we changed the behaviour so that now multiple * callbacks are called in a single poll. It should be fast too, * just that we need to be carefull with the ioqueue data structs. * * - to guarantee preemptiveness etc, the poll function must strictly * work on fd_set copy of the ioqueue (not the original one). */ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) { pj_fd_set_t rfdset, wfdset, xfdset; int count, counter; pj_ioqueue_key_t *h; struct event { pj_ioqueue_key_t *key; enum ioqueue_event_type event_type; } event[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; PJ_ASSERT_RETURN(ioqueue, -PJ_EINVAL); /* Lock ioqueue before making fd_set copies */ pj_lock_acquire(ioqueue->lock); /* We will only do select() when there are sockets to be polled. * Otherwise select() will return error. */ if (PJ_FD_COUNT(&ioqueue->rfdset)==0 && PJ_FD_COUNT(&ioqueue->wfdset)==0 #if defined(PJ_HAS_TCP) && PJ_HAS_TCP!=0 && PJ_FD_COUNT(&ioqueue->xfdset)==0 #endif ) { #if PJ_IOQUEUE_HAS_SAFE_UNREG scan_closing_keys(ioqueue); #endif pj_lock_release(ioqueue->lock); TRACE__((THIS_FILE, " poll: no fd is set")); if (timeout) pj_thread_sleep(PJ_TIME_VAL_MSEC(*timeout)); return 0; } /* Copy ioqueue's pj_fd_set_t to local variables. */ pj_memcpy(&rfdset, &ioqueue->rfdset, sizeof(pj_fd_set_t)); pj_memcpy(&wfdset, &ioqueue->wfdset, sizeof(pj_fd_set_t)); #if PJ_HAS_TCP pj_memcpy(&xfdset, &ioqueue->xfdset, sizeof(pj_fd_set_t)); #else PJ_FD_ZERO(&xfdset); #endif #if VALIDATE_FD_SET validate_sets(ioqueue, &rfdset, &wfdset, &xfdset); #endif /* Unlock ioqueue before select(). */ pj_lock_release(ioqueue->lock); count = pj_sock_select(ioqueue->nfds+1, &rfdset, &wfdset, &xfdset, timeout); if (count == 0) return 0; else if (count < 0) return -pj_get_netos_error(); else if (count > PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL) count = PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL; /* Scan descriptor sets for event and add the events in the event * array to be processed later in this function. We do this so that * events can be processed in parallel without holding ioqueue lock. */ pj_lock_acquire(ioqueue->lock); counter = 0; /* Scan for writable sockets first to handle piggy-back data * coming with accept(). */ h = ioqueue->active_list.next; for ( ; h!=&ioqueue->active_list && counter<count; h = h->next) { if ( (key_has_pending_write(h) || key_has_pending_connect(h)) && PJ_FD_ISSET(h->fd, &wfdset) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = WRITEABLE_EVENT; ++counter; } /* Scan for readable socket. */ if ((key_has_pending_read(h) || key_has_pending_accept(h)) && PJ_FD_ISSET(h->fd, &rfdset) && !IS_CLOSING(h) && counter<count) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = READABLE_EVENT; ++counter; } #if PJ_HAS_TCP if (key_has_pending_connect(h) && PJ_FD_ISSET(h->fd, &xfdset) && !IS_CLOSING(h) && counter<count) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = EXCEPTION_EVENT; ++counter; } #endif } pj_lock_release(ioqueue->lock); count = counter; /* Now process all events. The dispatch functions will take care * of locking in each of the key */ for (counter=0; counter<count; ++counter) { switch (event[counter].event_type) { case READABLE_EVENT: ioqueue_dispatch_read_event(ioqueue, event[counter].key); break; case WRITEABLE_EVENT: ioqueue_dispatch_write_event(ioqueue, event[counter].key); break; case EXCEPTION_EVENT: ioqueue_dispatch_exception_event(ioqueue, event[counter].key); break; case NO_EVENT: pj_assert(!"Invalid event!"); break; } #if PJ_IOQUEUE_HAS_SAFE_UNREG decrement_counter(event[counter].key); #endif } return count; }