static void start_next_write(pj_ioqueue_key_t *key) { if (key_has_pending_write(key)) { PjUwpSocket *s = (PjUwpSocket*)key->fd; struct write_operation *op; op = (struct write_operation*)key->write_list.next; if (op->op == PJ_IOQUEUE_OP_SEND) s->Send(op->buf, (pj_ssize_t*)&op->size); else s->SendTo(op->buf, (pj_ssize_t*)&op->size, &op->rmt_addr); } }
/* * pj_ioqueue_poll() * */ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) { int i, count, processed; int msec; //struct epoll_event *events = ioqueue->events; //struct queue *queue = ioqueue->queue; struct epoll_event events[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; struct queue queue[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; pj_timestamp t1, t2; PJ_CHECK_STACK(); msec = timeout ? PJ_TIME_VAL_MSEC(*timeout) : 9000; TRACE_((THIS_FILE, "start os_epoll_wait, msec=%d", msec)); pj_get_timestamp(&t1); //count = os_epoll_wait( ioqueue->epfd, events, ioqueue->max, msec); count = os_epoll_wait( ioqueue->epfd, events, PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL, msec); if (count == 0) { #if PJ_IOQUEUE_HAS_SAFE_UNREG /* Check the closing keys only when there's no activity and when there are * pending closing keys. */ if (count == 0 && !pj_list_empty(&ioqueue->closing_list)) { pj_lock_acquire(ioqueue->lock); scan_closing_keys(ioqueue); pj_lock_release(ioqueue->lock); } #endif TRACE_((THIS_FILE, "os_epoll_wait timed out")); return count; } else if (count < 0) { TRACE_((THIS_FILE, "os_epoll_wait error")); return -pj_get_netos_error(); } pj_get_timestamp(&t2); TRACE_((THIS_FILE, "os_epoll_wait returns %d, time=%d usec", count, pj_elapsed_usec(&t1, &t2))); /* Lock ioqueue. */ pj_lock_acquire(ioqueue->lock); for (processed=0, i=0; i<count; ++i) { pj_ioqueue_key_t *h = (pj_ioqueue_key_t*)(epoll_data_type) events[i].epoll_data; TRACE_((THIS_FILE, "event %d: events=%d", i, events[i].events)); /* * Check readability. */ if ((events[i].events & EPOLLIN) && (key_has_pending_read(h) || key_has_pending_accept(h)) && !IS_CLOSING(h) ) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = READABLE_EVENT; ++processed; continue; } /* * Check for writeability. */ if ((events[i].events & EPOLLOUT) && key_has_pending_write(h) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = WRITEABLE_EVENT; ++processed; continue; } #if PJ_HAS_TCP /* * Check for completion of connect() operation. */ if ((events[i].events & EPOLLOUT) && (h->connecting) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = WRITEABLE_EVENT; ++processed; continue; } #endif /* PJ_HAS_TCP */ /* * Check for error condition. */ if ((events[i].events & EPOLLERR) && !IS_CLOSING(h)) { /* * We need to handle this exception event. If it's related to us * connecting, report it as such. If not, just report it as a * read event and the higher layers will handle it. */ if (h->connecting) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = EXCEPTION_EVENT; ++processed; } else if (key_has_pending_read(h) || key_has_pending_accept(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif queue[processed].key = h; queue[processed].event_type = READABLE_EVENT; ++processed; } continue; } } for (i=0; i<processed; ++i) { if (queue[i].key->grp_lock) pj_grp_lock_add_ref_dbg(queue[i].key->grp_lock, "ioqueue", 0); } PJ_RACE_ME(5); pj_lock_release(ioqueue->lock); PJ_RACE_ME(5); /* Now process the events. */ for (i=0; i<processed; ++i) { switch (queue[i].event_type) { case READABLE_EVENT: ioqueue_dispatch_read_event(ioqueue, queue[i].key); break; case WRITEABLE_EVENT: ioqueue_dispatch_write_event(ioqueue, queue[i].key); break; case EXCEPTION_EVENT: ioqueue_dispatch_exception_event(ioqueue, queue[i].key); break; case NO_EVENT: pj_assert(!"Invalid event!"); break; } #if PJ_IOQUEUE_HAS_SAFE_UNREG decrement_counter(queue[i].key); #endif if (queue[i].key->grp_lock) pj_grp_lock_dec_ref_dbg(queue[i].key->grp_lock, "ioqueue", 0); } /* Special case: * When epoll returns > 0 but no descriptors are actually set! */ if (count > 0 && !processed && msec > 0) { pj_thread_sleep(msec); } pj_get_timestamp(&t1); TRACE_((THIS_FILE, "ioqueue_poll() returns %d, time=%d usec", processed, pj_elapsed_usec(&t2, &t1))); return processed; }
/* * pj_ioqueue_poll() * * Few things worth written: * * - we used to do only one callback called per poll, but it didn't go * very well. The reason is because on some situation, the write * callback gets called all the time, thus doesn't give the read * callback to get called. This happens, for example, when user * submit write operation inside the write callback. * As the result, we changed the behaviour so that now multiple * callbacks are called in a single poll. It should be fast too, * just that we need to be carefull with the ioqueue data structs. * * - to guarantee preemptiveness etc, the poll function must strictly * work on fd_set copy of the ioqueue (not the original one). */ PJ_DEF(int) pj_ioqueue_poll( pj_ioqueue_t *ioqueue, const pj_time_val *timeout) { pj_fd_set_t rfdset, wfdset, xfdset; int count, counter; pj_ioqueue_key_t *h; struct event { pj_ioqueue_key_t *key; enum ioqueue_event_type event_type; } event[PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL]; PJ_ASSERT_RETURN(ioqueue, -PJ_EINVAL); /* Lock ioqueue before making fd_set copies */ pj_lock_acquire(ioqueue->lock); /* We will only do select() when there are sockets to be polled. * Otherwise select() will return error. */ if (PJ_FD_COUNT(&ioqueue->rfdset)==0 && PJ_FD_COUNT(&ioqueue->wfdset)==0 #if defined(PJ_HAS_TCP) && PJ_HAS_TCP!=0 && PJ_FD_COUNT(&ioqueue->xfdset)==0 #endif ) { #if PJ_IOQUEUE_HAS_SAFE_UNREG scan_closing_keys(ioqueue); #endif pj_lock_release(ioqueue->lock); TRACE__((THIS_FILE, " poll: no fd is set")); if (timeout) pj_thread_sleep(PJ_TIME_VAL_MSEC(*timeout)); return 0; } /* Copy ioqueue's pj_fd_set_t to local variables. */ pj_memcpy(&rfdset, &ioqueue->rfdset, sizeof(pj_fd_set_t)); pj_memcpy(&wfdset, &ioqueue->wfdset, sizeof(pj_fd_set_t)); #if PJ_HAS_TCP pj_memcpy(&xfdset, &ioqueue->xfdset, sizeof(pj_fd_set_t)); #else PJ_FD_ZERO(&xfdset); #endif #if VALIDATE_FD_SET validate_sets(ioqueue, &rfdset, &wfdset, &xfdset); #endif /* Unlock ioqueue before select(). */ pj_lock_release(ioqueue->lock); count = pj_sock_select(ioqueue->nfds+1, &rfdset, &wfdset, &xfdset, timeout); if (count == 0) return 0; else if (count < 0) return -pj_get_netos_error(); else if (count > PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL) count = PJ_IOQUEUE_MAX_EVENTS_IN_SINGLE_POLL; /* Scan descriptor sets for event and add the events in the event * array to be processed later in this function. We do this so that * events can be processed in parallel without holding ioqueue lock. */ pj_lock_acquire(ioqueue->lock); counter = 0; /* Scan for writable sockets first to handle piggy-back data * coming with accept(). */ h = ioqueue->active_list.next; for ( ; h!=&ioqueue->active_list && counter<count; h = h->next) { if ( (key_has_pending_write(h) || key_has_pending_connect(h)) && PJ_FD_ISSET(h->fd, &wfdset) && !IS_CLOSING(h)) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = WRITEABLE_EVENT; ++counter; } /* Scan for readable socket. */ if ((key_has_pending_read(h) || key_has_pending_accept(h)) && PJ_FD_ISSET(h->fd, &rfdset) && !IS_CLOSING(h) && counter<count) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = READABLE_EVENT; ++counter; } #if PJ_HAS_TCP if (key_has_pending_connect(h) && PJ_FD_ISSET(h->fd, &xfdset) && !IS_CLOSING(h) && counter<count) { #if PJ_IOQUEUE_HAS_SAFE_UNREG increment_counter(h); #endif event[counter].key = h; event[counter].event_type = EXCEPTION_EVENT; ++counter; } #endif } pj_lock_release(ioqueue->lock); count = counter; /* Now process all events. The dispatch functions will take care * of locking in each of the key */ for (counter=0; counter<count; ++counter) { switch (event[counter].event_type) { case READABLE_EVENT: ioqueue_dispatch_read_event(ioqueue, event[counter].key); break; case WRITEABLE_EVENT: ioqueue_dispatch_write_event(ioqueue, event[counter].key); break; case EXCEPTION_EVENT: ioqueue_dispatch_exception_event(ioqueue, event[counter].key); break; case NO_EVENT: pj_assert(!"Invalid event!"); break; } #if PJ_IOQUEUE_HAS_SAFE_UNREG decrement_counter(event[counter].key); #endif } return count; }
/* * ioqueue_dispatch_event() * * Report occurence of an event in the key to be processed by the * framework. */ void ioqueue_dispatch_write_event(pj_ioqueue_t *ioqueue, pj_ioqueue_key_t *h) { /* Lock the key. */ pj_mutex_lock(h->mutex); if (IS_CLOSING(h)) { pj_mutex_unlock(h->mutex); return; } #if defined(PJ_HAS_TCP) && PJ_HAS_TCP!=0 if (h->connecting) { /* Completion of connect() operation */ pj_ssize_t bytes_transfered; pj_bool_t has_lock; /* Clear operation. */ h->connecting = 0; ioqueue_remove_from_set(ioqueue, h, WRITEABLE_EVENT); ioqueue_remove_from_set(ioqueue, h, EXCEPTION_EVENT); #if (defined(PJ_HAS_SO_ERROR) && PJ_HAS_SO_ERROR!=0) /* from connect(2): * On Linux, use getsockopt to read the SO_ERROR option at * level SOL_SOCKET to determine whether connect() completed * successfully (if SO_ERROR is zero). */ { int value; int vallen = sizeof(value); int gs_rc = pj_sock_getsockopt(h->fd, SOL_SOCKET, SO_ERROR, &value, &vallen); if (gs_rc != 0) { /* Argh!! What to do now??? * Just indicate that the socket is connected. The * application will get error as soon as it tries to use * the socket to send/receive. */ bytes_transfered = 0; } else { bytes_transfered = value; } } #elif defined(PJ_WIN32) && PJ_WIN32!=0 bytes_transfered = 0; /* success */ #else /* Excellent information in D.J. Bernstein page: * http://cr.yp.to/docs/connect.html * * Seems like the most portable way of detecting connect() * failure is to call getpeername(). If socket is connected, * getpeername() will return 0. If the socket is not connected, * it will return ENOTCONN, and read(fd, &ch, 1) will produce * the right errno through error slippage. This is a combination * of suggestions from Douglas C. Schmidt and Ken Keys. */ { int gp_rc; struct sockaddr_in addr; socklen_t addrlen = sizeof(addr); gp_rc = getpeername(h->fd, (struct sockaddr*)&addr, &addrlen); bytes_transfered = (gp_rc < 0) ? gp_rc : -gp_rc; } #endif /* Unlock; from this point we don't need to hold key's mutex * (unless concurrency is disabled, which in this case we should * hold the mutex while calling the callback) */ if (h->allow_concurrent) { /* concurrency may be changed while we're in the callback, so * save it to a flag. */ has_lock = PJ_FALSE; pj_mutex_unlock(h->mutex); } else { has_lock = PJ_TRUE; } /* Call callback. */ if (h->cb.on_connect_complete && !IS_CLOSING(h)) (*h->cb.on_connect_complete)(h, bytes_transfered); /* Unlock if we still hold the lock */ if (has_lock) { pj_mutex_unlock(h->mutex); } /* Done. */ } else #endif /* PJ_HAS_TCP */ if (key_has_pending_write(h)) { /* Socket is writable. */ struct write_operation *write_op; pj_ssize_t sent; pj_status_t send_rc; /* Get the first in the queue. */ write_op = h->write_list.next; /* For datagrams, we can remove the write_op from the list * so that send() can work in parallel. */ if (h->fd_type == pj_SOCK_DGRAM()) { pj_list_erase(write_op); if (pj_list_empty(&h->write_list)) ioqueue_remove_from_set(ioqueue, h, WRITEABLE_EVENT); } /* Send the data. * Unfortunately we must do this while holding key's mutex, thus * preventing parallel write on a single key.. :-(( */ sent = write_op->size - write_op->written; if (write_op->op == PJ_IOQUEUE_OP_SEND) { send_rc = pj_sock_send(h->fd, write_op->buf+write_op->written, &sent, write_op->flags); /* Can't do this. We only clear "op" after we're finished sending * the whole buffer. */ //write_op->op = 0; } else if (write_op->op == PJ_IOQUEUE_OP_SEND_TO) { send_rc = pj_sock_sendto(h->fd, write_op->buf+write_op->written, &sent, write_op->flags, &write_op->rmt_addr, write_op->rmt_addrlen); /* Can't do this. We only clear "op" after we're finished sending * the whole buffer. */ //write_op->op = 0; } else { pj_assert(!"Invalid operation type!"); write_op->op = PJ_IOQUEUE_OP_NONE; send_rc = PJ_EBUG; } if (send_rc == PJ_SUCCESS) { write_op->written += sent; } else { pj_assert(send_rc > 0); write_op->written = -send_rc; } /* Are we finished with this buffer? */ if (send_rc!=PJ_SUCCESS || write_op->written == (pj_ssize_t)write_op->size || h->fd_type == pj_SOCK_DGRAM()) { pj_bool_t has_lock; write_op->op = PJ_IOQUEUE_OP_NONE; if (h->fd_type != pj_SOCK_DGRAM()) { /* Write completion of the whole stream. */ pj_list_erase(write_op); /* Clear operation if there's no more data to send. */ if (pj_list_empty(&h->write_list)) ioqueue_remove_from_set(ioqueue, h, WRITEABLE_EVENT); } /* Unlock; from this point we don't need to hold key's mutex * (unless concurrency is disabled, which in this case we should * hold the mutex while calling the callback) */ if (h->allow_concurrent) { /* concurrency may be changed while we're in the callback, so * save it to a flag. */ has_lock = PJ_FALSE; pj_mutex_unlock(h->mutex); } else { has_lock = PJ_TRUE; } /* Call callback. */ if (h->cb.on_write_complete && !IS_CLOSING(h)) { (*h->cb.on_write_complete)(h, (pj_ioqueue_op_key_t*)write_op, write_op->written); } if (has_lock) { pj_mutex_unlock(h->mutex); } } else { pj_mutex_unlock(h->mutex); } /* Done. */ } else { /* * This is normal; execution may fall here when multiple threads * are signalled for the same event, but only one thread eventually * able to process the event. */ pj_mutex_unlock(h->mutex); } }