static void eventer_epoll_impl_add(eventer_t e) {
  int rv;
  struct epoll_spec *spec;
  struct epoll_event _ev;
  ev_lock_state_t lockstate;
  mtevAssert(e->mask);

  if(e->mask & EVENTER_ASYNCH) {
    eventer_add_asynch(NULL, e);
    return;
  }

  /* Recurrent delegation */
  if(e->mask & EVENTER_RECURRENT) {
    eventer_add_recurrent(e);
    return;
  }

  /* Timed events are simple */
  if(e->mask & EVENTER_TIMER) {
    eventer_add_timed(e);
    return;
  }

  spec = eventer_get_spec_for_event(e);

  /* file descriptor event */
  mtevAssert(e->whence.tv_sec == 0 && e->whence.tv_usec == 0);
  memset(&_ev, 0, sizeof(_ev));
  _ev.data.fd = e->fd;
  if(e->mask & EVENTER_READ) _ev.events |= (EPOLLIN|EPOLLPRI);
  if(e->mask & EVENTER_WRITE) _ev.events |= (EPOLLOUT);
  if(e->mask & EVENTER_EXCEPTION) _ev.events |= (EPOLLERR|EPOLLHUP);
  lockstate = acquire_master_fd(e->fd);
  master_fds[e->fd].e = e;

  rv = epoll_ctl(spec->epoll_fd, EPOLL_CTL_ADD, e->fd, &_ev);
  if(rv != 0) {
    mtevFatal(mtev_error, "epoll_ctl(%d,add,%d,%x) -> %d (%d: %s)\n",
              spec->epoll_fd, e->fd, e->mask, rv, errno, strerror(errno));
  }

  release_master_fd(e->fd, lockstate);
}
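/*
 * Illustrative usage (not part of the original source): how a caller might
 * schedule a read event on an already-connected socket `conn_fd` with a
 * caller-supplied handler. This mirrors the eventfd registration done in
 * eventer_epoll_impl_loop() below; eventer_add() routes fd-masked events
 * to eventer_epoll_impl_add() above.
 */
static void example_schedule_read(int conn_fd, eventer_func_t my_read_handler) {
  eventer_t e = eventer_alloc();
  e->callback = my_read_handler;  /* invoked as cb(e, mask, closure, now) */
  e->fd = conn_fd;
  e->mask = EVENTER_READ | EVENTER_EXCEPTION;
  eventer_add(e);
}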
static void eventer_epoll_impl_update(eventer_t e, int mask) {
  struct epoll_event _ev;
  if(e->mask & EVENTER_TIMER) {
    eventer_update_timed(e, mask);
    return;
  }
  memset(&_ev, 0, sizeof(_ev));
  _ev.data.fd = e->fd;
  e->mask = mask;
  if(e->mask & (EVENTER_READ | EVENTER_WRITE | EVENTER_EXCEPTION)) {
    struct epoll_spec *spec;
    spec = eventer_get_spec_for_event(e);
    if(e->mask & EVENTER_READ) _ev.events |= (EPOLLIN|EPOLLPRI);
    if(e->mask & EVENTER_WRITE) _ev.events |= (EPOLLOUT);
    if(e->mask & EVENTER_EXCEPTION) _ev.events |= (EPOLLERR|EPOLLHUP);
    if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_MOD, e->fd, &_ev) != 0) {
      mtevFatal(mtev_error, "epoll_ctl(%d, EPOLL_CTL_MOD, %d) -> %s\n",
                spec->epoll_fd, e->fd, strerror(errno));
    }
  }
}
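/*
 * Illustrative usage (not part of the original source): flipping an fd
 * event's interest from read to write, e.g. once a request has been parsed
 * and a response is ready to flush. eventer_update() dispatches to the
 * implementation above through the active eventer's ops table.
 */
static void example_switch_to_write(eventer_t e) {
  eventer_update(e, EVENTER_WRITE | EVENTER_EXCEPTION);
}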
static eventer_t eventer_epoll_impl_remove(eventer_t e) {
  struct epoll_spec *spec;
  eventer_t removed = NULL;
  if(e->mask & EVENTER_ASYNCH) {
    mtevFatal(mtev_error, "error in eventer_epoll_impl_remove: got unexpected EVENTER_ASYNCH mask\n");
  }
  if(e->mask & (EVENTER_READ | EVENTER_WRITE | EVENTER_EXCEPTION)) {
    ev_lock_state_t lockstate;
    struct epoll_event _ev;
    spec = eventer_get_spec_for_event(e);
    memset(&_ev, 0, sizeof(_ev));
    _ev.data.fd = e->fd;
    lockstate = acquire_master_fd(e->fd);
    if(e == master_fds[e->fd].e) {
      removed = e;
      master_fds[e->fd].e = NULL;
      mtevL(eventer_deb, "epoll_ctl(%d, del, %d)\n", spec->epoll_fd, e->fd);
      if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, e->fd, &_ev) != 0) {
        mtevL(mtev_error, "epoll_ctl(%d, EPOLL_CTL_DEL, %d) -> %s\n",
              spec->epoll_fd, e->fd, strerror(errno));
        if(errno != ENOENT) {
          mtevFatal(mtev_error, "errno != ENOENT: %d (%s)\n",
                    errno, strerror(errno));
        }
      }
    }
    release_master_fd(e->fd, lockstate);
  }
  else if(e->mask & EVENTER_TIMER) {
    removed = eventer_remove_timed(e);
  }
  else if(e->mask & EVENTER_RECURRENT) {
    removed = eventer_remove_recurrent(e);
  }
  else {
    mtevFatal(mtev_error, "error in eventer_epoll_impl_remove: got unknown mask (0x%04x)\n",
              e->mask);
  }
  return removed;
}
static eventer_t eventer_epoll_impl_remove_fd(int fd) {
  eventer_t eiq = NULL;
  ev_lock_state_t lockstate;
  if(master_fds[fd].e) {
    struct epoll_spec *spec;
    struct epoll_event _ev;
    memset(&_ev, 0, sizeof(_ev));
    _ev.data.fd = fd;
    lockstate = acquire_master_fd(fd);
    eiq = master_fds[fd].e;
    spec = eventer_get_spec_for_event(eiq);
    master_fds[fd].e = NULL;
    if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, fd, &_ev) != 0) {
      mtevL(mtev_error, "epoll_ctl(%d, EPOLL_CTL_DEL, %d) -> %s\n",
            spec->epoll_fd, fd, strerror(errno));
      if(errno != ENOENT) {
        mtevFatal(mtev_error, "errno != ENOENT: %d (%s)\n",
                  errno, strerror(errno));
      }
    }
    release_master_fd(fd, lockstate);
  }
  return eiq;
}
static void eventer_ports_impl_wakeup(eventer_t e) {
  struct ports_spec *spec = eventer_get_spec_for_event(e);
  if(mtev_spinlock_trylock(&spec->wakeup_notify))
    port_send(spec->port_fd, 0, NULL);
}
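/*
 * The trylock above coalesces wakeups: only the first caller to grab
 * spec->wakeup_notify actually posts a user event via port_send(); the
 * event loop below re-arms by clearing the flag (spec->wakeup_notify = 0)
 * once port_getn() returns. A minimal self-contained sketch of the same
 * pattern using C11 atomics (names here are hypothetical):
 */
#include <stdatomic.h>

static atomic_flag wakeup_pending = ATOMIC_FLAG_INIT;

static void wake_loop_once(void (*post_wakeup)(void)) {
  /* Of N concurrent wakers, only the one that flips the flag posts. */
  if(!atomic_flag_test_and_set(&wakeup_pending))
    post_wakeup();
}

static void loop_woke_up(void) {
  /* The loop clears the flag after waking, re-arming future wakeups. */
  atomic_flag_clear(&wakeup_pending);
}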
static int eventer_ports_impl_loop(int id) {
  struct timeval __dyna_sleep = { 0, 0 };
  struct ports_spec *spec;
  spec = eventer_get_spec_for_event(NULL);

  while(1) {
    struct timeval __sleeptime;
    struct timespec __ports_sleeptime;
    unsigned int fd_cnt = 0;
    int ret;
    port_event_t pevents[MAX_PORT_EVENTS];

    if(compare_timeval(eventer_max_sleeptime, __dyna_sleep) < 0)
      __dyna_sleep = eventer_max_sleeptime;

    __sleeptime = __dyna_sleep;

    eventer_dispatch_timed(&__sleeptime);

    if(compare_timeval(__sleeptime, __dyna_sleep) > 0)
      __sleeptime = __dyna_sleep;

    /* Handle cross_thread dispatches */
    eventer_cross_thread_process();

    /* Handle recurrent events */
    eventer_dispatch_recurrent();

    /* Now we move on to our fd-based events */
    __ports_sleeptime.tv_sec = __sleeptime.tv_sec;
    __ports_sleeptime.tv_nsec = __sleeptime.tv_usec * 1000;
    fd_cnt = 1;
    pevents[0].portev_source = 65535; /* This is impossible */
    ret = port_getn(spec->port_fd, pevents, MAX_PORT_EVENTS, &fd_cnt,
                    &__ports_sleeptime);
    spec->wakeup_notify = 0; /* force unlock */
    /* The timeout case is a tad complex with ports. -1/ETIME is clearly
     * a timeout. However, it is possible that we got that and fd_cnt isn't
     * 0, which means we both timed out and got events... WTF? */
    if(fd_cnt == 0 ||
       (ret == -1 && errno == ETIME && pevents[0].portev_source == 65535))
      add_timeval(__dyna_sleep, __dyna_increment, &__dyna_sleep);
    if(ret == -1 && (errno != ETIME && errno != EINTR))
      mtevL(eventer_err, "port_getn: %s\n", strerror(errno));
    if(ret < 0)
      mtevL(eventer_deb, "port_getn: %s\n", strerror(errno));
    mtevL(eventer_deb, "debug: port_getn(%d, [], %d) => %d\n",
          spec->port_fd, fd_cnt, ret);

    if(pevents[0].portev_source == 65535) {
      /* the impossible still remains, which means our fd_cnt _must_ be 0 */
      fd_cnt = 0;
    }

    if(fd_cnt > 0) {
      unsigned int idx;
      /* Loop a last time to process */
      __dyna_sleep.tv_sec = __dyna_sleep.tv_usec = 0; /* reset */
      for(idx = 0; idx < fd_cnt; idx++) {
        port_event_t *pe;
        eventer_t e;
        int fd, mask;
        pe = &pevents[idx];
        if(pe->portev_source != PORT_SOURCE_FD) continue;
        fd = (int)pe->portev_object;
        mtevAssert((intptr_t)pe->portev_user == fd);
        e = master_fds[fd].e;
        /* It's possible that someone removed the event and freed it
         * before we got here.... bail out if we're null. */
        if(!e) continue;
        mask = 0;
        if(pe->portev_events & (POLLIN | POLLHUP)) mask |= EVENTER_READ;
        if(pe->portev_events & (POLLOUT)) mask |= EVENTER_WRITE;
        if(pe->portev_events & (POLLERR | POLLHUP | POLLNVAL)) mask |= EVENTER_EXCEPTION;
        eventer_ports_impl_trigger(e, mask);
      }
    }
  }
  /* NOTREACHED */
  return 0;
}
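/*
 * The __dyna_sleep handling above implements a simple idle backoff: each
 * pass with no fd activity lengthens the next timeout by __dyna_increment,
 * any activity resets it to zero, and eventer_max_sleeptime caps it. A
 * self-contained sketch of that policy (values in microseconds; the
 * function name and parameters are hypothetical, for illustration only):
 */
static long next_idle_timeout_us(long current_us, int had_activity,
                                 long increment_us, long max_us) {
  if(had_activity) return 0;          /* reset on any delivered event */
  current_us += increment_us;         /* back off while idle */
  return current_us > max_us ? max_us : current_us;
}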
static int eventer_epoll_impl_loop() {
  struct epoll_event *epev;
  struct epoll_spec *spec;
  spec = eventer_get_spec_for_event(NULL);
  epev = malloc(sizeof(*epev) * maxfds);

#ifdef HAVE_SYS_EVENTFD_H
  if(spec->event_fd >= 0) {
    eventer_t e = eventer_alloc();
    e->callback = eventer_epoll_eventfd_read;
    e->fd = spec->event_fd;
    e->mask = EVENTER_READ;
    eventer_add(e);
  }
#endif

  while(1) {
    struct timeval __now, __sleeptime;
    int fd_cnt = 0;

    __sleeptime = eventer_max_sleeptime;

    mtev_gettimeofday(&__now, NULL);
    eventer_dispatch_timed(&__now, &__sleeptime);

    /* Handle cross_thread dispatches */
    eventer_cross_thread_process();

    /* Handle recurrent events */
    eventer_dispatch_recurrent(&__now);

    /* Now we move on to our fd-based events */
    do {
      fd_cnt = epoll_wait(spec->epoll_fd, epev, maxfds,
                          __sleeptime.tv_sec * 1000 + __sleeptime.tv_usec / 1000);
    } while(fd_cnt < 0 && errno == EINTR);
    mtevLT(eventer_deb, &__now, "debug: epoll_wait(%d, [], %d) => %d\n",
           spec->epoll_fd, maxfds, fd_cnt);

    if(fd_cnt < 0) {
      mtevLT(eventer_err, &__now, "epoll_wait: %s\n", strerror(errno));
    }
    else {
      int idx;
      /* loop once to clear */
      for(idx = 0; idx < fd_cnt; idx++) {
        struct epoll_event *ev;
        eventer_t e;
        int fd, mask = 0;
        ev = &epev[idx];
        if(ev->events & (EPOLLIN | EPOLLPRI)) mask |= EVENTER_READ;
        if(ev->events & (EPOLLOUT)) mask |= EVENTER_WRITE;
        if(ev->events & (EPOLLERR|EPOLLHUP)) mask |= EVENTER_EXCEPTION;
        fd = ev->data.fd;
        e = master_fds[fd].e;
        /* It's possible that someone removed the event and freed it
         * before we got here. */
        if(!e) continue;
        eventer_epoll_impl_trigger(e, mask);
      }
    }
  }
  /* NOTREACHED */
  return 0;
}
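/*
 * eventer_epoll_eventfd_read() is referenced above but not shown in this
 * excerpt. A minimal sketch of such a wakeup-drain callback, matching the
 * callback signature used by the trigger path (cb(e, mask, closure, now)
 * returning the new mask) and assuming the eventfd was created
 * non-blocking, might look like this (illustrative, not the original
 * implementation):
 */
static int example_eventfd_read(eventer_t e, int mask, void *closure,
                                struct timeval *now) {
  uint64_t cnt;
  ssize_t len;
  (void)mask; (void)closure; (void)now;
  /* A single read returns and zeroes the eventfd counter; we only care
   * that the loop woke up, not how many wakeups were coalesced. */
  len = read(e->fd, &cnt, sizeof(cnt));
  (void)len; /* EAGAIN just means a spurious wakeup; either way we rearm */
  return EVENTER_READ; /* stay scheduled for future wakeups */
}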
static void eventer_epoll_impl_trigger(eventer_t e, int mask) {
  struct epoll_spec *spec;
  struct timeval __now;
  int fd, newmask;
  const char *cbname;
  ev_lock_state_t lockstate;
  int cross_thread = mask & EVENTER_CROSS_THREAD_TRIGGER;
  int added_to_master_fds = 0;
  u_int64_t start, duration;

  mask = mask & ~(EVENTER_RESERVED);
  fd = e->fd;
  if(cross_thread) {
    if(master_fds[fd].e != NULL) {
      mtevL(eventer_deb, "Attempting to trigger already-registered event fd: %d cross thread.\n", fd);
    }
    /* mtevAssert(master_fds[fd].e == NULL); */
  }
  if(!pthread_equal(pthread_self(), e->thr_owner)) {
    /* If we're triggering across threads, it can't be registered yet */
    if(master_fds[fd].e != NULL) {
      mtevL(eventer_deb, "Attempting to trigger already-registered event fd: %d cross thread.\n", fd);
    }
    /* mtevAssert(master_fds[fd].e == NULL); */
    eventer_cross_thread_trigger(e, mask);
    return;
  }
  if(master_fds[fd].e == NULL) {
    master_fds[fd].e = e;
    e->mask = 0;
    added_to_master_fds = 1;
  }
  if(e != master_fds[fd].e) return;
  lockstate = acquire_master_fd(fd);
  if(lockstate == EV_ALREADY_OWNED) return;
  mtevAssert(lockstate == EV_OWNED);

  mtev_gettimeofday(&__now, NULL);
  cbname = eventer_name_for_callback_e(e->callback, e);
  mtevLT(eventer_deb, &__now, "epoll: fire on %d/%x to %s(%p)\n",
         fd, mask, cbname?cbname:"???", e->callback);
  mtev_memory_begin();
  LIBMTEV_EVENTER_CALLBACK_ENTRY((void *)e, (void *)e->callback,
                                 (char *)cbname, fd, e->mask, mask);
  start = mtev_gethrtime();
  newmask = e->callback(e, mask, e->closure, &__now);
  duration = mtev_gethrtime() - start;
  LIBMTEV_EVENTER_CALLBACK_RETURN((void *)e, (void *)e->callback,
                                  (char *)cbname, newmask);
  mtev_memory_end();
  stats_set_hist_intscale(eventer_callback_latency, duration, -9, 1);
  stats_set_hist_intscale(eventer_latency_handle_for_callback(e->callback),
                          duration, -9, 1);

  if(newmask) {
    struct epoll_event _ev;
    memset(&_ev, 0, sizeof(_ev));
    _ev.data.fd = fd;
    if(newmask & EVENTER_READ) _ev.events |= (EPOLLIN|EPOLLPRI);
    if(newmask & EVENTER_WRITE) _ev.events |= (EPOLLOUT);
    if(newmask & EVENTER_EXCEPTION) _ev.events |= (EPOLLERR|EPOLLHUP);
    if(master_fds[fd].e == NULL) {
      mtevL(mtev_debug, "eventer %s(%p) epoll asked to modify descheduled fd: %d\n",
            cbname?cbname:"???", e->callback, fd);
    }
    else {
      if(!pthread_equal(pthread_self(), e->thr_owner)) {
        pthread_t tgt = e->thr_owner;
        e->thr_owner = pthread_self();
        spec = eventer_get_spec_for_event(e);
        if(!added_to_master_fds &&
           epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, fd, &_ev) != 0) {
          mtevFatal(mtev_error,
                    "epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, fd, &_ev) failed; "
                    "spec->epoll_fd: %d; fd: %d; errno: %d (%s)\n",
                    spec->epoll_fd, fd, errno, strerror(errno));
        }
        e->thr_owner = tgt;
        spec = eventer_get_spec_for_event(e);
        mtevAssert(epoll_ctl(spec->epoll_fd, EPOLL_CTL_ADD, fd, &_ev) == 0);
        mtevL(eventer_deb, "moved event[%p] from t@%d to t@%d\n",
              e, (int)pthread_self(), (int)tgt);
      }
      else {
        int epoll_cmd = added_to_master_fds ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
        spec = eventer_get_spec_for_event(e);
        if(epoll_ctl(spec->epoll_fd, epoll_cmd, fd, &_ev) != 0) {
          const char *cb_name = eventer_name_for_callback_e(e->callback, e);
          mtevFatal(mtev_error,
                    "epoll_ctl(spec->epoll_fd, EPOLL_CTL_MOD, fd, &_ev) failed; "
                    "spec->epoll_fd: %d; fd: %d; errno: %d (%s); callback: %s\n",
                    spec->epoll_fd, fd, errno, strerror(errno),
                    cb_name ? cb_name : "???");
        }
      }
    }
    /* Set our mask */
    e->mask = newmask;
  }
  else {
    /* see kqueue implementation for details on the next line */
    if(master_fds[fd].e == e) master_fds[fd].e = NULL;
    eventer_free(e);
  }
  release_master_fd(fd, lockstate);
}
static void eventer_epoll_impl_trigger(eventer_t e, int mask) {
  struct epoll_spec *spec;
  struct timeval __now;
  int fd, newmask, needs_add = 0;
  const char *cbname;
  ev_lock_state_t lockstate;
  int cross_thread = mask & EVENTER_CROSS_THREAD_TRIGGER;
  uint64_t start, duration;

  mask = mask & ~(EVENTER_RESERVED);
  fd = e->fd;
  if(cross_thread) {
    if(master_fds[fd].e != NULL) {
      mtevL(eventer_deb, "Attempting to trigger already-registered event fd: %d cross thread.\n", fd);
    }
    /* mtevAssert(master_fds[fd].e == NULL); */
  }
  if(!pthread_equal(pthread_self(), e->thr_owner)) {
    /* If we're triggering across threads, it can't be registered yet */
    if(master_fds[fd].e != NULL) {
      mtevL(eventer_deb, "Attempting to trigger already-registered event fd: %d cross thread.\n", fd);
    }
    /* mtevAssert(master_fds[fd].e == NULL); */
    eventer_cross_thread_trigger(e, mask);
    return;
  }
  if(master_fds[fd].e == NULL) {
    lockstate = acquire_master_fd(fd);
    if(lockstate == EV_ALREADY_OWNED) {
      /* The incoming triggered event is already owned by this thread.
       * This means our floated event completed before the current
       * event handler even exited. So it retriggered recursively
       * from inside the event handler.
       *
       * Treat this special case the same as a cross thread trigger
       * and just queue this event to be picked up on the next loop. */
      eventer_cross_thread_trigger(e, mask);
      return;
    }
    /* If we are re-adding the event to the master list here, also do the
     * needful with epoll_ctl.
     *
     * This can happen in cases where some event was floated and the float
     * completed so fast that we finished the job in the same thread
     * that it started in. Since we `eventer_remove_fd` before we float,
     * the re-add here should replace the fd in the epoll set. */
    master_fds[fd].e = e;
    e->mask = 0;
    struct epoll_event _ev;
    memset(&_ev, 0, sizeof(_ev));
    _ev.data.fd = fd;
    spec = eventer_get_spec_for_event(e);
    if(mask & EVENTER_READ) _ev.events |= (EPOLLIN|EPOLLPRI);
    if(mask & EVENTER_WRITE) _ev.events |= (EPOLLOUT);
    if(mask & EVENTER_EXCEPTION) _ev.events |= (EPOLLERR|EPOLLHUP);
    mtevL(eventer_deb, "epoll_ctl(%d, add, %d)\n", spec->epoll_fd, fd);
    if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_ADD, fd, &_ev) != 0) {
      mtevL(mtev_error, "epoll_ctl(%d, add, %d, %d)\n", spec->epoll_fd, fd, errno);
    }
    /* record the re-add so the post-callback epoll_ctl below picks
     * EPOLL_CTL_ADD semantics rather than DEL/MOD */
    needs_add = 1;
    release_master_fd(fd, lockstate);
  }
  if(e != master_fds[fd].e) {
    mtevL(mtev_error, "Incoming event: %p, does not match master list: %p\n",
          e, master_fds[fd].e);
    return;
  }
  lockstate = acquire_master_fd(fd);
  if(lockstate == EV_ALREADY_OWNED) {
    mtevL(eventer_deb, "Incoming event: %p already owned by this thread\n", e);
    return;
  }
  mtevAssert(lockstate == EV_OWNED);

  mtev_gettimeofday(&__now, NULL);
  cbname = eventer_name_for_callback_e(e->callback, e);
  spec = eventer_get_spec_for_event(e);
  mtevLT(eventer_deb, &__now, "epoll(%d): fire on %d/%x to %s(%p)\n",
         spec->epoll_fd, fd, mask, cbname?cbname:"???", e->callback);
  mtev_memory_begin();
  LIBMTEV_EVENTER_CALLBACK_ENTRY((void *)e, (void *)e->callback,
                                 (char *)cbname, fd, e->mask, mask);
  start = mtev_gethrtime();
  newmask = e->callback(e, mask, e->closure, &__now);
  duration = mtev_gethrtime() - start;
  LIBMTEV_EVENTER_CALLBACK_RETURN((void *)e, (void *)e->callback,
                                  (char *)cbname, newmask);
  mtev_memory_end();
  stats_set_hist_intscale(eventer_callback_latency, duration, -9, 1);
  stats_set_hist_intscale(eventer_latency_handle_for_callback(e->callback),
                          duration, -9, 1);

  if(newmask) {
    struct epoll_event _ev;
    memset(&_ev, 0, sizeof(_ev));
    _ev.data.fd = fd;
    if(newmask & EVENTER_READ) _ev.events |= (EPOLLIN|EPOLLPRI);
    if(newmask & EVENTER_WRITE) _ev.events |= (EPOLLOUT);
    if(newmask & EVENTER_EXCEPTION) _ev.events |= (EPOLLERR|EPOLLHUP);
    if(master_fds[fd].e == NULL) {
      mtevL(mtev_debug, "eventer %s(%p) epoll asked to modify descheduled fd: %d\n",
            cbname?cbname:"???", e->callback, fd);
    }
    else {
      if(!pthread_equal(pthread_self(), e->thr_owner)) {
        pthread_t tgt = e->thr_owner;
        e->thr_owner = pthread_self();
        spec = eventer_get_spec_for_event(e);
        if(e->mask != 0 && !needs_add) {
          mtevL(eventer_deb, "epoll_ctl(%d, del, %d)\n", spec->epoll_fd, fd);
          if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, fd, &_ev) != 0) {
            mtevFatal(mtev_error,
                      "epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, fd, &_ev) failed; "
                      "spec->epoll_fd: %d; fd: %d; errno: %d (%s)\n",
                      spec->epoll_fd, fd, errno, strerror(errno));
          }
        }
        e->thr_owner = tgt;
        spec = eventer_get_spec_for_event(e);
        mtevL(eventer_deb, "epoll_ctl(%d, add, %d)\n", spec->epoll_fd, fd);
        mtevAssert(epoll_ctl(spec->epoll_fd, EPOLL_CTL_ADD, fd, &_ev) == 0);
        mtevL(eventer_deb, "epoll(%d) moved event[%p] from t@%d to t@%d\n",
              spec->epoll_fd, e, (int)pthread_self(), (int)tgt);
      }
      else {
        int epoll_rv;
        int epoll_cmd = (e->mask == 0 || needs_add) ? EPOLL_CTL_ADD : EPOLL_CTL_MOD;
        spec = eventer_get_spec_for_event(e);
        mtevL(eventer_deb, "epoll_ctl(%d, %s, %d)\n",
              spec->epoll_fd, epoll_cmd == EPOLL_CTL_ADD ? "add" : "mod", fd);
        epoll_rv = epoll_ctl(spec->epoll_fd, epoll_cmd, fd, &_ev);
        if(epoll_rv != 0 &&
           ((epoll_cmd == EPOLL_CTL_ADD && errno == EEXIST) ||
            (epoll_cmd == EPOLL_CTL_MOD && errno == ENOENT))) {
          /* try the other way */
          epoll_cmd = (epoll_cmd == EPOLL_CTL_ADD) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
          mtevL(eventer_deb, "retry epoll_ctl(%d, %s, %d)\n",
                spec->epoll_fd, epoll_cmd == EPOLL_CTL_ADD ? "add" : "mod", fd);
          epoll_rv = epoll_ctl(spec->epoll_fd, epoll_cmd, fd, &_ev);
        }
        if(epoll_rv != 0) {
          const char *cb_name = eventer_name_for_callback_e(e->callback, e);
          mtevFatal(mtev_error,
                    "epoll_ctl(spec->epoll_fd, %s, fd, &_ev) failed; "
                    "spec->epoll_fd: %d; fd: %d; errno: %d (%s); callback: %s\n",
                    epoll_cmd == EPOLL_CTL_ADD ? "EPOLL_CTL_ADD" : "EPOLL_CTL_MOD",
                    spec->epoll_fd, fd, errno, strerror(errno),
                    cb_name ? cb_name : "???");
        }
      }
    }
    /* Set our mask */
    e->mask = newmask;
  }
  else {
    /* see kqueue implementation for details on the next line */
    if(master_fds[fd].e == e) {
      /* If newmask == 0 the user has floated the connection. If we get
       * here and they have not called `eventer_remove_fd`, that is a
       * misuse of mtev.
       *
       * Check for compliant float usage here: remove the fd on their
       * behalf if they forgot, and warn in the log. */
      spec = eventer_get_spec_for_event(e);
      struct epoll_event _ev;
      memset(&_ev, 0, sizeof(_ev));
      _ev.data.fd = fd;
      if(epoll_ctl(spec->epoll_fd, EPOLL_CTL_DEL, e->fd, &_ev) == 0) {
        mtevL(mtev_error, "WARNING: You forgot to 'eventer_remove_fd()' before returning a mask of zero.\n");
      }
      master_fds[fd].e = NULL;
    }
    eventer_free(e);
  }
  release_master_fd(fd, lockstate);
}
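/*
 * Illustrative usage (not part of the original source): the compliant way
 * to "float" a connection out of the eventer, per the warning above. The
 * handler deschedules the fd first, hands the event off elsewhere, and
 * only then returns a mask of zero. worker_queue_push() is a hypothetical
 * hand-off target, declared here only so the sketch is self-contained.
 */
extern void worker_queue_push(void *queue, eventer_t e); /* hypothetical */

static int example_floating_handler(eventer_t e, int mask, void *closure,
                                    struct timeval *now) {
  (void)mask; (void)now;
  eventer_remove_fd(e->fd);      /* deschedule before floating */
  worker_queue_push(closure, e); /* hand the event to another subsystem */
  return 0;                      /* zero mask: no longer scheduled here */
}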