void ph_nbio_emitter_init(struct ph_nbio_emitter *emitter) { struct sigevent sev; port_notify_t notify; struct itimerspec ts; emitter->io_fd = port_create(); if (emitter->io_fd == -1) { ph_panic("port_create: `Pe%d", errno); } memset(&sev, 0, sizeof(sev)); memset(¬ify, 0, sizeof(notify)); memset(&ts, 0, sizeof(ts)); ts.it_interval.tv_nsec = WHEEL_INTERVAL_MS * 1000000; ts.it_value.tv_nsec = ts.it_interval.tv_nsec; notify.portnfy_port = emitter->io_fd; sev.sigev_notify = SIGEV_PORT; sev.sigev_value.sival_ptr = ¬ify; if (timer_create(CLOCK_REALTIME, &sev, &emitter->port_timer)) { ph_panic("failed to create timer: `Pe%d", errno); } if (timer_settime(emitter->port_timer, 0, &ts, NULL)) { ph_panic("failed to set timer: `Pe%d", errno); } }
/* Make the event-port association for job->fd match `mask`.
 *
 * The READ/WRITE bits of `mask` are translated to POLLIN/POLLOUT (plus
 * DEFAULT_POLL_MASK whenever either is requested).  If the translated value
 * already equals job->kmask nothing is done.  A translated value of zero
 * dissociates the descriptor; anything else (re)associates it with `job`
 * as the user pointer.
 *
 * Returns PH_OK; jobs with fd == -1 are ignored.  Failures go through
 * ph_panic.
 */
ph_result_t ph_nbio_emitter_apply_io_mask(struct ph_nbio_emitter *emitter,
    ph_job_t *job, ph_iomask_t mask)
{
  ph_iomask_t rw;
  int desired;
  int rc;

  if (job->fd == -1) {
    return PH_OK;
  }

  rw = mask & (PH_IOMASK_READ|PH_IOMASK_WRITE);
  if (rw == PH_IOMASK_READ) {
    desired = POLLIN|DEFAULT_POLL_MASK;
  } else if (rw == PH_IOMASK_WRITE) {
    desired = POLLOUT|DEFAULT_POLL_MASK;
  } else if (rw == (PH_IOMASK_READ|PH_IOMASK_WRITE)) {
    desired = POLLIN|POLLOUT|DEFAULT_POLL_MASK;
  } else {
    desired = 0;
  }

  if (desired == job->kmask) {
    // Already in the requested state
    return PH_OK;
  }

  if (desired == 0) {
    rc = port_dissociate(emitter->io_fd, PORT_SOURCE_FD, job->fd);
    // ENOENT is tolerated: there was simply nothing to dissociate
    if (rc != 0 && errno != ENOENT) {
      ph_panic("port_dissociate: setting mask to %02x on fd %d -> `Pe%d",
          mask, job->fd, errno);
    }
    job->kmask = 0;
    job->mask = 0;
    return PH_OK;
  }

  // Record the new state before asking the port to watch the descriptor
  job->mask = mask;
  job->kmask = desired;
  rc = port_associate(emitter->io_fd, PORT_SOURCE_FD, job->fd, desired, job);
  if (rc != 0) {
    ph_panic("port_associate: setting mask to %02x on fd %d -> `Pe%d",
        mask, job->fd, errno);
    return PH_ERR;
  }

  return PH_OK;
}
void ph_nbio_emitter_init(struct ph_nbio_emitter *emitter) { struct kevent tev; emitter->io_fd = kqueue(); if (emitter->io_fd == -1) { ph_panic("kqueue(): `Pe%d", errno); } init_kq_set(&emitter->kqset); // Configure timer EV_SET(&tev, 0, EVFILT_TIMER, EV_ADD, 0, WHEEL_INTERVAL_MS, emitter); if (kevent(emitter->io_fd, &tev, 1, NULL, 0, NULL)) { ph_panic("setting up timer: kevent: `Pe%d", errno); } }
/* Socket-state callback handed to ares; invoked when ares wants to change
 * the event mask for one of its sockets.
 *
 * `data` is the owning DNS channel.  The job tracking `socket_fd` is looked
 * up in the channel's sock_map (a missing entry is a panic).  A non-empty
 * interest set is forwarded to apply_mask; an empty one means the socket is
 * finished, so the job is disabled, removed from the map and freed.
 */
static void sock_state_cb(void *data, ares_socket_t socket_fd,
    int readable, int writable)
{
  ph_dns_channel_t *chan = data;
  ph_job_t *job;
  ph_iomask_t mask;

  mask = (readable ? PH_IOMASK_READ : 0) | (writable ? PH_IOMASK_WRITE : 0);

  if (ph_ht_lookup(&chan->sock_map, &socket_fd, &job, false) != PH_OK) {
    ph_panic("job for socket %d was not found in ares sock_state_cb",
        socket_fd);
  }

  if (mask != 0) {
    apply_mask(chan, job, mask);
    return;
  }

  // Neither readable nor writable: tear the job down
  ph_job_set_nbio(job, 0, NULL);
  ph_ht_del(&chan->sock_map, &socket_fd);
  ph_mem_free(mt.job, job);
}
static void do_ares_init(void) { int res = ares_library_init(ARES_LIB_INIT_ALL); atexit(do_ares_fini); if (res) { ph_panic("ares_library_init failed: %s", ares_strerror(res)); } ph_memtype_register_block(sizeof(defs)/sizeof(defs[0]), defs, &mt.chan); // This must be the last thing we do in this function default_channel = create_chan(); if (!default_channel) { ph_panic("failed to create default DNS channel"); } }
/* Double the capacity of a kevent change set.
 *
 * While set->events still points at set->base a new heap array is allocated
 * and the `used` entries are copied into it; otherwise the existing heap
 * array is resized with realloc.  Allocation failure panics with "OOM".
 */
static void grow_kq_set(struct ph_nbio_kq_set *set)
{
  struct kevent *bigger;
  size_t new_bytes = set->size * 2 * sizeof(*bigger);

  if (set->events != set->base) {
    // Already on the heap: resize in place
    bigger = realloc(set->events, new_bytes);
    if (bigger == NULL) {
      ph_panic("OOM");
    }
  } else {
    // First growth: set->base is not resized, so copy out of it
    bigger = malloc(new_bytes);
    if (bigger == NULL) {
      ph_panic("OOM");
    }
    memcpy(bigger, set->events, set->used * sizeof(*bigger));
  }

  set->events = bigger;
  set->size *= 2;
}
/* Queue (or immediately apply) the kqueue changes needed so that job->fd is
 * watched according to `mask`.
 *
 * When called on the thread whose is_emitter equals `emitter`, the changes
 * are appended to the emitter's own change set; otherwise they are gathered
 * in a local set and submitted with kevent() before returning.
 *
 * READ/WRITE interest is added as one-shot filters; a mask with neither bit
 * requests deletion of both filters.  job->kmask and job->mask are updated
 * to `mask`.
 *
 * Returns PH_OK; jobs with fd == -1 or an unchanged mask are ignored.
 * kevent() failures go through ph_panic.
 */
ph_result_t ph_nbio_emitter_apply_io_mask(struct ph_nbio_emitter *emitter,
    ph_job_t *job, ph_iomask_t mask)
{
  struct ph_nbio_kq_set local_set;
  struct ph_nbio_kq_set *set;
  int res;

  if (job->fd == -1 || mask == job->kmask) {
    return PH_OK;
  }

  if (emitter != ph_thread_self()->is_emitter) {
    init_kq_set(&local_set);
    set = &local_set;
  } else {
    set = &emitter->kqset;
  }

  if ((mask & (PH_IOMASK_READ|PH_IOMASK_WRITE)) == 0) {
    // Neither read nor write -> delete both filters
    add_kq_set(set, job->fd, EVFILT_READ, EV_DELETE, 0, 0, job);
    add_kq_set(set, job->fd, EVFILT_WRITE, EV_DELETE, 0, 0, job);
  } else {
    if (mask & PH_IOMASK_READ) {
      add_kq_set(set, job->fd, EVFILT_READ, EV_ADD|EV_ONESHOT, 0, 0, job);
    }
    if (mask & PH_IOMASK_WRITE) {
      add_kq_set(set, job->fd, EVFILT_WRITE, EV_ADD|EV_ONESHOT, 0, 0, job);
    }
  }

  job->kmask = mask;
  job->mask = mask;

  if (set != &local_set) {
    // Changes stay queued in the emitter's own set
    return PH_OK;
  }

  // Using the local set: apply it immediately
  res = kevent(emitter->io_fd, set->events, set->used, NULL, 0, NULL);

  // ENOENT while deleting is fine: the filter simply wasn't registered
  if (res != 0 && !(mask == 0 && errno == ENOENT)) {
    ph_panic("kevent: setting mask to %02x on fd %d with %d slots -> `Pe%d",
        mask, job->fd, set->used, errno);
    return PH_ERR;
  }

  return PH_OK;
}
/* Allocate `size` bytes from a variable-size memtype.
 *
 * A struct sized_header recording the size and memtype is stored in front
 * of the returned pointer.  The byte and allocation counters of the memtype
 * are updated, and the payload is zeroed when the memtype carries
 * PH_MEM_FLAGS_ZERO.
 *
 * Returns NULL when the memtype has a fixed item_size (after memory_panic),
 * when `size` exceeds INT64_MAX, or when malloc fails; in the latter case
 * the OOM counter is bumped and ph_panic is raised first if the memtype
 * carries PH_MEM_FLAGS_PANIC.
 */
void *ph_mem_alloc_size(ph_memtype_t mt, uint64_t size)
{
  static const uint8_t slots[2] = { SLOT_BYTES, SLOT_ALLOCS };
  struct mem_type *mem_type = resolve_mt(mt);
  struct sized_header *hdr;
  ph_counter_block_t *block;
  int64_t values[2];
  void *payload;

  if (mem_type->def.item_size) {
    memory_panic(
        "mem_type %s is not vsize, cannot be used with ph_mem_alloc_size",
        mem_type->def.name);
    return NULL;
  }

  if (size > INT64_MAX) {
    // The counters are signed 64-bit; sizes beyond that can't be tracked
    return NULL;
  }

  hdr = malloc(size + HEADER_RESERVATION);
  if (hdr == NULL) {
    ph_counter_scope_add(mem_type->scope,
        mem_type->first_slot + SLOT_OOM, 1);

    if (mem_type->def.flags & PH_MEM_FLAGS_PANIC) {
      ph_panic("OOM while allocating %" PRIu64 " bytes of %s/%s memory",
          size + HEADER_RESERVATION, mem_type->def.facility,
          mem_type->def.name);
    }
    return NULL;
  }

  hdr->size = size;
  hdr->mt = mt;
  // The caller's memory starts immediately after the header
  payload = hdr + 1;

  values[0] = size;
  values[1] = 1;
  block = ph_counter_block_open(mem_type->scope);
  ph_counter_block_bulk_add(block, 2, slots, values);
  ph_counter_block_delref(block);

  if (mem_type->def.flags & PH_MEM_FLAGS_ZERO) {
    memset(payload, 0, size);
  }

  return payload;
}
static ph_dns_channel_t *create_chan(void) { ph_dns_channel_t *chan; struct ares_options opts; int res; pthread_mutexattr_t attr; chan = ph_mem_alloc(mt.chan); if (!chan) { return NULL; } pthread_mutexattr_init(&attr); pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); pthread_mutex_init(&chan->chanlock, &attr); pthread_mutexattr_destroy(&attr); if (ph_ht_init(&chan->sock_map, 4, &sock_key, &ph_ht_ptr_val_def) != PH_OK) { ph_panic("failed to init sock map"); } memset(&opts, 0, sizeof(opts)); opts.sock_state_cb_data = chan; opts.sock_state_cb = sock_state_cb; opts.flags = ARES_FLAG_STAYOPEN; res = ares_init_options(&chan->chan, &opts, ARES_OPT_SOCK_STATE_CB|ARES_OPT_FLAGS); if (res != ARES_SUCCESS) { ph_panic("failed to ares_init_options: %s", ares_strerror(res)); } ares_set_socket_callback(chan->chan, sock_create_cb, chan); return chan; }
void ph_nbio_emitter_init(struct ph_nbio_emitter *emitter) { struct itimerspec ts; #ifdef HAVE_EPOLL_CREATE1 emitter->io_fd = epoll_create1(EPOLL_CLOEXEC); #else emitter->io_fd = epoll_create(1024*1024); #endif if (emitter->io_fd == -1) { ph_panic("epoll_create: `Pe%d", errno); } #ifndef HAVE_EPOLL_CREATE1 fcntl(emitter->io_fd, F_SETFD, FD_CLOEXEC); #endif emitter->timer_fd = timerfd_create( CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC); if (emitter->timer_fd == -1) { ph_panic("timerfd_create(CLOCK_MONOTONIC) failed: `Pe%d", errno); } memset(&ts, 0, sizeof(ts)); ts.it_interval.tv_nsec = WHEEL_INTERVAL_MS * 1000000; ts.it_value.tv_nsec = ts.it_interval.tv_nsec; timerfd_settime(emitter->timer_fd, 0, &ts, NULL); ph_job_init(&emitter->timer_job); emitter->timer_job.callback = tick_epoll; emitter->timer_job.fd = emitter->timer_fd; emitter->timer_job.data = emitter; emitter->timer_job.emitter_affinity = emitter->emitter_id; ph_job_set_nbio(&emitter->timer_job, PH_IOMASK_READ, 0); }
/* Act on a single event returned by kevent().
 *
 * Timer events refresh thread->now and advance the emitter's timer wheel.
 * Read and write events clear the job's kmask and dispatch the job
 * (event->udata) with the matching I/O mask.  Any non-timer event flagged
 * EV_ERROR is a panic.  Other filters are ignored.
 */
static inline void dispatch_kevent(struct ph_nbio_emitter *emitter,
    ph_thread_t *thread, struct kevent *event)
{
  ph_iomask_t mask;
  ph_job_t *job;

  if (event->filter == EVFILT_TIMER) {
    gettimeofday(&thread->now, NULL);
    thread->refresh_time = false;
    ph_nbio_emitter_timer_tick(emitter);
    return;
  }

  if ((event->flags & EV_ERROR) != 0) {
    // Errors are treated strictly at this stage to help ensure we're
    // doing the right thing; some of them may turn out to be ignorable.
    ph_panic("kqueue error on fd:%d `Pe%d",
        (int)event->ident, (int)event->data);
  }

  if (event->filter == EVFILT_READ) {
    // EV_EOF is deliberately NOT mapped to PH_IOMASK_ERR here: it can be
    // set once the read side has been shut down while there is still
    // buffered data we want to read.  EOF is detected when the read is
    // actually attempted.
    mask = PH_IOMASK_READ;
  } else if (event->filter == EVFILT_WRITE) {
    mask = PH_IOMASK_WRITE;
  } else {
    return;
  }

  thread->refresh_time = true;
  job = event->udata;
  job->kmask = 0;
  ph_nbio_emitter_dispatch_immediate(emitter, job, mask);
}
static ph_thread_t *ph_thread_init_myself(bool booting) { ph_thread_t *me; ck_epoch_record_t *er; er = ck_epoch_recycle(&misc_epoch); if (er) { me = ph_container_of(er, ph_thread_t, epoch_record); } else { me = calloc(1, sizeof(*me)); if (!me) { ph_panic("fatal OOM in ph_thread_init_myself()"); } ck_epoch_register(&misc_epoch, &me->epoch_record); ck_stack_push_mpmc(&ph_thread_all_threads, &me->thread_linkage); ph_counter_init_thread(me); } #ifdef HAVE___THREAD __ph_thread_self = me; #endif pthread_setspecific(__ph_thread_key, me); PH_STAILQ_INIT(&me->pending_nbio); PH_STAILQ_INIT(&me->pending_pool); me->tid = ck_pr_faa_32(&next_tid, 1); me->thr = pthread_self(); #ifdef __sun__ me->lwpid = _lwp_self(); #endif #if defined(__linux__) || defined(__MACH__) // see if we can discover our thread name from the system pthread_getname_np(me->thr, me->name, sizeof(me->name)); #endif // If we were recycled from a non-phenom thread, and are initializing // a non-phenom thread, it is possible that there are still deferred // items to reap in this record, so get them now. if (er && !booting) { ck_epoch_barrier(&misc_epoch, &me->epoch_record); } return me; }
static void result_cb(void *arg, int status, int timeouts, unsigned char *abuf, int alen) { struct ph_dns_query *q = arg; struct ph_dns_query_response *resp = NULL; switch (q->qtype) { case PH_DNS_QUERY_NONE: q->func.raw(q->arg, status, timeouts, abuf, alen); break; case PH_DNS_QUERY_MX: if (status == ARES_SUCCESS) { resp = make_mx_resp(abuf, alen); } q->func.func(q->arg, status, timeouts, abuf, alen, resp); break; case PH_DNS_QUERY_A: if (status == ARES_SUCCESS) { resp = make_a_resp(abuf, alen); } q->func.func(q->arg, status, timeouts, abuf, alen, resp); break; case PH_DNS_QUERY_SRV: if (status == ARES_SUCCESS) { resp = make_srv_resp(abuf, alen); } q->func.func(q->arg, status, timeouts, abuf, alen, resp); break; case PH_DNS_QUERY_AAAA: if (status == ARES_SUCCESS) { resp = make_aaaa_resp(abuf, alen); } q->func.func(q->arg, status, timeouts, abuf, alen, resp); break; default: ph_panic("invalid qtype %d", q->qtype); } ph_mem_free(mt.query, q); }
/* Start an asynchronous DNS lookup of `name` on `chan`.
 *
 * `chan` is first passed through fixup_chan.  A query record remembering
 * the channel, `arg`, `query_type` and `func` is allocated and handed to
 * ares_search (class IN) with result_cb as the completion callback; the
 * search is issued while holding the channel lock.
 *
 * If the record cannot be allocated, `func` is invoked right away with
 * ARES_ENOMEM.  A `query_type` other than A, AAAA, SRV or MX is a panic.
 */
void ph_dns_channel_query(
    ph_dns_channel_t *chan,
    const char *name,
    int query_type,
    ph_dns_channel_query_func func,
    void *arg)
{
  struct ph_dns_query *q;
  int type;

  chan = fixup_chan(chan);

  q = ph_mem_alloc(mt.query);
  if (q == NULL) {
    func(arg, ARES_ENOMEM, 0, NULL, 0, NULL);
    return;
  }

  q->chan = chan;
  q->arg = arg;
  q->qtype = query_type;
  q->func.func = func;

  // Translate our query type into the resolver's record type
  if (query_type == PH_DNS_QUERY_A) {
    type = ns_t_a;
  } else if (query_type == PH_DNS_QUERY_AAAA) {
    type = ns_t_aaaa;
  } else if (query_type == PH_DNS_QUERY_SRV) {
    type = ns_t_srv;
  } else if (query_type == PH_DNS_QUERY_MX) {
    type = ns_t_mx;
  } else {
    ph_panic("invalid query type %d", query_type);
  }

  pthread_mutex_lock(&chan->chanlock);
  ares_search(chan->chan, name, ns_c_in, type, result_cb, q);
  pthread_mutex_unlock(&chan->chanlock);
}
/* Allocate one item from a fixed-size memtype.
 *
 * The allocation is def.item_size bytes.  The byte and allocation counters
 * of the memtype are updated, and the memory is zeroed when the memtype
 * carries PH_MEM_FLAGS_ZERO.
 *
 * Returns NULL when the memtype is variable-size (after memory_panic) or
 * when malloc fails; in the latter case the OOM counter is bumped and
 * ph_panic is raised first if the memtype carries PH_MEM_FLAGS_PANIC.
 */
void *ph_mem_alloc(ph_memtype_t mt)
{
  static const uint8_t slots[2] = { SLOT_BYTES, SLOT_ALLOCS };
  struct mem_type *mem_type = resolve_mt(mt);
  ph_counter_block_t *block;
  int64_t values[2];
  void *item;

  if (mem_type->def.item_size == 0) {
    memory_panic("mem_type %s is vsize, cannot be used with ph_mem_alloc",
        mem_type->def.name);
    return NULL;
  }

  item = malloc(mem_type->def.item_size);
  if (item == NULL) {
    ph_counter_scope_add(mem_type->scope,
        mem_type->first_slot + SLOT_OOM, 1);

    if (mem_type->def.flags & PH_MEM_FLAGS_PANIC) {
      ph_panic("OOM while allocating %" PRIu64 " bytes of %s/%s memory",
          mem_type->def.item_size, mem_type->def.facility,
          mem_type->def.name);
    }
    return NULL;
  }

  // Account for the bytes handed out and for one more allocation
  values[0] = mem_type->def.item_size;
  values[1] = 1;
  block = ph_counter_block_open(mem_type->scope);
  ph_counter_block_bulk_add(block, 2, slots, values);
  ph_counter_block_delref(block);

  if (mem_type->def.flags & PH_MEM_FLAGS_ZERO) {
    memset(item, 0, mem_type->def.item_size);
  }

  return item;
}
/* Drop one reference to a variant and destroy it when ph_refcnt_del
 * reports that it should be released.
 *
 * Arrays release each element and then their backing storage, objects
 * destroy their hash table, and strings release their ph_string.  Reaching
 * the release path for TRUE/FALSE/NULL variants is treated as a refcounting
 * bug and panics.  Finally the variant itself is freed.
 */
void ph_var_delref(ph_variant_t *var)
{
  uint32_t idx;

  if (!ph_refcnt_del(&var->ref)) {
    // Nothing to release yet
    return;
  }

  switch (var->type) {
    case PH_VAR_TRUE:
    case PH_VAR_FALSE:
    case PH_VAR_NULL:
      ph_panic("You have a refcounting problem");
      // No break here: control continues into the array case should
      // ph_panic ever return

    case PH_VAR_ARRAY:
      if (!var->u.aval.arr) {
        break;
      }
      idx = 0;
      while (idx < var->u.aval.len) {
        ph_var_delref(var->u.aval.arr[idx]);
        idx++;
      }
      ph_mem_free(mt.arr, var->u.aval.arr);
      var->u.aval.arr = 0;
      break;

    case PH_VAR_OBJECT:
      ph_ht_destroy(&var->u.oval);
      break;

    case PH_VAR_STRING:
      if (!var->u.sval) {
        break;
      }
      ph_string_delref(var->u.sval);
      var->u.sval = 0;
      break;

    default:
      break;
  }

  ph_mem_free(mt.var, var);
}
/* Decode the four hex digits of a \uXXXX escape.
 *
 * `str` must point at the 'u' and be followed by at least four valid hex
 * digits.  Returns the value those four characters encode, most
 * significant digit first.  A character that is neither a digit nor a
 * letter is a panic.
 */
static int32_t decode_unicode_escape(const char *str)
{
  int32_t value = 0;
  const char *p;

  assert(str[0] == 'u');

  for (p = str + 1; p != str + 5; p++) {
    const char c = *p;
    int nibble = 0;

    if (l_isdigit(c)) {
      nibble = c - '0';
    } else if (l_islower(c)) {
      nibble = c - 'a' + 10;
    } else if (l_isupper(c)) {
      nibble = c - 'A' + 10;
    } else {
      ph_panic("unpossible unicode escape c=%d", c);
    }

    // Shift the digits seen so far up and append this one
    value = (value << 4) + nibble;
  }

  return value;
}
/* Main loop of a kqueue-backed emitter.
 *
 * While _ph_run_loop is set, each iteration submits the emitter's queued
 * changes and collects ready events in a single kevent() call, using the
 * same array for both.  At most "$.nbio.max_per_wakeup" (default 1024)
 * events are requested per wakeup.  Events are dispatched inside an epoch
 * section, the set is grown when it was (nearly) filled, deferred job items
 * are applied, and the epoch is polled.
 *
 * The change set is disposed of when the loop exits.  kevent() errors other
 * than EINTR are a panic.
 */
void ph_nbio_emitter_run(struct ph_nbio_emitter *emitter, ph_thread_t *thread)
{
  int max_chunk = ph_config_query_int("$.nbio.max_per_wakeup", 1024);
  int nready;
  int idx;

  while (ck_pr_load_int(&_ph_run_loop)) {
    nready = kevent(emitter->io_fd,
        emitter->kqset.events, emitter->kqset.used,
        emitter->kqset.events, MIN(emitter->kqset.size, max_chunk),
        NULL);

    if (nready < 0) {
      if (errno != EINTR) {
        ph_panic("kevent: `Pe%d", errno);
      }
      continue;
    }
    if (nready == 0) {
      continue;
    }

    ph_thread_epoch_begin();

    for (idx = 0; idx < nready; idx++) {
      dispatch_kevent(emitter, thread, emitter->kqset.events + idx);
    }

    // The result array came back (almost) full: make more room
    if (nready + 1 >= emitter->kqset.size) {
      grow_kq_set(&emitter->kqset);
    }
    // The queued changes were handed to kevent() above; start afresh
    emitter->kqset.used = 0;

    if (ph_job_have_deferred_items(thread)) {
      ph_job_pool_apply_deferred_items(thread);
    }

    ph_thread_epoch_end();
    ph_thread_epoch_poll();
  }

  dispose_kq_set(&emitter->kqset);
}
/* Main loop of an event-port backed emitter.
 *
 * While _ph_run_loop is set, waits on the port for up to "$.nbio.max_sleep"
 * milliseconds (default 5000) and collects at most "$.nbio.max_per_wakeup"
 * (default 1024) events per wakeup.  Timer events refresh thread->now and
 * advance the timer wheel; descriptor events clear the job's kmask and
 * dispatch the job with the translated I/O mask; user events are ignored.
 * Each event is handled inside its own epoch section, after which deferred
 * job items are applied and the epoch is polled.
 *
 * Failure to allocate the event array, and port_getn() errors other than
 * EINTR/ETIME, are a panic.
 */
void ph_nbio_emitter_run(struct ph_nbio_emitter *emitter, ph_thread_t *thread)
{
  port_event_t *event;
  uint_t n, i, max_chunk, max_sleep;
  ph_job_t *job;
  ph_iomask_t mask;
  struct timespec ts;

  max_chunk = ph_config_query_int("$.nbio.max_per_wakeup", 1024);
  max_sleep = ph_config_query_int("$.nbio.max_sleep", 5000);

  // Split the millisecond sleep limit into seconds + nanoseconds
  ts.tv_sec = max_sleep / 1000;
  ts.tv_nsec = (max_sleep - (ts.tv_sec * 1000)) * 1000000;

  event = malloc(max_chunk * sizeof(port_event_t));
  if (!event) {
    // The array is written to straight away below; don't run without it
    ph_panic("OOM");
  }

  while (ck_pr_load_int(&_ph_run_loop)) {
    // Ask for at least one event, up to max_chunk
    n = 1;
    memset(event, 0, sizeof(*event));

    if (port_getn(emitter->io_fd, event, max_chunk, &n, &ts)) {
      if (errno != EINTR && errno != ETIME) {
        ph_panic("port_getn: `Pe%d", errno);
      }
      n = 0;
    }

    if (!n) {
      ph_thread_epoch_poll();
      continue;
    }

    for (i = 0; i < n; i++) {
      ph_thread_epoch_begin();

      switch (event[i].portev_source) {
        case PORT_SOURCE_TIMER:
          gettimeofday(&thread->now, NULL);
          thread->refresh_time = false;
          ph_nbio_emitter_timer_tick(emitter);
          break;

        case PORT_SOURCE_USER:
          break;

        case PORT_SOURCE_FD:
          thread->refresh_time = true;
          job = event[i].portev_user;

          // Anything other than a pure IN/OUT combination is reported to
          // the job as an error
          switch (event[i].portev_events &
              (POLLIN|POLLOUT|POLLERR|POLLHUP)) {
            case POLLIN:
              mask = PH_IOMASK_READ;
              break;
            case POLLOUT:
              mask = PH_IOMASK_WRITE;
              break;
            case POLLIN|POLLOUT:
              mask = PH_IOMASK_READ|PH_IOMASK_WRITE;
              break;
            default:
              mask = PH_IOMASK_ERR;
          }

          job->kmask = 0;
          ph_nbio_emitter_dispatch_immediate(emitter, job, mask);
          break;
      }

      if (ph_job_have_deferred_items(thread)) {
        ph_job_pool_apply_deferred_items(thread);
      }

      ph_thread_epoch_end();
      ph_thread_epoch_poll();
    }
  }

  free(event);
}
/* Resize an allocation made from a variable-size memtype.
 *
 * - size == 0 frees `ptr` and returns NULL.
 * - ptr == NULL behaves like ph_mem_alloc_size(mt, size).
 * - Otherwise the block (including the struct sized_header stored in front
 *   of it) is resized with realloc, the byte and realloc counters are
 *   updated, and any newly gained tail is zeroed when the memtype carries
 *   PH_MEM_FLAGS_ZERO.
 *
 * Returns the (possibly moved) payload pointer.  When the requested size
 * equals the current size the caller's own pointer is returned unchanged.
 * Returns NULL when the memtype has a fixed item_size (after memory_panic)
 * or when realloc fails; in the latter case the OOM counter is bumped and
 * ph_panic is raised first if the memtype carries PH_MEM_FLAGS_PANIC.
 */
void *ph_mem_realloc(ph_memtype_t mt, void *ptr, uint64_t size)
{
  struct mem_type *mem_type;
  ph_counter_block_t *block;
  static const uint8_t slots[2] = { SLOT_BYTES, SLOT_REALLOC };
  int64_t values[2];
  struct sized_header *hdr;
  uint64_t orig_size;
  void *new_ptr;

  if (size == 0) {
    ph_mem_free(mt, ptr);
    return NULL;
  }
  if (ptr == NULL) {
    return ph_mem_alloc_size(mt, size);
  }

  mem_type = resolve_mt(mt);
  if (mem_type->def.item_size) {
    memory_panic(
        "mem_type %s is not vsize and cannot be used with ph_mem_realloc",
        mem_type->def.name);
    return NULL;
  }

  // The header sits immediately in front of the caller's pointer
  hdr = ptr;
  hdr--;

  if (hdr->mt != mt) {
    memory_panic("ph_mem_realloc: hdr->mt %d != caller provided mt %d %s",
        hdr->mt, mt, mem_type->def.name);
  }

  orig_size = hdr->size;
  if (orig_size == size) {
    // Nothing to do.  Return the caller's payload pointer, not the header
    // address that precedes it.
    return ptr;
  }

  hdr = realloc(hdr, size + HEADER_RESERVATION);
  if (!hdr) {
    ph_counter_scope_add(mem_type->scope,
        mem_type->first_slot + SLOT_OOM, 1);

    if (mem_type->def.flags & PH_MEM_FLAGS_PANIC) {
      ph_panic("OOM while allocating %" PRIu64 " bytes of %s/%s memory",
          size + HEADER_RESERVATION, mem_type->def.facility,
          mem_type->def.name);
    }

    return NULL;
  }

  new_ptr = hdr + 1;
  hdr->size = size;

  // Record the change in size and one more realloc
  block = ph_counter_block_open(mem_type->scope);
  values[0] = size - orig_size;
  values[1] = 1;
  ph_counter_block_bulk_add(block, 2, slots, values);
  ph_counter_block_delref(block);

  // Only the newly gained tail needs zeroing
  if (size > orig_size && mem_type->def.flags & PH_MEM_FLAGS_ZERO) {
    memset((char*)new_ptr + orig_size, 0, size - orig_size);
  }

  return new_ptr;
}