int onload_stack_opt_get_int(const char* opt_env, int64_t* opt_val) { struct oo_per_thread* pt; ci_netif_config_opts* opts; pt = oo_per_thread_get(); opts = pt->thread_local_netif_opts; if( opts == NULL) { opts = &ci_cfg_opts.netif_opts; } #undef CI_CFG_OPT #define CI_CFG_OPT(env, name, p3, p4, p5, p6, p7, p8, p9, p10) \ { \ if( ! strcmp(env, opt_env) ) { \ *opt_val = opts->name; \ return 0; \ } \ } #include <ci/internal/opts_netif_def.h> LOG_E(ci_log("%s: Requested option %s not found", __FUNCTION__, opt_env)); return -EINVAL; }
/* Drop the calling thread's private copy of config_opts (if it has one) so
 * that the thread reverts to using the netif's config_opts.
 *
 * Always returns 0.
 */
int onload_stack_opt_reset(void)
{
  struct oo_per_thread* per_thread = oo_per_thread_get();

  /* Nothing was ever overridden on this thread. */
  if( per_thread->thread_local_netif_opts == NULL )
    return 0;

  free(per_thread->thread_local_netif_opts);
  per_thread->thread_local_netif_opts = NULL;
  return 0;
}
/* Probe [fd] with the fdtable lock held, leaving the caller's errno as it
 * was on entry.
 *
 * Returns NULL without probing when CITP_OPTS.probe is unset or when the
 * calling thread is flagged as being in a vfork child; otherwise returns
 * the result of citp_fdtable_probe_locked().
 */
static citp_fdinfo * citp_fdtable_probe(unsigned fd)
{
  citp_fdinfo* probed;
  int errno_on_entry;

  ci_assert(fd < citp_fdtable.size);

  if( ! CITP_OPTS.probe )
    return NULL;
  if( oo_per_thread_get()->in_vfork_child )
    return NULL;

  errno_on_entry = errno;

  CITP_FDTABLE_LOCK();
  __citp_fdtable_extend(fd);
  probed = citp_fdtable_probe_locked(fd, CI_FALSE, CI_FALSE);
  CITP_FDTABLE_UNLOCK();

  /* Whatever the probe did to errno is not the caller's business. */
  errno = errno_on_entry;
  return probed;
}
/* This API provides per thread ability to modify ci_netif_config_opts * for future stacks. If not present, this makes a thread local copy * of the default ci_netif_config_opts and updates it as requested. * Any future stacks will use the thread local copy of config_opts and * if absent, use the default copy. */ int onload_stack_opt_set_int(const char* opt_env, int64_t opt_val) { struct oo_per_thread* pt; ci_netif_config_opts* default_opts; pt = oo_per_thread_get(); if( pt->thread_local_netif_opts == NULL ) { pt->thread_local_netif_opts = malloc(sizeof(*pt->thread_local_netif_opts)); if( ! pt->thread_local_netif_opts) return -ENOMEM; default_opts = &ci_cfg_opts.netif_opts; memcpy(pt->thread_local_netif_opts, default_opts, sizeof(*default_opts)); } #define ci_uint32_fmt "%u" #define ci_uint16_fmt "%u" #define ci_int32_fmt "%d" #define ci_int16_fmt "%d" #define ci_iptime_t_fmt "%u" #define _CI_CFG_BITVAL _optbits #define _CI_CFG_BITVAL1 1 #define _CI_CFG_BITVAL2 2 #define _CI_CFG_BITVAL3 3 #define _CI_CFG_BITVAL4 4 #define _CI_CFG_BITVAL8 8 #define _CI_CFG_BITVAL16 16 #define _CI_CFG_BITVALA8 _CI_CFG_BITVAL #undef CI_CFG_OPTFILE_VERSION #undef CI_CFG_OPT #undef CI_CFG_OPTGROUP #undef MIN #undef MAX #undef SMIN #undef SMAX #define CI_CFG_OPTFILE_VERSION(version) #define CI_CFG_OPTGROUP(group, category, expertise) #define MIN 0 #define MAX (((1ull<<(_bitwidth-1))<<1) - 1ull) #define SMAX (MAX >> 1) #define SMIN (-SMAX-1) #define CI_CFG_OPT(env, name, type, doc, bits, group, default, min, max, presentation) \ { \ type _max; \ type _min; \ int _optbits = sizeof(type) * 8; \ int _bitwidth = _CI_CFG_BITVAL##bits; \ (void)_bitwidth; \ (void)_optbits; \ _max = (type)(max); \ _min = (type)(min); \ if( ! 
strcmp(env, opt_env) ) { \ if( opt_val < _min || opt_val > _max ) { \ LOG_E(ci_log("%s: %"PRId64" outside of range ("type##_fmt":" \ type##_fmt") for %s", \ __FUNCTION__, opt_val, _min, _max, opt_env)); \ return -EINVAL; \ } \ pt->thread_local_netif_opts->name = opt_val; \ return 0; \ } \ } #include <ci/internal/opts_netif_def.h> LOG_E(ci_log("%s: Requested option %s not found", __FUNCTION__, opt_env)); return -EINVAL; }
/* Look up the fdinfo for [fd] in the fdtable, taking a reference on it.
 *
 * Returns:
 *  - for a "normal" table entry, the entry's fdinfo with its reference
 *    count bumped (or the result of citp_reprobe_moved() if the fdinfo is
 *    found not to be consistent);
 *  - NULL for a pass-through entry, or when [fd] is not below
 *    citp_fdtable.size;
 *  - otherwise whatever citp_fdtable_probe() returns.
 */
citp_fdinfo * citp_fdtable_lookup(unsigned fd)
{
  /* Note that if we haven't yet initialised this module, then
  ** [inited_count] will be zero, and the following test will fail.  So the
  ** test for initialisation is done further down...
  **
  ** This is highly performance critical.  DO NOT add any code between here
  ** and the first [return] statement.
  */
  citp_fdinfo* fdi;

  /* In some cases, we'll lock fdtable.  Assert that it is possible: */
  ci_assert(oo_per_thread_get()->sig.inside_lib);

  if( fd < citp_fdtable.inited_count ) {
    volatile citp_fdinfo_p* p_fdip = &citp_fdtable.table[fd].fdip;
    citp_fdinfo_p fdip;

  again:
    /* Read the current entry; in the not-mt-safe case below we then try to
     * swap in the busy marker. */
    fdip = *p_fdip;

    if( fdip_is_normal(fdip) ) {
      if( citp_fdtable_not_mt_safe() ) {
        /* Claim the entry by replacing it with the busy marker.  If the
         * compare-and-swap does not succeed, start again from a fresh
         * read. */
        if( fdip_cas_succeed(p_fdip, fdip, fdip_busy) ) {
          fdi = fdip_to_fdi(fdip);
          ci_assert(fdi);
          ci_assert_gt(oo_atomic_read(&fdi->ref_count), 0);
          ci_assert(fdip_is_closing(fdip) || fdip_is_reserved(fdip) ||
                    fdi->fd == fd);
          /* Bump the reference count. */
          citp_fdinfo_ref(fdi);

          if( ! citp_fdinfo_is_consistent(fdi) ) {
            /* Something is wrong.  Re-probe.  Note the busy marker is not
             * cleared on this path; CI_TRUE is passed as the last
             * argument. */
            fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_TRUE);
          }
          else {
            /* Swap the busy marker out again. */
            citp_fdtable_busy_clear(fd, fdip, 0);
          }
          return fdi;
        }
        goto again;
      }
      else {
        /* No need to use atomic ops when single-threaded.  The definition
         * of "fds_mt_safe" is that the app does not change the meaning of
         * a file descriptor in one thread when it is being used in another
         * thread.  In that case I'm hoping this should be safe, but at
         * time of writing I'm really not confident.  (FIXME).
         */
        fdi = fdip_to_fdi(fdip);
        /* Plain increment of the counter only when not multithreaded. */
        if( ci_is_multithreaded() )
          citp_fdinfo_ref(fdi);
        else
          ++fdi->ref_count.n;

        /* The entry was never marked busy on this path; CI_FALSE is passed
         * as the last argument. */
        if( ! citp_fdinfo_is_consistent(fdi) )
          fdi = citp_reprobe_moved(fdi, CI_FALSE, CI_FALSE);

        return fdi;
      }
    }

    /* Not normal! */
    if( fdip_is_passthru(fdip) )  return NULL;

    /* Another party holds the busy marker: wait, then re-read the entry. */
    if( fdip_is_busy(fdip) ) {
      citp_fdtable_busy_wait(fd, 0);
      goto again;
    }

    /* The only state left is "unknown", which is resolved by probing. */
    ci_assert(fdip_is_unknown(fdip));
    goto probe;
  }

  /* [fd] is not below inited_count, which is also the case before this
   * module has been initialised (see the note at the top). */
  if (citp.init_level < CITP_INIT_FDTABLE) {
    if (_citp_do_init_inprogress == 0)
      CI_TRY(citp_do_init(CITP_INIT_ALL));
    else
      CI_TRY(citp_do_init(CITP_INIT_FDTABLE)); /* get what we need */
  }

  if( fd >= citp_fdtable.size )  return NULL;

 probe:
  fdi = citp_fdtable_probe(fd);

  return fdi;
}
/* Close [fd], keeping its fdtable entry in step with the close.
 *
 * Returns:
 *  - the result of ci_tcp_helper_close_no_trampoline() when called in a
 *    vfork child, or when the entry is pass-through or unknown;
 *  - the result of ci_sys_close() when [fd] is not below inited_count even
 *    after extending the table;
 *  - 0 when the entry is a normal fdinfo, which is marked
 *    FDI_ON_RCZ_CLOSE and has a reference released;
 *  - -1 with errno set to EBADF when the entry is already closing or is
 *    reserved, when the fdinfo is flagged is_special, or (with
 *    CI_CFG_FD_CACHING) when probing yields citp_the_closed_fd.
 *
 * [got_lock] tracks whether the fdtable lock is still held, so that the
 * common exit at [done] releases it exactly once.
 */
int citp_ep_close(unsigned fd)
{
  volatile citp_fdinfo_p* p_fdip;
  citp_fdinfo_p fdip;
  int rc, got_lock;
  citp_fdinfo* fdi;

  /* Do not touch shared fdtable when in vfork child. */
  if( oo_per_thread_get()->in_vfork_child )
    return ci_tcp_helper_close_no_trampoline(fd);

  /* Interlock against other closes, against the fdtable being extended,
  ** and against select and poll.
  */
  CITP_FDTABLE_LOCK();
  got_lock = 1;

  __citp_fdtable_extend(fd);

  /* Still no table entry for this fd: just close it. */
  if( fd >= citp_fdtable.inited_count ) {
    rc = ci_sys_close(fd);
    goto done;
  }

  p_fdip = &citp_fdtable.table[fd].fdip;
 again:
  fdip = *p_fdip;
  /* If the entry is busy, wait and carry on with the value the wait
   * returns. */
  if( fdip_is_busy(fdip) )  fdip = citp_fdtable_busy_wait(fd, 1);

  if( fdip_is_closing(fdip) | fdip_is_reserved(fdip) ) {
    /* Concurrent close or attempt to close reserved. */
    Log_V(ci_log("%s: fd=%d closing=%d reserved=%d", __FUNCTION__, fd,
                 fdip_is_closing(fdip), fdip_is_reserved(fdip)));
    errno = EBADF;
    rc = -1;
    goto done;
  }

#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(fdip) ) {
    fdi = citp_fdtable_probe_locked(fd, CI_FALSE, CI_FALSE);
    if( fdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(fdi, CI_TRUE);
      errno = EBADF;
      rc = -1;
      goto done;
    }
    /* The probe result is not used further here; drop its reference. */
    if( fdi )
      citp_fdinfo_release_ref(fdi, CI_TRUE);
  }
#endif

  ci_assert(fdip_is_normal(fdip) | fdip_is_passthru(fdip) |
            fdip_is_unknown(fdip));

  /* Swap in the "closed" pseudo-fdinfo.  This lets any other thread know
  ** that we're in the middle of closing this fd.
  */
  if( fdip_cas_fail(p_fdip, fdip, fdip_closing) )  goto again;

  if( fdip_is_normal(fdip) ) {
    fdi = fdip_to_fdi(fdip);

    /* The entry now reads "closing", and the lock is released before the
     * rest of the work. */
    CITP_FDTABLE_UNLOCK();
    got_lock = 0;

    if( fdi->is_special ) {
      Log_V(ci_log("%s: fd=%d is_special, returning EBADF",
                   __FUNCTION__, fd));
      errno = EBADF;
      rc = -1;
      /* Refuse the close: put the original entry back in place of the
       * closing marker. */
      fdtable_swap(fd, fdip_closing, fdip, 0);
      goto done;
    }

    Log_V(ci_log("%s: fd=%d u/l socket", __FUNCTION__, fd));
    ci_assert_equal(fdi->fd, fd);
    ci_assert_equal(fdi->on_ref_count_zero, FDI_ON_RCZ_NONE);
    /* Record the action to take when the reference count reaches zero. */
    fdi->on_ref_count_zero = FDI_ON_RCZ_CLOSE;

    /* A non-negative epoll_fd is looked up as the epoll set this fd is a
     * member of; if it resolves to an epoll fdinfo, tell it about the
     * close. */
    if( fdi->epoll_fd >= 0 ) {
      citp_fdinfo* epoll_fdi = citp_epoll_fdi_from_member(fdi, 0);
      if( epoll_fdi ) {
        if( epoll_fdi->protocol->type == CITP_EPOLL_FD )
          citp_epoll_on_close(epoll_fdi, fdi, 0);
        citp_fdinfo_release_ref(epoll_fdi, 0);
      }
    }

    citp_fdinfo_release_ref(fdi, 0);
    rc = 0;
  }
  else {
    ci_assert(fdip_is_passthru(fdip) ||
              fdip_is_unknown(fdip));
    /* When fdtable_strict() is set the lock stays held across the swap and
     * the close below, and is released at [done]. */
    if( ! fdtable_strict() ) {
      CITP_FDTABLE_UNLOCK();
      got_lock = 0;
    }
    Log_V(ci_log("%s: fd=%d passthru=%d unknown=%d", __FUNCTION__, fd,
                 fdip_is_passthru(fdip), fdip_is_unknown(fdip)));
    /* Replace the closing marker with "unknown" before closing the fd. */
    fdtable_swap(fd, fdip_closing, fdip_unknown, fdtable_strict());
    rc = ci_tcp_helper_close_no_trampoline(fd);
  }

 done:
  if( got_lock )  CITP_FDTABLE_UNLOCK();
  FDTABLE_ASSERT_VALID();
  return rc;
}
/*
** Why do these live here?  Because they need to hack into the low-level
** dirty nastiness of the fdtable.
*/

/* Duplicate [oldfd] by calling [syscall](oldfd, arg), keeping the fdtable
 * consistent with the result.
 *
 * oldfd:   descriptor to duplicate.
 * syscall: function that performs the actual duplication.
 * arg:     passed through to [syscall] unchanged.
 *
 * Returns the value returned by [syscall] (the new fd, when it is
 * non-negative), or -1 with errno set to EBADF if [oldfd] is closing or
 * reserved (or, with CI_CFG_FD_CACHING, probes as citp_the_closed_fd), or
 * -1 with errno set to ENOMEM if an epoll fdinfo cannot be duplicated.
 *
 * While the duplication is in progress the table entry for [oldfd] holds
 * the busy marker; it is restored before returning.
 */
int citp_ep_dup(unsigned oldfd, int (*syscall)(int oldfd, long arg),
                long arg)
{
  /* This implements dup(oldfd) and fcntl(oldfd, F_DUPFD, arg). */

  volatile citp_fdinfo_p* p_oldfdip;
  citp_fdinfo_p oldfdip;
  citp_fdinfo*  newfdi = 0;
  citp_fdinfo*  oldfdi;
  int newfd;

  Log_V(log("%s(%d)", __FUNCTION__, oldfd));

  if(CI_UNLIKELY( citp.init_level < CITP_INIT_FDTABLE ||
                  oo_per_thread_get()->in_vfork_child ))
    /* Lib not initialised, so no U/L state, and therefore system dup()
    ** will do just fine. */
    return syscall(oldfd, arg);

  if( oldfd >= citp_fdtable.inited_count ) {
    /* NB. We can't just pass through in this case because we need to worry
    ** about other threads racing with us.  So we need to be able to lock
    ** this fd while we do the dup. */
    /* NOTE(review): [oldfd] is bounded against citp_fdtable.size only by
     * this assertion.  If ci_assert() compiles out in some builds, nothing
     * visible here stops an out-of-range [oldfd] from indexing the table
     * below -- confirm that callers guarantee the bound. */
    ci_assert(oldfd < citp_fdtable.size);
    CITP_FDTABLE_LOCK();
    __citp_fdtable_extend(oldfd);
    CITP_FDTABLE_UNLOCK();
  }

  p_oldfdip = &citp_fdtable.table[oldfd].fdip;
 again:
  oldfdip = *p_oldfdip;
  /* If the entry is busy, wait and carry on with the value the wait
   * returns. */
  if( fdip_is_busy(oldfdip) )
    oldfdip = citp_fdtable_busy_wait(oldfd, 0);
  if( fdip_is_closing(oldfdip) | fdip_is_reserved(oldfdip) ) {
    errno = EBADF;
    return -1;
  }
#if CI_CFG_FD_CACHING
  /* Need to check in case this sucker's cached */
  if( fdip_is_unknown(oldfdip) ) {
    CITP_FDTABLE_LOCK();
    oldfdi = citp_fdtable_probe_locked(oldfd, CI_FALSE, CI_FALSE);
    CITP_FDTABLE_UNLOCK();
    if( oldfdi == &citp_the_closed_fd ) {
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
      errno = EBADF;
      return -1;
    }
    /* The probe result is not used further here; drop its reference. */
    if( oldfdi )
      citp_fdinfo_release_ref(oldfdi, CI_TRUE);
  }
#endif
  /* Claim [oldfd] by swapping in the busy marker; if the entry changed
   * since it was read, start again. */
  if( fdip_cas_fail(p_oldfdip, oldfdip, fdip_busy) )
    goto again;

#if CI_CFG_FD_CACHING
  /* May end up with multiple refs to this, don't allow it to be cached. */
  if( fdip_is_normal(oldfdip) )
    fdip_to_fdi(oldfdip)->can_cache = 0;
#endif

  if( fdip_is_normal(oldfdip) &&
      (((oldfdi = fdip_to_fdi(oldfdip))->protocol->type) == CITP_EPOLL_FD) ) {
    /* Epoll fds: build the new fdinfo through the protocol's dup() op
     * before duplicating the descriptor itself. */
    newfdi = citp_fdinfo_get_ops(oldfdi)->dup(oldfdi);
    if( ! newfdi ) {
      citp_fdtable_busy_clear(oldfd, oldfdip, 0);
      errno = ENOMEM;
      return -1;
    }

    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    /* Mark the new entry busy until the new fdinfo has been inserted. */
    if( newfd >= 0 )
      citp_fdtable_new_fd_set(newfd, fdip_busy, fdtable_strict());
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
    if( newfd >= 0 ) {
      citp_fdtable_insert(newfdi, newfd, 0);
      /* Handed over to the table; must not be freed below. */
      newfdi = 0;
    }
  }
  else {
    if( fdtable_strict() )  CITP_FDTABLE_LOCK();
    newfd = syscall(oldfd, arg);
    if( newfd >= 0 && newfd < citp_fdtable.inited_count ) {
      /* Mark newfd as unknown.  When used, it'll get probed.
       *
       * We are not just being lazy here: Setting to unknown rather than
       * installing a proper fdi (when oldfd is accelerated) is essential to
       * vfork()+dup()+exec() working properly.  Reason is that child and
       * parent share address space, so child is modifying the parent's
       * fdtable.  Setting an entry to unknown is safe.
       */
      citp_fdtable_new_fd_set(newfd, fdip_unknown, fdtable_strict());
    }
    if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();
  }

  /* Restore the original entry for [oldfd] in place of the busy marker. */
  citp_fdtable_busy_clear(oldfd, oldfdip, 0);
  /* Only still set if the epoll path failed to obtain a new fd. */
  if( newfdi )
    citp_fdinfo_free(newfdi);
  return newfd;
}