static int efab_vi_rm_mmap_mem(struct efrm_vi *virs,
                               unsigned long *bytes, void *opaque,
                               int *map_num, unsigned long *offset)
{
  int queue_type;
  uint32_t len;

  if( virs->q[EFHW_EVQ].capacity != 0 ) {
    len = efhw_iopages_size(&virs->q[EFHW_EVQ].pages);
    len = CI_MIN(len, *bytes);
    ci_assert_gt(len, 0);
    ci_mmap_iopages(&virs->q[EFHW_EVQ].pages, 0, len, bytes, opaque,
                    map_num, offset);
    if(*bytes == 0)
      return 0;
  }

  for( queue_type=EFRM_VI_RM_DMA_QUEUE_COUNT-1; queue_type>=0; queue_type-- ) {
    if( virs->q[queue_type].capacity != 0 ) {
      len = efhw_iopages_size(&virs->q[queue_type].pages);
      len = CI_MIN(len, *bytes);
      ci_assert_gt(len, 0);
      ci_mmap_iopages(&virs->q[queue_type].pages, 0, len, bytes, opaque,
                      map_num, offset);
      if(*bytes == 0)
        return 0;
    }
  }
  return 0;
}
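/* Minimal sketch (not Onload code) of the "*bytes budget" convention the
 * mmap helpers above follow: each region consumes at most its own size from
 * the caller's remaining budget and mapping stops as soon as the budget hits
 * zero.  region_size() and map_region() are hypothetical stand-ins; in the
 * real code ci_mmap_iopages() updates *bytes and *offset itself. */
#include <stdio.h>

#define N_REGIONS 3

static unsigned long region_size(int i)              /* hypothetical */
{
  static const unsigned long sizes[N_REGIONS] = { 8192, 4096, 16384 };
  return sizes[i];
}

static void map_region(int i, unsigned long len, unsigned long* offset)
{
  printf("map region %d: %lu bytes at offset %lu\n", i, len, *offset);
  *offset += len;
}

static int map_all(unsigned long* bytes)
{
  unsigned long offset = 0;
  int i;
  for( i = 0; i < N_REGIONS; ++i ) {
    unsigned long len = region_size(i);
    if( len > *bytes )
      len = *bytes;                  /* the CI_MIN(len, *bytes) step */
    map_region(i, len, &offset);
    *bytes -= len;
    if( *bytes == 0 )
      return 0;                      /* budget exhausted: stop early */
  }
  return 0;
}

int main(void)
{
  unsigned long budget = 12288;      /* caller asked for 12KiB of mappings */
  return map_all(&budget);
}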
static int efab_vi_rm_mmap_ctpio(struct efrm_vi *virs, unsigned long *bytes,
                                 void *opaque, int *map_num,
                                 unsigned long *offset)
{
  int rc;
  int len;
  int instance;
  struct efhw_nic *nic;
  int bar_off;
  /* The CTPIO region is 12K from the start of the VI's aperture. */
  const int CTPIO_OFFSET = 12 * 1024;

  instance = virs->rs.rs_instance;

  if( ! (virs->flags & EFHW_VI_TX_CTPIO) ) {
    EFRM_ERR("%s: CTPIO is not enabled on VI instance %d\n", __FUNCTION__,
             instance);
    return -EINVAL;
  }

  /* Map the CTPIO region, which is 12K from the start of the VI's
   * aperture. */
  len = CI_MIN(*bytes, CI_PAGE_SIZE);
  *bytes -= len;
  nic = efrm_client_get_nic(virs->rs.rs_client);
  ci_assert_ge(nic->vi_stride, CTPIO_OFFSET + len);
  bar_off = (ef10_tx_dma_page_base(nic->vi_stride, instance) + CTPIO_OFFSET)
            & PAGE_MASK;
  rc = ci_mmap_bar(nic, bar_off, len, opaque, map_num, offset, 1);
  if( rc < 0 )
    EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
  return rc;
}
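/* Arithmetic sketch (assumed values, not taken from the hardware docs) of
 * how the BAR offset above is derived: the per-VI page base plus the 12KiB
 * CTPIO offset is rounded down to a page boundary with PAGE_MASK.  The
 * 16KiB vi_stride, the instance number, and the assumption that the VI page
 * base is instance * stride are all illustrative, not guaranteed. */
#include <stdio.h>

int main(void)
{
  const unsigned long page_size = 4096;
  const unsigned long page_mask = ~(page_size - 1);    /* like PAGE_MASK */
  const unsigned long vi_stride = 16 * 1024;           /* assumed */
  const unsigned long ctpio_off = 12 * 1024;           /* from the source */
  const int instance = 5;                              /* arbitrary VI */

  unsigned long vi_base = (unsigned long)instance * vi_stride; /* assumed */
  unsigned long bar_off = (vi_base + ctpio_off) & page_mask;

  /* The mapping starts at bar_off; the CTPIO registers sit
   * (vi_base + ctpio_off) - bar_off bytes into that page (0 here, because
   * both the stride and the 12KiB offset are multiples of the page size). */
  printf("vi_base=%#lx bar_off=%#lx in-page offset=%lu\n",
         vi_base, bar_off, (vi_base + ctpio_off) - bar_off);
  return 0;
}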
static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt,
                                 struct onload_zc_msg* zc_msg)
{
  int i, bytes_left = pkt->pf.udp.pay_len;
  ci_ip_pkt_fmt* frag;
  ci_ip_pkt_fmt* handle_frag;

  handle_frag = frag = pkt;
  i = 0;
  ci_assert_nequal(zc_msg->iov, NULL);

  /* Ignore first frag if zero length and there is another frag, but
   * still pass the zero-length buffer as the onload_zc_handle so it
   * will get freed correctly
   */
  if( oo_offbuf_left(&frag->buf) == 0 && OO_PP_NOT_NULL(frag->frag_next) )
    frag = PKT_CHK_NNL(ni, frag->frag_next);

  do {
    zc_msg->iov[i].iov_len = CI_MIN(oo_offbuf_left(&frag->buf), bytes_left);
    zc_msg->iov[i].iov_base = oo_offbuf_ptr(&frag->buf);
    zc_msg->iov[i].buf = (onload_zc_handle)handle_frag;
    zc_msg->iov[i].iov_flags = 0;
    bytes_left -= zc_msg->iov[i].iov_len;
    ++i;
    if( OO_PP_IS_NULL(frag->frag_next) ||
        (i == CI_UDP_ZC_IOVEC_MAX) ||
        (bytes_left == 0) )
      break;
    frag = PKT_CHK_NNL(ni, frag->frag_next);
    handle_frag = frag;
  } while( 1 );
  zc_msg->msghdr.msg_iovlen = i;
}
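/* Consumer-side sketch: walking the iovec array that ci_udp_pkt_to_zc_msg()
 * fills in.  This assumes the onload_zc_msg/onload_zc_iovec definitions from
 * the Onload zero-copy extensions API are in scope and uses only the fields
 * already referenced above (iov, iov_len, msghdr.msg_iovlen); the helper
 * name total_payload() is hypothetical. */
size_t total_payload(const struct onload_zc_msg* zc_msg)
{
  size_t bytes = 0;
  size_t i;
  for( i = 0; i < zc_msg->msghdr.msg_iovlen; ++i )
    bytes += zc_msg->iov[i].iov_len;   /* each entry maps one packet frag */
  return bytes;
}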
static int efab_vi_rm_mmap_pio(struct efrm_vi *virs,
                               unsigned long *bytes, void *opaque,
                               int *map_num, unsigned long *offset)
{
  int rc;
  int len;
  int instance;
  struct efhw_nic *nic;
  int bar_off;

  nic = efrm_client_get_nic(virs->rs.rs_client);

  if( nic->devtype.arch != EFHW_ARCH_EF10 ) {
    EFRM_ERR("%s: Only ef10 supports PIO."
             " Expected arch=%d but got %d\n",
             __FUNCTION__, EFHW_ARCH_EF10, nic->devtype.arch);
    return -EINVAL;
  }

  instance = virs->rs.rs_instance;

  /* Map the control page. */
  len = CI_MIN(*bytes, CI_PAGE_SIZE);
  *bytes -= len;
  bar_off = (ef10_tx_dma_page_base(nic->vi_stride, instance) + 4096)
            & PAGE_MASK;
  rc = ci_mmap_bar(nic, bar_off, len, opaque, map_num, offset, 1);
  if( rc < 0 )
    EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
  return rc;
}
int citp_do_init(int max_init_level)
{
  int rc = 0;
  int level;
  int saved_errno = errno;

  if( citp.init_level < max_init_level ) {
    /* If threads are launched very early in program startup, then there
     * could be a race here as multiple threads attempt to initialise on
     * first access.  The guard must be recursive, since this function might
     * be re-entered during initialisation. */
    static pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
    pthread_mutex_lock(&mutex);

    _citp_do_init_inprogress++;

    for (level = citp.init_level;
         level < CI_MIN(max_init_level, CITP_INIT_ALL);
         level++) {
      rc = cipt_init_funcs[level]();
      if (rc < 0)
        break;
      citp.init_level = level + 1;
    }

    --_citp_do_init_inprogress;
    pthread_mutex_unlock(&mutex);
  }

  Log_S(log("%s: reached level %d", __FUNCTION__, citp.init_level));
  if( rc == 0 )
    errno = saved_errno;
  return rc;
}
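/* Stand-alone sketch (not Onload code) of the guard pattern above: a
 * statically-initialised recursive mutex protects a leveled initialiser that
 * may be re-entered from within one of its own init functions.  The
 * init_table[] contents and function names here are hypothetical; the
 * recursive-mutex initialiser is a glibc extension (needs _GNU_SOURCE). */
#define _GNU_SOURCE
#include <pthread.h>
#include <stdio.h>

#define INIT_ALL 2

static int init_level = 0;

static int init_logging(void) { puts("logging up"); return 0; }
static int init_tables(void)  { puts("tables up");  return 0; }

static int (*init_table[INIT_ALL])(void) = { init_logging, init_tables };

int do_init(int max_level)
{
  static pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
  int rc = 0;

  if( init_level >= max_level )
    return 0;                          /* fast path: already initialised */

  pthread_mutex_lock(&mutex);          /* recursive: safe to re-enter */
  while( init_level < max_level && init_level < INIT_ALL ) {
    if( (rc = init_table[init_level]()) < 0 )
      break;
    ++init_level;                      /* record progress level by level */
  }
  pthread_mutex_unlock(&mutex);
  return rc;
}

int main(void) { return do_init(INIT_ALL); }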
static int oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt,
                                       ci_iovec_ptr* piov, int bytes_to_copy)
{
  /* Copy data from [pkt] to [piov], following [pkt->frag_next] as
   * necessary.  Does not modify [pkt].  May or may not advance [piov].
   * The packet must contain at least [bytes_to_copy] of data in the
   * [pkt->buf].  [piov] may contain an arbitrary amount of space.
   *
   * Returns number of bytes copied on success, or -EFAULT otherwise.
   */
  int n, pkt_left, pkt_off = 0;
  int bytes_copied = 0;

  while( 1 ) {
    pkt_left = oo_offbuf_left(&pkt->buf) - pkt_off;
    n = CI_MIN(pkt_left, CI_IOVEC_LEN(&piov->io));
    n = CI_MIN(n, bytes_to_copy);
    if(CI_UNLIKELY( do_copy(CI_IOVEC_BASE(&piov->io),
                            oo_offbuf_ptr(&pkt->buf) + pkt_off, n) != 0 ))
      return -EFAULT;

    bytes_copied += n;
    pkt_off += n;
    if( n == bytes_to_copy )
      return bytes_copied;

    bytes_to_copy -= n;
    if( n == pkt_left ) {
      /* Caller guarantees that packet contains at least [bytes_to_copy]. */
      ci_assert(OO_PP_NOT_NULL(pkt->frag_next));
      ci_iovec_ptr_advance(piov, n);
      pkt = PKT_CHK_NNL(ni, pkt->frag_next);
      pkt_off = 0;
      /* We're unlikely to hit end-of-pkt-buf and end-of-iovec at the same
       * time, and if we do, just go round the loop again. */
      continue;
    }

    ci_assert_equal(n, CI_IOVEC_LEN(&piov->io));
    if( piov->iovlen == 0 )
      return bytes_copied;
    piov->io = *piov->iov++;
    --piov->iovlen;
  }
}
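/* Self-contained sketch of the copy loop above in plain C (no Onload
 * types): one cursor walks a chain of source fragments, the other walks an
 * array of destination iovecs, and each iteration copies the largest run
 * that fits both, capped by the remaining byte count.  The struct frag type
 * is invented here purely for illustration. */
#include <string.h>
#include <sys/uio.h>

struct frag { const char* data; size_t len; struct frag* next; };

/* Returns bytes copied; assumes the chain holds at least bytes_to_copy. */
size_t copy_frags_to_iovec(const struct frag* f,
                           struct iovec* iov, int iovcnt,
                           size_t bytes_to_copy)
{
  size_t frag_off = 0, iov_off = 0, copied = 0;

  while( bytes_to_copy > 0 && iovcnt > 0 ) {
    size_t frag_left = f->len - frag_off;
    size_t iov_left  = iov->iov_len - iov_off;
    size_t n = frag_left < iov_left ? frag_left : iov_left;
    if( n > bytes_to_copy )
      n = bytes_to_copy;

    memcpy((char*)iov->iov_base + iov_off, f->data + frag_off, n);
    copied += n;
    bytes_to_copy -= n;
    frag_off += n;
    iov_off += n;

    if( frag_off == f->len ) {         /* source fragment exhausted */
      f = f->next;
      frag_off = 0;
    }
    if( iov_off == iov->iov_len ) {    /* destination iovec exhausted */
      ++iov;
      --iovcnt;
      iov_off = 0;
    }
  }
  return copied;
}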
static int efab_vi_rm_mmap_io(struct efrm_vi *virs,
                              unsigned long *bytes, void *opaque,
                              int *map_num, unsigned long *offset)
{
  int rc;
  int len;
  int instance;
  int base;
  unsigned vi_stride;
  struct efhw_nic *nic;

  nic = efrm_client_get_nic(virs->rs.rs_client);
  instance = virs->rs.rs_instance;

  len = CI_MIN(*bytes, CI_PAGE_SIZE);
  *bytes -= len;

  /* Make sure we can get away with a single page here. */
  switch (nic->devtype.arch) {
  case EFHW_ARCH_FALCON:
    ci_assert_lt(falcon_tx_dma_page_offset(instance), CI_PAGE_SIZE);
    ci_assert_lt(falcon_rx_dma_page_offset(instance), CI_PAGE_SIZE);
    ci_assert_equal(falcon_tx_dma_page_base(instance),
                    falcon_rx_dma_page_base(instance));
    base = falcon_tx_dma_page_base(instance);
    break;
  case EFHW_ARCH_EF10:
    vi_stride = nic->vi_stride;
    ci_assert_lt(ef10_tx_dma_page_offset(vi_stride, instance), CI_PAGE_SIZE);
    ci_assert_lt(ef10_rx_dma_page_offset(vi_stride, instance), CI_PAGE_SIZE);
    ci_assert_equal(ef10_tx_dma_page_base(vi_stride, instance),
                    ef10_rx_dma_page_base(vi_stride, instance));
    base = ef10_tx_dma_page_base(vi_stride, instance);
    break;
  default:
    EFCH_ERR("%s: ERROR: unknown nic type (%d)", __FUNCTION__,
             nic->devtype.arch);
    base = 0; /* To quiet the compiler */
    BUG();
  }

  rc = ci_mmap_bar(nic, base, len, opaque, map_num, offset, 0);
  if( rc < 0 ) {
    EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
    return rc;
  }
  return 0;
}
static void citp_get_process_name(void)
{
  citp.process_name = citp.process_path;

  ci_sprintf(citp.process_path, "<unknown-proc>");

  {
    int n;

    n = readlink("/proc/self/exe", citp.process_path,
                 sizeof(citp.process_path));
    if (n < 0)
      return;

    n = CI_MIN(n + 1, sizeof(citp.process_path));
    citp.process_path[n - 1] = '\0';
    citp.process_name = citp.process_path + n - 2;
    while (citp.process_name > citp.process_path &&
           citp.process_name[-1] != '/')
      --citp.process_name;
  }
}
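/* Stand-alone illustration of the same /proc/self/exe trick in plain POSIX
 * (no Onload types): readlink() does not NUL-terminate, so the result is
 * terminated by hand before scanning backwards for the basename.  The
 * fallback string mirrors the function above; everything else is generic. */
#include <limits.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
  static char path[PATH_MAX] = "<unknown-proc>";
  const char* name = path;
  ssize_t n;

  n = readlink("/proc/self/exe", path, sizeof(path) - 1);
  if( n >= 0 ) {
    path[n] = '\0';                    /* readlink gives no terminator */
    name = path + n;
    while( name > path && name[-1] != '/' )
      --name;                          /* walk back to the last '/' */
  }
  printf("process name: %s\n", name);
  return 0;
}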
int citp_do_init(int max_init_level)
{
  int rc = 0;
  int level;
  int saved_errno = errno;

  _citp_do_init_inprogress++;

  for (level = citp.init_level;
       level < CI_MIN(max_init_level, CITP_INIT_ALL);
       level++) {
    rc = cipt_init_funcs[level]();
    if (rc < 0)
      break;
    citp.init_level = level + 1;
  }

  --_citp_do_init_inprogress;
  Log_S(log("%s: reached level %d", __FUNCTION__, citp.init_level));
  if( rc == 0 )
    errno = saved_errno;
  return rc;
}
/* Set the IP TOS */
void ci_udp_set_tos( ci_udp_state* us, ci_uint32 tos )
{
  ci_ip_hdr_init_fixed(UDP_IP_HDR(us), IPPROTO_UDP,
                       UDP_IP_HDR(us)->ip_ttl,
                       CI_MIN(tos, CI_IP_MAX_TOS));
}
static void citp_opts_getenv(citp_opts_t* opts)
{
  /* ?? TODO: would like to use opts_citp_def.h here */

  const char* s;
  unsigned v;

  opts->log_via_ioctl = 3;
  /* TODO: Old name.  Keep reading 'til 2011, then purge. */
  GET_ENV_OPT_HEX("EF_Log_VIA_IOCTL", log_via_ioctl);
  GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL", log_via_ioctl);

  if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3) {
    opts->log_via_ioctl = 0;
    citp_log_to_file(s);
  }
  else if( opts->log_via_ioctl == 3 ) {
    /* citp_setup_logging_early() has already detected stderr as
     * tty/non-tty, so just trust it. */
    if( ci_log_fn == citp_log_fn_drv )
      opts->log_via_ioctl = 1;
    else
      opts->log_via_ioctl = 0;
  }

  if( opts->log_via_ioctl ) {
    ci_log_options &=~ CI_LOG_PID;
    citp_setup_logging_change(citp_log_fn_drv);
  }
  else {
    if( getenv("EF_LOG_TIMESTAMPS") )
      ci_log_options |= CI_LOG_TIME;
    citp_setup_logging_change(citp_log_fn_ul);
  }

  if( getenv("EF_POLL_NONBLOCK_FAST_LOOPS") &&
      ! getenv("EF_POLL_NONBLOCK_FAST_USEC") )
    log("ERROR: EF_POLL_NONBLOCK_FAST_LOOPS is deprecated, use"
        " EF_POLL_NONBLOCK_FAST_USEC instead");
  if( getenv("EF_POLL_FAST_LOOPS") && ! getenv("EF_POLL_FAST_USEC") )
    log("ERROR: EF_POLL_FAST_LOOPS is deprecated, use"
        " EF_POLL_FAST_USEC instead");

  if( (s = getenv("EF_POLL_USEC")) && atoi(s) ) {
    GET_ENV_OPT_INT("EF_POLL_USEC", ul_spin_usec);
    opts->ul_select_spin = 1;
    opts->ul_poll_spin = 1;
#if CI_CFG_USERSPACE_EPOLL
    opts->ul_epoll_spin = 1;
#endif
#if CI_CFG_UDP
    opts->udp_recv_spin = 1;
    opts->udp_send_spin = 1;
#endif
    opts->tcp_recv_spin = 1;
    opts->tcp_send_spin = 1;
    opts->pkt_wait_spin = 1;
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  if( (s = getenv("EF_BUZZ_USEC")) && atoi(s) ) {
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  GET_ENV_OPT_HEX("EF_UNIX_LOG", log_level);
  GET_ENV_OPT_INT("EF_PROBE", probe);
  GET_ENV_OPT_INT("EF_TCP", ul_tcp);
  GET_ENV_OPT_INT("EF_UDP", ul_udp);
  GET_ENV_OPT_INT("EF_UL_SELECT", ul_select);
  GET_ENV_OPT_INT("EF_SELECT_SPIN", ul_select_spin);
  GET_ENV_OPT_INT("EF_SELECT_FAST", ul_select_fast);
  GET_ENV_OPT_INT("EF_UL_POLL", ul_poll);
  GET_ENV_OPT_INT("EF_POLL_SPIN", ul_poll_spin);
  GET_ENV_OPT_INT("EF_POLL_FAST", ul_poll_fast);
  GET_ENV_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec);
  GET_ENV_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec);
#if CI_CFG_UDP
  GET_ENV_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin);
  GET_ENV_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin);
#endif
  GET_ENV_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin);
  GET_ENV_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin);
  GET_ENV_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin);
  GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN",tcp_connect_spin);
  GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin);
  GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin);
  GET_ENV_OPT_INT("EF_PIPE_SIZE", pipe_size);
#endif
  GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz);
  GET_ENV_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz);
  GET_ENV_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin);
#if CI_CFG_USERSPACE_EPOLL
  GET_ENV_OPT_INT("EF_UL_EPOLL", ul_epoll);
  if( opts->ul_epoll == 0 && ci_cfg_opts.netif_opts.int_driven == 0 ) {
    ci_log("EF_INT_DRIVEN=0 and EF_UL_EPOLL=0 are not compatible. "
           "EF_INT_DRIVEN can be set to 0 implicitly, because of non-zero "
           "EF_POLL_USEC. If you need both spinning and EF_UL_EPOLL=0, "
           "please set EF_INT_DRIVEN=1 explicitly.");
  }
  GET_ENV_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF",ul_epoll_ctl_handoff);
  GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe);
#endif
  GET_ENV_OPT_INT("EF_FDTABLE_SIZE", fdtable_size);
  GET_ENV_OPT_INT("EF_SPIN_USEC", ul_spin_usec);
  GET_ENV_OPT_INT("EF_STACK_PER_THREAD",stack_per_thread);
  GET_ENV_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate);
  GET_ENV_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict);
  GET_ENV_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe);
  GET_ENV_OPT_INT("EF_NO_FAIL", no_fail);
  GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept);
  GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK", accept_force_inherit_nonblock);
  GET_ENV_OPT_INT("EF_VFORK_MODE", vfork_mode);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE", ul_pipe);
#endif

  if( (s = getenv("EF_FORK_NETIF")) && sscanf(s, "%x", &v) == 1 ) {
    opts->fork_netif = CI_MIN(v, CI_UNIX_FORK_NETIF_BOTH);
  }
  if( (s = getenv("EF_NETIF_DTOR")) && sscanf(s, "%x", &v) == 1 ) {
    opts->netif_dtor = CI_MIN(v, CITP_NETIF_DTOR_ALL);
  }

  if( (s = getenv("EF_SIGNALS_NOPOSTPONE")) ) {
    opts->signals_no_postpone = 0;
    while( sscanf(s, "%u", &v) == 1 ) {
      opts->signals_no_postpone |= (1 << (v-1));
      s = strchr(s, ',');
      if( s == NULL )
        break;
      s++;
    }
  }

  if( (s = getenv("EF_CLUSTER_NAME")) ) {
    strncpy(opts->cluster_name, s, CI_CFG_CLUSTER_NAME_LEN);
    opts->cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0';
  }
  else {
    opts->cluster_name[0] = '\0';
  }
  GET_ENV_OPT_INT("EF_CLUSTER_SIZE", cluster_size);
  if( opts->cluster_size < 2 )
    log("ERROR: cluster_size < 2 are not supported");
  GET_ENV_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt);
  get_env_opt_port_list(&opts->tcp_reuseports, "EF_TCP_FORCE_REUSEPORT");
  get_env_opt_port_list(&opts->udp_reuseports, "EF_UDP_FORCE_REUSEPORT");
#if CI_CFG_FD_CACHING
  get_env_opt_port_list(&opts->sock_cache_ports, "EF_SOCKET_CACHE_PORTS");
#endif
}
static void citp_opts_getenv(citp_opts_t* opts)
{
  /* ?? TODO: would like to use opts_citp_def.h here */

  const char* s;
  unsigned v;

  opts->log_via_ioctl = 3;
  /* TODO: Old name.  Keep reading 'til 2011, then purge. */
  GET_ENV_OPT_HEX("EF_Log_VIA_IOCTL", log_via_ioctl);
  GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL", log_via_ioctl);

  if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3) {
    opts->log_via_ioctl = 0;
    citp_log_to_file(s);
  }
  else if( opts->log_via_ioctl == 3 ) {
    /* citp_setup_logging_early() has already detected stderr as
     * tty/non-tty, so just trust it. */
    if( ci_log_fn == citp_log_fn_drv )
      opts->log_via_ioctl = 1;
    else
      opts->log_via_ioctl = 0;
  }

  if( opts->log_via_ioctl ) {
    ci_log_options &=~ CI_LOG_PID;
    citp_setup_logging_change(citp_log_fn_drv);
  }
  else {
    GET_ENV_OPT_INT("EF_LOG_TIMESTAMPS", log_timestamps);
    if( opts->log_timestamps )
      ci_log_options |= CI_LOG_TIME;
    citp_setup_logging_change(citp_log_fn_ul);
  }
  if( getenv("EF_LOG_THREAD") )
    ci_log_options |= CI_LOG_TID;

  if( getenv("EF_POLL_NONBLOCK_FAST_LOOPS") &&
      ! getenv("EF_POLL_NONBLOCK_FAST_USEC") )
    log("ERROR: EF_POLL_NONBLOCK_FAST_LOOPS is deprecated, use"
        " EF_POLL_NONBLOCK_FAST_USEC instead");
  if( getenv("EF_POLL_FAST_LOOPS") && ! getenv("EF_POLL_FAST_USEC") )
    log("ERROR: EF_POLL_FAST_LOOPS is deprecated, use"
        " EF_POLL_FAST_USEC instead");

  if( (s = getenv("EF_POLL_USEC")) && atoi(s) ) {
    /* Any changes to the behaviour triggered by this meta
     * option must also be made to the extensions API option
     * ONLOAD_SPIN_MIMIC_EF_POLL */
    GET_ENV_OPT_INT("EF_POLL_USEC", ul_spin_usec);
    GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec);
    opts->ul_select_spin = 1;
    opts->ul_poll_spin = 1;
#if CI_CFG_USERSPACE_EPOLL
    opts->ul_epoll_spin = 1;
#endif
#if CI_CFG_UDP
    opts->udp_recv_spin = 1;
    opts->udp_send_spin = 1;
#endif
    opts->tcp_recv_spin = 1;
    opts->tcp_send_spin = 1;
    opts->pkt_wait_spin = 1;
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  if( (s = getenv("EF_BUZZ_USEC")) && atoi(s) ) {
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  GET_ENV_OPT_HEX("EF_UNIX_LOG", log_level);
  GET_ENV_OPT_INT("EF_PROBE", probe);
  GET_ENV_OPT_INT("EF_TCP", ul_tcp);
  GET_ENV_OPT_INT("EF_UDP", ul_udp);
  GET_ENV_OPT_INT("EF_UL_SELECT", ul_select);
  GET_ENV_OPT_INT("EF_SELECT_SPIN", ul_select_spin);
  GET_ENV_OPT_INT("EF_SELECT_FAST", ul_select_fast);
  GET_ENV_OPT_INT("EF_UL_POLL", ul_poll);
  GET_ENV_OPT_INT("EF_POLL_SPIN", ul_poll_spin);
  GET_ENV_OPT_INT("EF_POLL_FAST", ul_poll_fast);
  GET_ENV_OPT_INT("EF_POLL_FAST_USEC", ul_poll_fast_usec);
  GET_ENV_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_FAST_USEC", ul_select_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec);
#if CI_CFG_UDP
  GET_ENV_OPT_INT("EF_UDP_RECV_SPIN", udp_recv_spin);
  GET_ENV_OPT_INT("EF_UDP_SEND_SPIN", udp_send_spin);
#endif
  GET_ENV_OPT_INT("EF_TCP_RECV_SPIN", tcp_recv_spin);
  GET_ENV_OPT_INT("EF_TCP_SEND_SPIN", tcp_send_spin);
  GET_ENV_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin);
  GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN",tcp_connect_spin);
  GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN", pkt_wait_spin);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN", pipe_recv_spin);
  GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN", pipe_send_spin);
  GET_ENV_OPT_INT("EF_PIPE_SIZE", pipe_size);
#endif
  GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ", sock_lock_buzz);
  GET_ENV_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz);
  GET_ENV_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin);
#if CI_CFG_USERSPACE_EPOLL
  GET_ENV_OPT_INT("EF_UL_EPOLL", ul_epoll);
  GET_ENV_OPT_INT("EF_EPOLL_SPIN", ul_epoll_spin);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST", ul_epoll_ctl_fast);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF",ul_epoll_ctl_handoff);
  GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE", ul_epoll_mt_safe);
  GET_ENV_OPT_INT("EF_WODA_SINGLE_INTERFACE", woda_single_if);
#endif
  GET_ENV_OPT_INT("EF_FDTABLE_SIZE", fdtable_size);
  GET_ENV_OPT_INT("EF_SPIN_USEC", ul_spin_usec);
  GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec);
  GET_ENV_OPT_INT("EF_STACK_PER_THREAD",stack_per_thread);
  GET_ENV_OPT_INT("EF_DONT_ACCELERATE", dont_accelerate);
  GET_ENV_OPT_INT("EF_FDTABLE_STRICT", fdtable_strict);
  GET_ENV_OPT_INT("EF_FDS_MT_SAFE", fds_mt_safe);
  GET_ENV_OPT_INT("EF_NO_FAIL", no_fail);
  GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT", sa_onstack_intercept);
  GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK", accept_force_inherit_nonblock);
  GET_ENV_OPT_INT("EF_VFORK_MODE", vfork_mode);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE", ul_pipe);
#endif
  GET_ENV_OPT_INT("EF_SYNC_CPLANE_AT_CREATE", sync_cplane);

  if( (s = getenv("EF_FORK_NETIF")) && sscanf(s, "%x", &v) == 1 ) {
    opts->fork_netif = CI_MIN(v, CI_UNIX_FORK_NETIF_BOTH);
  }
  if( (s = getenv("EF_NETIF_DTOR")) && sscanf(s, "%x", &v) == 1 ) {
    opts->netif_dtor = CI_MIN(v, CITP_NETIF_DTOR_ALL);
  }

  if( (s = getenv("EF_SIGNALS_NOPOSTPONE")) ) {
    opts->signals_no_postpone = 0;
    while( sscanf(s, "%u", &v) == 1 ) {
      opts->signals_no_postpone |= (1 << (v-1));
      s = strchr(s, ',');
      if( s == NULL )
        break;
      s++;
    }
  }

  if( (s = getenv("EF_CLUSTER_NAME")) ) {
    strncpy(opts->cluster_name, s, CI_CFG_CLUSTER_NAME_LEN);
    opts->cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0';
  }
  else {
    opts->cluster_name[0] = '\0';
  }
  GET_ENV_OPT_INT("EF_CLUSTER_SIZE", cluster_size);
  if( opts->cluster_size < 1 )
    log("ERROR: cluster_size needs to be a positive number");
  GET_ENV_OPT_INT("EF_CLUSTER_RESTART", cluster_restart_opt);
  GET_ENV_OPT_INT("EF_CLUSTER_HOT_RESTART", cluster_hot_restart_opt);
  get_env_opt_port_list(&opts->tcp_reuseports, "EF_TCP_FORCE_REUSEPORT");
  get_env_opt_port_list(&opts->udp_reuseports, "EF_UDP_FORCE_REUSEPORT");
#if CI_CFG_FD_CACHING
  get_env_opt_port_list(&opts->sock_cache_ports, "EF_SOCKET_CACHE_PORTS");
#endif
  GET_ENV_OPT_INT("EF_ONLOAD_FD_BASE", fd_base);
}
/*
** promote a synrecv structure to an established socket
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;

  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* grab a tcp_state structure that will go onto the accept queue.  We take
     * from the cache of EPs if any are available
     */
    ts = get_ts_from_cache (netif, tsr, tls);
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached */
        ts = get_ts_from_cache(netif, tsr, tls);
        if (ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }

      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket.
       */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if (rc < 0) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr) +
                       sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    }
    else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if (ts->c.user_mss && ts->c.user_mss < ts->smss)
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss>0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited.
     * Note: Windows does not inherit rcvbuf until the call to accept
     * completes.  The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);

    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer.
    */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts), tcp_snd_nxt(ts), ts->snd_max,
               tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }
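/* Worked example (assumed sizes, not Onload code) of the PMTU clamp applied
 * during promotion above: the path MTU is capped at the peer's SYN-advertised
 * MSS plus the basic TCP/IPv4 header overhead, so the PMTU state never
 * exceeds what the peer said it can receive.  The 20-byte headers assume no
 * options; the MTU and MSS values are illustrative. */
#include <stdio.h>

int main(void)
{
  const unsigned ip4_hdr = 20, tcp_hdr = 20;   /* option-free headers */
  unsigned link_mtu  = 9000;                   /* e.g. jumbo-frame route */
  unsigned peer_smss = 1460;                   /* SYN-advertised MSS */

  unsigned pmtu = link_mtu;
  if( peer_smss + tcp_hdr + ip4_hdr < pmtu )
    pmtu = peer_smss + tcp_hdr + ip4_hdr;      /* the CI_MIN(...) in the source */

  printf("effective pmtu = %u\n", pmtu);       /* 1500 here */
  return 0;
}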