Ejemplo n.º 1
0
static int
efab_vi_rm_mmap_mem(struct efrm_vi *virs,
                    unsigned long *bytes, void *opaque,
                    int *map_num, unsigned long *offset)
{
  /* Map the I/O pages that back the queues of [virs].
   *
   * The event queue (if it has non-zero capacity) is handled first, then
   * the DMA queues from index EFRM_VI_RM_DMA_QUEUE_COUNT-1 down to 0.
   * Each mapping request is capped at the number of bytes still
   * outstanding in [*bytes]; mapping stops as soon as [*bytes] reaches
   * zero.  [bytes], [opaque], [map_num] and [offset] are passed straight
   * through to ci_mmap_iopages(), whose result is not inspected
   * (presumably it is what consumes [*bytes] -- confirm against its
   * definition).
   *
   * Always returns 0.
   */
  uint32_t len;
  int q;

  if( virs->q[EFHW_EVQ].capacity != 0 ) {
    len = efhw_iopages_size(&virs->q[EFHW_EVQ].pages);
    len = CI_MIN(len, *bytes);
    ci_assert_gt(len, 0);
    ci_mmap_iopages(&virs->q[EFHW_EVQ].pages, 0,
                    len, bytes, opaque, map_num, offset);
    if( *bytes == 0 )
      return 0;
  }

  q = EFRM_VI_RM_DMA_QUEUE_COUNT;
  while( --q >= 0 ) {
    /* Queues that were never allocated have nothing to map. */
    if( virs->q[q].capacity == 0 )
      continue;

    len = efhw_iopages_size(&virs->q[q].pages);
    len = CI_MIN(len, *bytes);
    ci_assert_gt(len, 0);
    ci_mmap_iopages(&virs->q[q].pages, 0,
                    len, bytes, opaque, map_num, offset);
    if( *bytes == 0 )
      break;
  }

  return 0;
}
Ejemplo n.º 2
0
static int
efab_vi_rm_mmap_ctpio(struct efrm_vi *virs, unsigned long *bytes, void *opaque,
                      int *map_num, unsigned long *offset)
{
  /* Map the CTPIO region of [virs] via ci_mmap_bar().
   *
   * Fails with -EINVAL if the VI does not have EFHW_VI_TX_CTPIO set in
   * its flags.  Otherwise at most one page (CI_PAGE_SIZE) is requested,
   * [*bytes] is reduced by the requested length, and the result of
   * ci_mmap_bar() is returned (a negative value is logged first).
   */

  /* The CTPIO region is 12K from the start of the VI's aperture. */
  const int CTPIO_OFFSET = 12 * 1024;
  struct efhw_nic *nic;
  int instance;
  int bar_off;
  int len;
  int rc;

  instance = virs->rs.rs_instance;

  if( (virs->flags & EFHW_VI_TX_CTPIO) == 0 ) {
    EFRM_ERR("%s: CTPIO is not enabled on VI instance %d\n", __FUNCTION__,
             instance);
    return -EINVAL;
  }

  /* Never map more than a single page, and account for it up front. */
  len = CI_MIN(*bytes, CI_PAGE_SIZE);
  *bytes -= len;

  nic = efrm_client_get_nic(virs->rs.rs_client);
  /* The mapped window must fit inside this VI's stride. */
  ci_assert_ge(nic->vi_stride, CTPIO_OFFSET + len);

  /* Page-align the BAR offset of the CTPIO region for this instance. */
  bar_off = (ef10_tx_dma_page_base(nic->vi_stride, instance) + CTPIO_OFFSET) &
            PAGE_MASK;

  rc = ci_mmap_bar(nic, bar_off, len, opaque, map_num, offset, 1);
  if( rc < 0 )
    EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
  return rc;
}
Ejemplo n.º 3
0
static void ci_udp_pkt_to_zc_msg(ci_netif* ni, ci_ip_pkt_fmt* pkt,
                                 struct onload_zc_msg* zc_msg)
{
  /* Describe the UDP payload of [pkt] as an iovec array in [zc_msg].
   *
   * One iovec entry is filled per fragment, following [frag_next], until
   * the fragment chain ends, CI_UDP_ZC_IOVEC_MAX entries have been
   * written, or the payload length has been fully covered.  The number
   * of entries written is stored in [zc_msg->msghdr.msg_iovlen].
   */
  ci_ip_pkt_fmt* cur = pkt;     /* fragment supplying the data          */
  ci_ip_pkt_fmt* owner = pkt;   /* fragment reported as the zc handle   */
  int remaining = pkt->pf.udp.pay_len;
  int n_iov = 0;

  ci_assert_nequal(zc_msg->iov, NULL);

  /* A zero-length leading fragment that is followed by another fragment
   * contributes no data, so take the data from its successor.  The first
   * entry still carries the leading buffer as its onload_zc_handle so
   * that it gets freed correctly.
   */
  if( oo_offbuf_left(&cur->buf) == 0 && OO_PP_NOT_NULL(cur->frag_next) )
    cur = PKT_CHK_NNL(ni, cur->frag_next);

  for( ; ; ) {
    zc_msg->iov[n_iov].iov_len = CI_MIN(oo_offbuf_left(&cur->buf),
                                        remaining);
    zc_msg->iov[n_iov].iov_base = oo_offbuf_ptr(&cur->buf);
    zc_msg->iov[n_iov].buf = (onload_zc_handle)owner;
    zc_msg->iov[n_iov].iov_flags = 0;
    remaining -= zc_msg->iov[n_iov].iov_len;
    ++n_iov;

    if( OO_PP_IS_NULL(cur->frag_next) )
      break;
    if( n_iov == CI_UDP_ZC_IOVEC_MAX )
      break;
    if( remaining == 0 )
      break;

    /* From the second entry onwards data and handle are the same buffer. */
    cur = PKT_CHK_NNL(ni, cur->frag_next);
    owner = cur;
  }

  zc_msg->msghdr.msg_iovlen = n_iov;
}
Ejemplo n.º 4
0
static int
efab_vi_rm_mmap_pio(struct efrm_vi *virs,
                    unsigned long *bytes, void *opaque,
                    int *map_num, unsigned long *offset)
{
  /* Map one page of the VI's BAR aperture, 4096 bytes past the TX DMA
   * page base, via ci_mmap_bar().
   *
   * Only NICs whose arch is EFHW_ARCH_EF10 are accepted; any other arch
   * yields -EINVAL.  At most CI_PAGE_SIZE bytes are requested and
   * [*bytes] is reduced by that amount.  Returns the result of
   * ci_mmap_bar(); a negative result is logged before being returned.
   */
  struct efhw_nic *nic = efrm_client_get_nic(virs->rs.rs_client);
  int instance;
  int map_len;
  int bar_off;
  int rc;

  if( nic->devtype.arch != EFHW_ARCH_EF10 ) {
    EFRM_ERR("%s: Only ef10 supports PIO."
             "  Expected arch=%d but got %d\n", __FUNCTION__,
             EFHW_ARCH_EF10, nic->devtype.arch);
    return -EINVAL;
  }

  instance = virs->rs.rs_instance;

  /* Cap the request at a single page and account for it up front. */
  map_len = CI_MIN(*bytes, CI_PAGE_SIZE);
  *bytes -= map_len;

  /* Page-aligned BAR offset of the region, 4096 bytes into the aperture. */
  bar_off = (ef10_tx_dma_page_base(nic->vi_stride, instance) + 4096) &
            PAGE_MASK;

  rc = ci_mmap_bar(nic, bar_off, map_len, opaque, map_num, offset, 1);
  if( rc < 0 )
    EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
  return rc;
}
Ejemplo n.º 5
0
int citp_do_init(int max_init_level)
{
  /* Run the initialisation steps in cipt_init_funcs[] until the library
   * has reached [max_init_level] (never beyond CITP_INIT_ALL) or a step
   * fails.
   *
   * Returns 0 on success or the negative value returned by the failing
   * step.  On success errno is restored to the value it had on entry.
   */
  const int errno_on_entry = errno;
  int rc = 0;

  if( citp.init_level < max_init_level ) {
    /* Threads started very early in program startup may race to
     * initialise on first access, so the steps are serialised.  The
     * mutex is recursive because this function may be re-entered while
     * initialisation is still in progress.
     */
    static pthread_mutex_t mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
    int next;

    pthread_mutex_lock(&mutex);
    ++_citp_do_init_inprogress;

    next = citp.init_level;
    while( next < CI_MIN(max_init_level, CITP_INIT_ALL) ) {
      rc = cipt_init_funcs[next]();
      if( rc < 0 )
        break;
      /* Record progress only after the step has succeeded. */
      ++next;
      citp.init_level = next;
    }

    --_citp_do_init_inprogress;
    pthread_mutex_unlock(&mutex);
  }

  Log_S(log("%s: reached level %d", __FUNCTION__, citp.init_level));
  if( rc == 0 )
    errno = errno_on_entry;
  return rc;
}
Ejemplo n.º 6
0
static int
oo_copy_pkt_to_iovec_no_adv(ci_netif* ni, const ci_ip_pkt_fmt* pkt,
                            ci_iovec_ptr* piov, int bytes_to_copy)
{
  /* Copy data from [pkt] to [piov], following [pkt->frag_next] as
   * necessary.  Does not modify [pkt].  May or may not advance [piov]:
   * the final chunk (the one that completes [bytes_to_copy]) is copied
   * without advancing [piov] past it.
   * The packet must contain at least [bytes_to_copy] of data in the
   * [pkt->buf].  [piov] may contain an arbitrary amount of space.
   *
   * Returns number of bytes copied on success (which is less than
   * [bytes_to_copy] if [piov] runs out of space first), or -EFAULT if
   * do_copy() reports a failure.
   */
  int n, pkt_left, pkt_off = 0;
  int bytes_copied = 0;

  while( 1 ) {
    /* Chunk size is the smallest of: data left in this packet buffer,
     * space left in the current iovec, and bytes still wanted.
     */
    pkt_left = oo_offbuf_left(&pkt->buf) - pkt_off;
    n = CI_MIN(pkt_left, CI_IOVEC_LEN(&piov->io));
    n = CI_MIN(n, bytes_to_copy);
    if(CI_UNLIKELY( do_copy(CI_IOVEC_BASE(&piov->io),
                            oo_offbuf_ptr(&pkt->buf) + pkt_off, n) != 0 ))
      return -EFAULT;

    bytes_copied += n;
    pkt_off += n;
    /* Everything requested has been copied; return without advancing
     * [piov] over this last chunk.
     */
    if( n == bytes_to_copy )
      return bytes_copied;

    bytes_to_copy -= n;
    if( n == pkt_left ) {
      /* This packet buffer is exhausted: consume the iovec space just
       * written and move on to the next fragment.
       * Caller guarantees that packet contains at least [bytes_to_copy].
       */
      ci_assert(OO_PP_NOT_NULL(pkt->frag_next));
      ci_iovec_ptr_advance(piov, n);
      pkt = PKT_CHK_NNL(ni, pkt->frag_next);
      pkt_off = 0;
      /* We're unlikely to hit end-of-pkt-buf and end-of-iovec at the same
       * time, and if we do, just go round the loop again.
       */
      continue;
    }

    /* Otherwise the current iovec must be the limiting factor: it is now
     * full.  Stop if there are no more iovecs, else load the next one.
     * [pkt_off] is kept so copying resumes mid-buffer.
     */
    ci_assert_equal(n, CI_IOVEC_LEN(&piov->io));
    if( piov->iovlen == 0 )
      return bytes_copied;
    piov->io = *piov->iov++;
    --piov->iovlen;
  }
}
Ejemplo n.º 7
0
static int
efab_vi_rm_mmap_io(struct efrm_vi *virs,
                   unsigned long *bytes, void *opaque,
                   int *map_num, unsigned long *offset)
{
  /* Map the single BAR page that holds the TX and RX DMA doorbells of
   * [virs] via ci_mmap_bar().
   *
   * At most CI_PAGE_SIZE bytes are requested and [*bytes] is reduced by
   * that amount.  The BAR offset depends on the NIC architecture
   * (Falcon or EF10); any other architecture is logged and hits BUG().
   *
   * Returns 0 on success or the negative result of ci_mmap_bar().
   */
  struct efhw_nic *nic = efrm_client_get_nic(virs->rs.rs_client);
  int instance = virs->rs.rs_instance;
  int map_len = CI_MIN(*bytes, CI_PAGE_SIZE);
  unsigned vi_stride;
  int base;
  int rc;

  *bytes -= map_len;

  /* Make sure we can get away with a single page here: both doorbells
   * must sit within one page and share the same page base.
   */
  switch (nic->devtype.arch) {
  case EFHW_ARCH_FALCON:
    ci_assert_lt(falcon_tx_dma_page_offset(instance), CI_PAGE_SIZE);
    ci_assert_lt(falcon_rx_dma_page_offset(instance), CI_PAGE_SIZE);
    ci_assert_equal(falcon_tx_dma_page_base(instance),
                    falcon_rx_dma_page_base(instance));
    base = falcon_tx_dma_page_base(instance);
    break;

  case EFHW_ARCH_EF10:
    vi_stride = nic->vi_stride;
    ci_assert_lt(ef10_tx_dma_page_offset(vi_stride, instance), CI_PAGE_SIZE);
    ci_assert_lt(ef10_rx_dma_page_offset(vi_stride, instance), CI_PAGE_SIZE);
    ci_assert_equal(ef10_tx_dma_page_base(vi_stride, instance),
                    ef10_rx_dma_page_base(vi_stride, instance));
    base = ef10_tx_dma_page_base(vi_stride, instance);
    break;

  default:
    EFCH_ERR("%s: ERROR: unknown nic type (%d)", __FUNCTION__,
             nic->devtype.arch);
    base = 0; /* To quiet the compiler */
    BUG();
  }

  rc = ci_mmap_bar(nic, base, map_len, opaque, map_num, offset, 0);
  if( rc >= 0 )
    return 0;

  EFCH_ERR("%s: ERROR: ci_mmap_bar failed rc=%d", __FUNCTION__, rc);
  return rc;
}
Ejemplo n.º 8
0
static void citp_get_process_name(void)
{
  /* Fill in citp.process_path with the target of /proc/self/exe and point
   * citp.process_name at the final path component within it.
   *
   * If the link cannot be read (or is empty), both refer to the
   * placeholder string "<unknown-proc>".  A path longer than the buffer
   * is truncated.
   */
  int n;

  citp.process_name = citp.process_path;
  ci_sprintf(citp.process_path, "<unknown-proc>");

  /* readlink() does not NUL-terminate, so reserve one byte for the
   * terminator rather than clamping a mixed signed/unsigned length
   * afterwards.
   */
  n = readlink("/proc/self/exe", citp.process_path,
               sizeof(citp.process_path) - 1);
  /* Treat an empty result like a failure: scanning backwards from an
   * empty string would step before the start of the buffer.  Nothing has
   * been written in either case, so the placeholder is still intact.
   */
  if( n <= 0 )
    return;

  citp.process_path[n] = '\0';

  /* Walk back from the last character to just after the preceding '/'. */
  citp.process_name = citp.process_path + n - 1;
  while( citp.process_name > citp.process_path &&
         citp.process_name[-1] != '/' )
    --citp.process_name;
}
Ejemplo n.º 9
0
int citp_do_init(int max_init_level)
{
  /* Run the initialisation steps in cipt_init_funcs[], starting at the
   * current citp.init_level, until [max_init_level] (never beyond
   * CITP_INIT_ALL) is reached or a step fails.
   *
   * Returns 0 on success or the negative value returned by the failing
   * step.  On success errno is restored to the value it had on entry.
   */
  const int errno_on_entry = errno;
  int rc = 0;
  int next;

  ++_citp_do_init_inprogress;

  next = citp.init_level;
  while( next < CI_MIN(max_init_level, CITP_INIT_ALL) ) {
    rc = cipt_init_funcs[next]();
    if( rc < 0 )
      break;
    /* Record progress only after the step has succeeded. */
    ++next;
    citp.init_level = next;
  }

  --_citp_do_init_inprogress;

  Log_S(log("%s: reached level %d", __FUNCTION__, citp.init_level));
  if( rc == 0 )
    errno = errno_on_entry;
  return rc;
}
Ejemplo n.º 10
0
/* Set the IP TOS of a UDP socket.
 *
 * Re-initialises the fixed fields of the socket's IP header with protocol
 * IPPROTO_UDP, the TTL already stored in the header, and [tos] limited to
 * at most CI_IP_MAX_TOS.
 */
void ci_udp_set_tos( ci_udp_state* us, ci_uint32 tos )
{
  const ci_uint32 tos_limited = CI_MIN(tos, CI_IP_MAX_TOS);

  ci_ip_hdr_init_fixed(UDP_IP_HDR(us), IPPROTO_UDP, UDP_IP_HDR(us)->ip_ttl,
                       tos_limited);
}
Ejemplo n.º 11
0
/* Populate [opts] from the EF_* environment variables.
 *
 * Also selects the logging back-end (driver ioctl vs. user-level) and
 * adjusts ci_log_options accordingly.  Options whose variable is not set
 * are left at whatever value [opts] already holds, except log_via_ioctl
 * and cluster_name which are always (re)initialised here.
 *
 * The locals [s] and [v] are scratch variables; the GET_ENV_OPT_* macros
 * are defined elsewhere and presumably rely on them -- do not rename
 * without checking the macro definitions.
 */
static void citp_opts_getenv(citp_opts_t* opts)
{
  /* ?? TODO: would like to use opts_citp_def.h here */

  const char* s;
  unsigned v;

  /* 3 means "not specified": resolved to 0 or 1 below. */
  opts->log_via_ioctl = 3;
  /* TODO: Old name.  Keeping reading 'til 2011, then purge. */
  GET_ENV_OPT_HEX("EF_Log_VIA_IOCTL",	log_via_ioctl);
  GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL",	log_via_ioctl);

  if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3) {
    opts->log_via_ioctl = 0;
    citp_log_to_file(s);
  } else if( opts->log_via_ioctl == 3 ) {
    /* citp_setup_logging_early() have already detected stderr as
     * tty/non-tty, so just trust it. */
    if( ci_log_fn == citp_log_fn_drv )
      opts->log_via_ioctl = 1;
    else
      opts->log_via_ioctl = 0;
  }

  if( opts->log_via_ioctl ) {
    ci_log_options &=~ CI_LOG_PID;
    citp_setup_logging_change(citp_log_fn_drv);
  } else {
    if( getenv("EF_LOG_TIMESTAMPS") )
      ci_log_options |= CI_LOG_TIME;
    citp_setup_logging_change(citp_log_fn_ul);
  }

  if( getenv("EF_POLL_NONBLOCK_FAST_LOOPS") &&
      ! getenv("EF_POLL_NONBLOCK_FAST_USEC") )
    log("ERROR: EF_POLL_NONBLOCK_FAST_LOOPS is deprecated, use"
        " EF_POLL_NONBLOCK_FAST_USEC instead");

  if( getenv("EF_POLL_FAST_LOOPS") && ! getenv("EF_POLL_FAST_USEC") )
    log("ERROR: EF_POLL_FAST_LOOPS is deprecated, use"
        " EF_POLL_FAST_USEC instead");

  /* EF_POLL_USEC is a meta option: a non-zero value switches on every
   * spin/buzz option.  The individual options read further down can still
   * override each of them.
   */
  if( (s = getenv("EF_POLL_USEC")) && atoi(s) ) {
    GET_ENV_OPT_INT("EF_POLL_USEC", ul_spin_usec);
    opts->ul_select_spin = 1;
    opts->ul_poll_spin = 1;
#if CI_CFG_USERSPACE_EPOLL
    opts->ul_epoll_spin = 1;
#endif
#if CI_CFG_UDP
    opts->udp_recv_spin = 1;
    opts->udp_send_spin = 1;
#endif
    opts->tcp_recv_spin = 1;
    opts->tcp_send_spin = 1;
    opts->pkt_wait_spin = 1;
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  if( (s = getenv("EF_BUZZ_USEC")) && atoi(s) ) {
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  GET_ENV_OPT_HEX("EF_UNIX_LOG",	log_level);
  GET_ENV_OPT_INT("EF_PROBE",		probe);
  GET_ENV_OPT_INT("EF_TCP",		ul_tcp);
  GET_ENV_OPT_INT("EF_UDP",		ul_udp);
  GET_ENV_OPT_INT("EF_UL_SELECT",	ul_select);
  GET_ENV_OPT_INT("EF_SELECT_SPIN",	ul_select_spin);
  GET_ENV_OPT_INT("EF_SELECT_FAST",	ul_select_fast);
  GET_ENV_OPT_INT("EF_UL_POLL",		ul_poll);
  GET_ENV_OPT_INT("EF_POLL_SPIN",	ul_poll_spin);
  GET_ENV_OPT_INT("EF_POLL_FAST",	ul_poll_fast);
  GET_ENV_OPT_INT("EF_POLL_FAST_USEC",  ul_poll_fast_usec);
  GET_ENV_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_FAST_USEC",  ul_select_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec);
#if CI_CFG_UDP
  GET_ENV_OPT_INT("EF_UDP_RECV_SPIN",   udp_recv_spin);
  GET_ENV_OPT_INT("EF_UDP_SEND_SPIN",   udp_send_spin);
#endif
  GET_ENV_OPT_INT("EF_TCP_RECV_SPIN",   tcp_recv_spin);
  GET_ENV_OPT_INT("EF_TCP_SEND_SPIN",   tcp_send_spin);
  GET_ENV_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin);
  GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN",tcp_connect_spin);
  GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN",   pkt_wait_spin);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN",  pipe_recv_spin);
  GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN",  pipe_send_spin);
  GET_ENV_OPT_INT("EF_PIPE_SIZE",       pipe_size);
#endif
  GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ",  sock_lock_buzz);
  GET_ENV_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz);
  GET_ENV_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin);
#if CI_CFG_USERSPACE_EPOLL
  GET_ENV_OPT_INT("EF_UL_EPOLL",        ul_epoll);
  if( opts->ul_epoll == 0 && ci_cfg_opts.netif_opts.int_driven == 0 ) {
    ci_log("EF_INT_DRIVEN=0 and EF_UL_EPOLL=0 are not compatible.  "
           "EF_INT_DRIVEN can be set to 0 implicitly, because of non-zero "
           "EF_POLL_USEC.  If you need both spinning and EF_UL_EPOLL=0, "
           "please set EF_INT_DRIVEN=1 explicitly.");
  }
  GET_ENV_OPT_INT("EF_EPOLL_SPIN",      ul_epoll_spin);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST",  ul_epoll_ctl_fast);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF",ul_epoll_ctl_handoff);
  GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE",   ul_epoll_mt_safe);
#endif
  GET_ENV_OPT_INT("EF_FDTABLE_SIZE",	fdtable_size);
  GET_ENV_OPT_INT("EF_SPIN_USEC",	ul_spin_usec);
  GET_ENV_OPT_INT("EF_STACK_PER_THREAD",stack_per_thread);
  GET_ENV_OPT_INT("EF_DONT_ACCELERATE",	dont_accelerate);
  GET_ENV_OPT_INT("EF_FDTABLE_STRICT",	fdtable_strict);
  GET_ENV_OPT_INT("EF_FDS_MT_SAFE",	fds_mt_safe);
  GET_ENV_OPT_INT("EF_NO_FAIL",		no_fail);
  GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT",	sa_onstack_intercept);
  GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK",	accept_force_inherit_nonblock);
  GET_ENV_OPT_INT("EF_VFORK_MODE",	vfork_mode);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE",        ul_pipe);
#endif

  /* These two are parsed as hex and clamped to their largest valid value. */
  if( (s = getenv("EF_FORK_NETIF")) && sscanf(s, "%x", &v) == 1 ) {
    opts->fork_netif = CI_MIN(v, CI_UNIX_FORK_NETIF_BOTH);
  }
  if( (s = getenv("EF_NETIF_DTOR")) && sscanf(s, "%x", &v) == 1 ) {
    opts->netif_dtor = CI_MIN(v, CITP_NETIF_DTOR_ALL);
  }

  /* EF_SIGNALS_NOPOSTPONE is a comma-separated list of 1-based signal
   * numbers; signal N sets bit N-1 of the mask.
   */
  if( (s = getenv("EF_SIGNALS_NOPOSTPONE")) ) {
    opts->signals_no_postpone = 0;
    while( sscanf(s, "%u", &v) == 1 ) {
      /* Shifting by (v-1) is undefined for v == 0 (wraps to a huge count)
       * and for counts of 32 or more, so reject such values instead of
       * shifting.  The unsigned constant keeps v == 32 well defined.
       */
      if( v >= 1 && v <= 32 )
        opts->signals_no_postpone |= (1u << (v-1));
      else
        log("ERROR: EF_SIGNALS_NOPOSTPONE: signal number %u is out of range",
            v);
      s = strchr(s, ',');
      if( s == NULL )
        break;
      s++;
    }
  }

  if( (s = getenv("EF_CLUSTER_NAME")) ) {
    /* strncpy() does not terminate on truncation, so terminate explicitly.
     * NOTE(review): this assumes cluster_name holds at least
     * CI_CFG_CLUSTER_NAME_LEN + 1 bytes -- confirm against its declaration.
     */
    strncpy(opts->cluster_name, s, CI_CFG_CLUSTER_NAME_LEN);
    opts->cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0';
  }
  else {
    opts->cluster_name[0] = '\0';
  }
  GET_ENV_OPT_INT("EF_CLUSTER_SIZE",	cluster_size);
  if( opts->cluster_size < 2 )
    log("ERROR: cluster_size < 2 are not supported");
  GET_ENV_OPT_INT("EF_CLUSTER_RESTART",	cluster_restart_opt);
  get_env_opt_port_list(&opts->tcp_reuseports, "EF_TCP_FORCE_REUSEPORT");
  get_env_opt_port_list(&opts->udp_reuseports, "EF_UDP_FORCE_REUSEPORT");

#if CI_CFG_FD_CACHING
  get_env_opt_port_list(&opts->sock_cache_ports, "EF_SOCKET_CACHE_PORTS");
#endif
}
Ejemplo n.º 12
0
/* Populate [opts] from the EF_* environment variables.
 *
 * Also selects the logging back-end (driver ioctl vs. user-level) and
 * adjusts ci_log_options accordingly.  Options whose variable is not set
 * are left at whatever value [opts] already holds, except log_via_ioctl
 * and cluster_name which are always (re)initialised here.
 *
 * The locals [s] and [v] are scratch variables; the GET_ENV_OPT_* macros
 * are defined elsewhere and presumably rely on them -- do not rename
 * without checking the macro definitions.
 */
static void citp_opts_getenv(citp_opts_t* opts)
{
  /* ?? TODO: would like to use opts_citp_def.h here */

  const char* s;
  unsigned v;

  /* 3 means "not specified": resolved to 0 or 1 below. */
  opts->log_via_ioctl = 3;
  /* TODO: Old name.  Keeping reading 'til 2011, then purge. */
  GET_ENV_OPT_HEX("EF_Log_VIA_IOCTL",	log_via_ioctl);
  GET_ENV_OPT_INT("EF_LOG_VIA_IOCTL",	log_via_ioctl);

  if( (s = getenv("EF_LOG_FILE")) && opts->log_via_ioctl == 3) {
    opts->log_via_ioctl = 0;
    citp_log_to_file(s);
  } else if( opts->log_via_ioctl == 3 ) {
    /* citp_setup_logging_early() have already detected stderr as
     * tty/non-tty, so just trust it. */
    if( ci_log_fn == citp_log_fn_drv )
      opts->log_via_ioctl = 1;
    else
      opts->log_via_ioctl = 0;
  }

  if( opts->log_via_ioctl ) {
    ci_log_options &=~ CI_LOG_PID;
    citp_setup_logging_change(citp_log_fn_drv);
  } else {
    GET_ENV_OPT_INT("EF_LOG_TIMESTAMPS", log_timestamps);
    if( opts->log_timestamps )
      ci_log_options |= CI_LOG_TIME;
    citp_setup_logging_change(citp_log_fn_ul);
  }
  if( getenv("EF_LOG_THREAD") )
    ci_log_options |= CI_LOG_TID;


  if( getenv("EF_POLL_NONBLOCK_FAST_LOOPS") &&
      ! getenv("EF_POLL_NONBLOCK_FAST_USEC") )
    log("ERROR: EF_POLL_NONBLOCK_FAST_LOOPS is deprecated, use"
        " EF_POLL_NONBLOCK_FAST_USEC instead");

  if( getenv("EF_POLL_FAST_LOOPS") && ! getenv("EF_POLL_FAST_USEC") )
    log("ERROR: EF_POLL_FAST_LOOPS is deprecated, use"
        " EF_POLL_FAST_USEC instead");

  if( (s = getenv("EF_POLL_USEC")) && atoi(s) ) {
    /* Any changes to the behaviour triggered by this meta
     * option must also be made to the extensions API option
     * ONLOAD_SPIN_MIMIC_EF_POLL
     */
    GET_ENV_OPT_INT("EF_POLL_USEC", ul_spin_usec);
    GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC", sleep_spin_usec);
    opts->ul_select_spin = 1;
    opts->ul_poll_spin = 1;
#if CI_CFG_USERSPACE_EPOLL
    opts->ul_epoll_spin = 1;
#endif
#if CI_CFG_UDP
    opts->udp_recv_spin = 1;
    opts->udp_send_spin = 1;
#endif
    opts->tcp_recv_spin = 1;
    opts->tcp_send_spin = 1;
    opts->pkt_wait_spin = 1;
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  if( (s = getenv("EF_BUZZ_USEC")) && atoi(s) ) {
    opts->sock_lock_buzz = 1;
    opts->stack_lock_buzz = 1;
  }

  GET_ENV_OPT_HEX("EF_UNIX_LOG",	log_level);
  GET_ENV_OPT_INT("EF_PROBE",		probe);
  GET_ENV_OPT_INT("EF_TCP",		ul_tcp);
  GET_ENV_OPT_INT("EF_UDP",		ul_udp);
  GET_ENV_OPT_INT("EF_UL_SELECT",	ul_select);
  GET_ENV_OPT_INT("EF_SELECT_SPIN",	ul_select_spin);
  GET_ENV_OPT_INT("EF_SELECT_FAST",	ul_select_fast);
  GET_ENV_OPT_INT("EF_UL_POLL",		ul_poll);
  GET_ENV_OPT_INT("EF_POLL_SPIN",	ul_poll_spin);
  GET_ENV_OPT_INT("EF_POLL_FAST",	ul_poll_fast);
  GET_ENV_OPT_INT("EF_POLL_FAST_USEC",  ul_poll_fast_usec);
  GET_ENV_OPT_INT("EF_POLL_NONBLOCK_FAST_USEC", ul_poll_nonblock_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_FAST_USEC",  ul_select_fast_usec);
  GET_ENV_OPT_INT("EF_SELECT_NONBLOCK_FAST_USEC", ul_select_nonblock_fast_usec);
#if CI_CFG_UDP
  GET_ENV_OPT_INT("EF_UDP_RECV_SPIN",   udp_recv_spin);
  GET_ENV_OPT_INT("EF_UDP_SEND_SPIN",   udp_send_spin);
#endif
  GET_ENV_OPT_INT("EF_TCP_RECV_SPIN",   tcp_recv_spin);
  GET_ENV_OPT_INT("EF_TCP_SEND_SPIN",   tcp_send_spin);
  GET_ENV_OPT_INT("EF_TCP_ACCEPT_SPIN", tcp_accept_spin);
  GET_ENV_OPT_INT("EF_TCP_CONNECT_SPIN",tcp_connect_spin);
  GET_ENV_OPT_INT("EF_PKT_WAIT_SPIN",   pkt_wait_spin);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE_RECV_SPIN",  pipe_recv_spin);
  GET_ENV_OPT_INT("EF_PIPE_SEND_SPIN",  pipe_send_spin);
  GET_ENV_OPT_INT("EF_PIPE_SIZE",       pipe_size);
#endif
  GET_ENV_OPT_INT("EF_SOCK_LOCK_BUZZ",  sock_lock_buzz);
  GET_ENV_OPT_INT("EF_STACK_LOCK_BUZZ", stack_lock_buzz);
  GET_ENV_OPT_INT("EF_SO_BUSY_POLL_SPIN", so_busy_poll_spin);
#if CI_CFG_USERSPACE_EPOLL
  GET_ENV_OPT_INT("EF_UL_EPOLL",        ul_epoll);
  GET_ENV_OPT_INT("EF_EPOLL_SPIN",      ul_epoll_spin);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_FAST",  ul_epoll_ctl_fast);
  GET_ENV_OPT_INT("EF_EPOLL_CTL_HANDOFF",ul_epoll_ctl_handoff);
  GET_ENV_OPT_INT("EF_EPOLL_MT_SAFE",   ul_epoll_mt_safe);
  GET_ENV_OPT_INT("EF_WODA_SINGLE_INTERFACE", woda_single_if);
#endif
  GET_ENV_OPT_INT("EF_FDTABLE_SIZE",	fdtable_size);
  GET_ENV_OPT_INT("EF_SPIN_USEC",	ul_spin_usec);
  GET_ENV_OPT_INT("EF_SLEEP_SPIN_USEC",	sleep_spin_usec);
  GET_ENV_OPT_INT("EF_STACK_PER_THREAD",stack_per_thread);
  GET_ENV_OPT_INT("EF_DONT_ACCELERATE",	dont_accelerate);
  GET_ENV_OPT_INT("EF_FDTABLE_STRICT",	fdtable_strict);
  GET_ENV_OPT_INT("EF_FDS_MT_SAFE",	fds_mt_safe);
  GET_ENV_OPT_INT("EF_NO_FAIL",		no_fail);
  GET_ENV_OPT_INT("EF_SA_ONSTACK_INTERCEPT",	sa_onstack_intercept);
  GET_ENV_OPT_INT("EF_ACCEPT_INHERIT_NONBLOCK",	accept_force_inherit_nonblock);
  GET_ENV_OPT_INT("EF_VFORK_MODE",	vfork_mode);
#if CI_CFG_USERSPACE_PIPE
  GET_ENV_OPT_INT("EF_PIPE",        ul_pipe);
#endif
  GET_ENV_OPT_INT("EF_SYNC_CPLANE_AT_CREATE",	sync_cplane);

  /* These two are parsed as hex and clamped to their largest valid value. */
  if( (s = getenv("EF_FORK_NETIF")) && sscanf(s, "%x", &v) == 1 ) {
    opts->fork_netif = CI_MIN(v, CI_UNIX_FORK_NETIF_BOTH);
  }
  if( (s = getenv("EF_NETIF_DTOR")) && sscanf(s, "%x", &v) == 1 ) {
    opts->netif_dtor = CI_MIN(v, CITP_NETIF_DTOR_ALL);
  }

  /* EF_SIGNALS_NOPOSTPONE is a comma-separated list of 1-based signal
   * numbers; signal N sets bit N-1 of the mask.
   */
  if( (s = getenv("EF_SIGNALS_NOPOSTPONE")) ) {
    opts->signals_no_postpone = 0;
    while( sscanf(s, "%u", &v) == 1 ) {
      /* Shifting by (v-1) is undefined for v == 0 (wraps to a huge count)
       * and for counts of 32 or more, so reject such values instead of
       * shifting.  The unsigned constant keeps v == 32 well defined.
       */
      if( v >= 1 && v <= 32 )
        opts->signals_no_postpone |= (1u << (v-1));
      else
        log("ERROR: EF_SIGNALS_NOPOSTPONE: signal number %u is out of range",
            v);
      s = strchr(s, ',');
      if( s == NULL )
        break;
      s++;
    }
  }

  if( (s = getenv("EF_CLUSTER_NAME")) ) {
    /* strncpy() does not terminate on truncation, so terminate explicitly.
     * NOTE(review): this assumes cluster_name holds at least
     * CI_CFG_CLUSTER_NAME_LEN + 1 bytes -- confirm against its declaration.
     */
    strncpy(opts->cluster_name, s, CI_CFG_CLUSTER_NAME_LEN);
    opts->cluster_name[CI_CFG_CLUSTER_NAME_LEN] = '\0';
  }
  else {
    opts->cluster_name[0] = '\0';
  }
  GET_ENV_OPT_INT("EF_CLUSTER_SIZE",	cluster_size);
  if( opts->cluster_size < 1 )
    log("ERROR: cluster_size needs to be a positive number");
  GET_ENV_OPT_INT("EF_CLUSTER_RESTART",	cluster_restart_opt);
  GET_ENV_OPT_INT("EF_CLUSTER_HOT_RESTART", cluster_hot_restart_opt);
  get_env_opt_port_list(&opts->tcp_reuseports, "EF_TCP_FORCE_REUSEPORT");
  get_env_opt_port_list(&opts->udp_reuseports, "EF_UDP_FORCE_REUSEPORT");

#if CI_CFG_FD_CACHING
  get_env_opt_port_list(&opts->sock_cache_ports, "EF_SOCKET_CACHE_PORTS");
#endif

  GET_ENV_OPT_INT("EF_ONLOAD_FD_BASE",	fd_base);

}
Ejemplo n.º 13
0
/*
** promote a synrecv structure to an established socket
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* grab a tcp_state structure that will go onto the accept queue.  We take
     * from the cache of EPs if any are available
     */
    ts = get_ts_from_cache (netif, tsr, tls); 
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached */
        ts = get_ts_from_cache(netif, tsr, tls);
        if (ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }


      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if (rc < 0) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if (ts->c.user_mss && ts->c.user_mss < ts->smss)
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss>0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited. 
     * Note: Windows does not inherit rcvbuf until the call to accept 
     * completes. The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }