Ejemplo n.º 1
0
/* Reset and drop a TCP connection that is still marked as being in an
 * accept queue (i.e. it has not been accepted).
 *
 * [priv] is not used.  [arg] points to a struct oo_op_tcp_drop_from_acceptq
 * that carries the stack id and the socket id of the connection.
 *
 * Returns 0 on success, or a negative error code:
 *   - the error returned by efab_thr_table_lookup() if the stack is not
 *     found;
 *   - -EINVAL if the endpoint is not valid, or the socket is not a
 *     non-listening TCP socket;
 *   - -ERESTARTSYS if taking the stack lock failed.
 */
static int
efab_tcp_drop_from_acceptq(ci_private_t *priv, void *arg)
{
  struct oo_op_tcp_drop_from_acceptq *carg = arg;
  tcp_helper_resource_t *thr;
  tcp_helper_endpoint_t *ep;
  citp_waitable *w;
  ci_tcp_state *ts;
  int rc;

  /* find stack */
  rc = efab_thr_table_lookup(NULL, carg->stack_id,
                             EFAB_THR_TABLE_LOOKUP_CHECK_USER |
                             EFAB_THR_TABLE_LOOKUP_NO_UL,
                             &thr);

  if( rc < 0 )
    return rc;
  ci_assert( thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND );

  /* The lookup succeeded, so rc is non-negative here.  Every failure below
   * must be reported to the caller as an error rather than as the lookup's
   * success code. */
  rc = -EINVAL;

  /* find endpoint and drop OS socket */
  ep = ci_trs_get_valid_ep(thr, carg->sock_id);
  if( ep == NULL )
    goto fail1;

  /* Only a TCP socket that is not itself a listener can be dropped here. */
  w = SP_TO_WAITABLE(&thr->netif, carg->sock_id);
  if( !(w->state & CI_TCP_STATE_TCP) || w->state == CI_TCP_LISTEN )
    goto fail2;
  ts = SP_TO_TCP(&thr->netif, carg->sock_id);
  ci_assert(ep->os_port_keeper);
  ci_assert_equal(ep->os_socket, NULL);

  LOG_TV(ci_log("%s: send reset to non-accepted connection", __FUNCTION__));

  /* copy from ci_tcp_listen_shutdown_queues() */
  ci_assert(ts->s.b.sb_aflags & CI_SB_AFLAG_TCP_IN_ACCEPTQ);
  rc = ci_netif_lock(&thr->netif);
  if( rc != 0 ) {
    ci_assert_equal(rc, -EINTR);
    rc = -ERESTARTSYS;
    goto fail2;
  }
  ci_bit_clear(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
  /* We have no way to close this connection from the other side:
   * there was no RST from peer. */
  ci_assert_nequal(ts->s.b.state, CI_TCP_CLOSED);
  ci_assert_nequal(ts->s.b.state, CI_TCP_TIME_WAIT);
  ci_tcp_send_rst(&thr->netif, ts);
  ci_tcp_drop(&thr->netif, ts, ECONNRESET);
  ci_assert_equal(ep->os_port_keeper, NULL);
  ci_netif_unlock(&thr->netif);
  efab_tcp_helper_k_ref_count_dec(thr, 1);
  return 0;

fail1:
  efab_thr_release(thr);
  /* NOTE(review): the fail2 paths do not call efab_thr_release() or
   * efab_tcp_helper_k_ref_count_dec(); confirm whether the reference taken
   * by the lookup is intentionally kept there. */
fail2:
  ci_log("%s: inconsistent ep %d:%d", __func__, carg->stack_id, carg->sock_id);
  return rc;
}
Ejemplo n.º 2
0
/* Complete a pending handover of [fdi]'s file descriptor.
 *
 * Asks fdtable_fd_move() to perform OO_IOC_TCP_HANDOVER on the fd.  If that
 * reports -EBUSY and the fd is a member of an epoll set, the move is retried
 * with OO_IOC_FILE_MOVED.  When the move does not succeed the fd is
 * re-probed, and the O_NONBLOCK switch (if one was requested) is applied to
 * the os_socket of the resulting fdinfo instead of to the fd itself.
 *
 * On a successful move the fdtable entry is marked as pass-through.  In all
 * cases [fdi] is destroyed and freed before returning.
 *
 * [fdt_locked] is non-zero when the caller already holds the fdtable lock.
 */
static void citp_fdinfo_do_handover(citp_fdinfo* fdi, int fdt_locked)
{
  citp_fdinfo* ep_set = NULL;
  int target_fd = fdi->fd;
  int move_rc;
#ifndef NDEBUG
  /* Yuk: does for UDP too. */
  volatile citp_fdinfo_p* p_fdip;
  p_fdip = &citp_fdtable.table[fdi->fd].fdip;
  ci_assert(fdip_is_busy(*p_fdip));
#endif

  Log_V(ci_log("%s: fd=%d nonb_switch=%d", __FUNCTION__, fdi->fd,
	       fdi->on_rcz.handover_nonb_switch));

  /* Tell an epollb set about the handover before the fd is moved. */
  if( fdi->epoll_fd >= 0 ) {
    ep_set = citp_epoll_fdi_from_member(fdi, fdt_locked);
    if( ep_set->protocol->type == CITP_EPOLLB_FD )
      citp_epollb_on_handover(ep_set, fdi);
  }

  move_rc = fdtable_fd_move(fdi->fd, OO_IOC_TCP_HANDOVER);
  if( move_rc == -EBUSY && fdi->epoll_fd >= 0 ) {
    ci_assert(fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags &
              CI_SB_AFLAG_MOVED_AWAY);
    /* If this is our epoll, we can do full handover: we manually add os
     * fd into the epoll set.
     * Fixme: ensure we are not in _other_ epoll sets */
    ci_bit_clear(&fdi_to_sock_fdi(fdi)->sock.s->b.sb_aflags,
                 CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);
    move_rc = fdtable_fd_move(fdi->fd, OO_IOC_FILE_MOVED);
  }

  if( move_rc != 0 ) {
    /* The move failed: re-probe the fd and use the probed entry's OS
     * socket as the target of the non-blocking switch below. */
    citp_fdinfo* probed;
    if( ! fdt_locked ) CITP_FDTABLE_LOCK();
    probed = citp_fdtable_probe_locked(fdi->fd, CI_TRUE, CI_TRUE);
    citp_fdinfo_release_ref(probed, 1);
    if( ! fdt_locked ) CITP_FDTABLE_UNLOCK();
    ci_assert_equal(citp_fdinfo_get_type(probed), CITP_PASSTHROUGH_FD);
    target_fd = fdi_to_alien_fdi(probed)->os_socket;
  }

  /* Apply the requested non-blocking state, if any, to the target fd. */
  if( fdi->on_rcz.handover_nonb_switch >= 0 ) {
    int on_off = fdi->on_rcz.handover_nonb_switch != 0;
    if( ci_sys_ioctl(target_fd, FIONBIO, &on_off) < 0 )
      Log_E(ci_log("%s: ioctl failed on_off=%d", __FUNCTION__, on_off));
  }

  /* Only a successful move turns the table entry into pass-through. */
  if( move_rc == 0 ) {
    citp_fdtable_busy_clear(fdi->fd, fdip_passthru, fdt_locked);
  }

  citp_fdinfo_get_ops(fdi)->dtor(fdi, fdt_locked);
  if( ep_set != NULL ) {
    if( ep_set->protocol->type == CITP_EPOLL_FD )
      citp_epoll_on_handover(ep_set, fdi, fdt_locked);
    citp_fdinfo_release_ref(ep_set, fdt_locked);
  }
  citp_fdinfo_free(fdi);
}
Ejemplo n.º 3
0
/* Handle an epoll_ctl() request on the kernel epoll fd [op_kepfd].
 *
 * Every operation other than EPOLL_CTL_ADD is passed straight to
 * efab_linux_sys_epoll_ctl().  For ADD the target file is inspected:
 *   - a file that is not one of our TCP/UDP sockets is added via
 *     efab_linux_sys_epoll_ctl() (pipe ends additionally enable spinning
 *     when CI_CFG_USERSPACE_PIPE is set);
 *   - for one of our sockets, its stack is registered with this epoll
 *     private state and the fd is added while the
 *     CI_SB_AFLAG_AVOID_INTERRUPTS flag is temporarily set on the socket.
 *
 * Returns -EBADF if [op_fd] does not refer to an open file, -EINVAL if the
 * caller tries to add the epoll object to itself, otherwise the result of
 * efab_linux_sys_epoll_ctl().
 */
static int oo_epoll2_ctl(struct oo_epoll_private *priv, int op_kepfd,
                         int op_op, int op_fd, struct epoll_event *op_event)
{
  tcp_helper_resource_t *fd_thr;
  struct file *file;
  int rc;
  ci_uint32 fd_sock_id;
  citp_waitable *fd_w;

  /* We are interested in ADD only */
  if( op_op != EPOLL_CTL_ADD )
    return efab_linux_sys_epoll_ctl(op_kepfd, op_op, op_fd, op_event);

  /* system poll() and friends use fget_light(), which is cheap.
   * But they do not export fget_light to us, so we have to use fget(). */
  file = fget(op_fd);
  if(unlikely( file == NULL ))
    return -EBADF;

  /* Check for the dead circle.
   * We should check that we are not adding ourself. */
  if(unlikely( file->private_data == priv )) {
    fput(file);
    return -EINVAL;
  }

  /* Is op->fd ours and if yes, which netif it has? */
  /* Fixme: epoll fd - do we want to accelerate something? */
  if( file->f_op != &linux_tcp_helper_fops_udp &&
      file->f_op != &linux_tcp_helper_fops_tcp ) {
#ifdef OO_EPOLL_NEED_NEST_PROTECTION
    /* Record the current task in the busy list for the duration of the
     * nested epoll_ctl call. */
    struct oo_epoll_busy_task t;
    t.task = current;
    spin_lock(&priv->lock);
    list_add(&t.link, &priv->p.p2.busy_tasks);
    spin_unlock(&priv->lock);
#endif

#if CI_CFG_USERSPACE_PIPE
    if( ( file->f_op == &linux_tcp_helper_fops_pipe_reader ||
          file->f_op == &linux_tcp_helper_fops_pipe_writer ) )
      priv->p.p2.do_spin = 1;
#endif
    fput(file);
    rc = efab_linux_sys_epoll_ctl(op_kepfd, op_op, op_fd, op_event);
#ifdef OO_EPOLL_NEED_NEST_PROTECTION
    spin_lock(&priv->lock);
    list_del(&t.link);
    spin_unlock(&priv->lock);
#endif
    return rc;
  }

  /* Onload socket here! */
  fd_thr = ((ci_private_t *)file->private_data)->thr;
  fd_sock_id = ((ci_private_t *)file->private_data)->sock_id;
  priv->p.p2.do_spin = 1;

  if(unlikely( ! oo_epoll_add_stack(priv, fd_thr) )) {
    /* Log the hint once only: set the flag so that later failures stay
     * silent instead of flooding the log. */
    static int printed;
    if( !printed ) {
      printed = 1;
      ci_log("Can't add stack %d to epoll set: consider "
             "increasing epoll_max_stacks module option", fd_thr->id);
    }
    /* fall through to sys_epoll_ctl() without interrupt */
  }

  /* Let kernel add fd to the epoll set, but ask endpoint to avoid enabling
   * interrupts.
   * And we keep file ref while using fd_w to avoid nasty things. */
  fd_w = SP_TO_WAITABLE(&fd_thr->netif, fd_sock_id);
  ci_bit_set(&fd_w->sb_aflags, CI_SB_AFLAG_AVOID_INTERRUPTS_BIT);
  rc = efab_linux_sys_epoll_ctl(op_kepfd, op_op, op_fd, op_event);
  ci_bit_clear(&fd_w->sb_aflags, CI_SB_AFLAG_AVOID_INTERRUPTS_BIT);
  fput(file);

  return rc;
}
Ejemplo n.º 4
0
/* Move priv file to the alien_ni stack.
 *
 * Locking contract:
 *  - Must be called with the priv stack and the socket locked.
 *  - Always returns with the priv (old) stack unlocked and the socket
 *    unlocked.
 *  - If rc=0, returns with the alien_ni stack locked; otherwise both
 *    stacks are unlocked.
 *
 * Returns 0 on success or a negative error code:
 *  -EBUSY   the file is linked into an epoll set, the alien stack lock
 *           could not be taken after repeated attempts, or the new
 *           endpoint was already attached;
 *  -EINVAL  efab_file_move_supported() rejected the socket, or moving the
 *           filters failed;
 *  -ENOMEM  a new socket buffer or the temporary copy could not be
 *           allocated;
 *  or the error returned by ci_netif_lock() / onload_alloc_file().
 */
int efab_file_move_to_alien_stack(ci_private_t *priv, ci_netif *alien_ni)
{
  tcp_helper_resource_t *old_thr = priv->thr;
  tcp_helper_resource_t *new_thr = netif2tcp_helper_resource(alien_ni);
  ci_sock_cmn *old_s = SP_TO_SOCK(&old_thr->netif, priv->sock_id);
  ci_sock_cmn *new_s;
  ci_sock_cmn *mid_s;
  tcp_helper_endpoint_t *old_ep, *new_ep;
  int rc, i;
  int pollwait_register = 0;
#if CI_CFG_FD_CACHING
  oo_p sp;
#endif

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d", __func__,
                       old_thr->id, priv->sock_id, new_thr->id));
  /* Poll the old stack - deliver all data to our socket */
  ci_netif_poll(&old_thr->netif);

  /* Endpoints in epoll list should not be moved, because waitq is already
   * in the epoll internal structures (bug 41152). */
  if( !list_empty(&priv->_filp->f_ep_links) ) {
    rc = -EBUSY;
    goto fail1;
  }

  if( !efab_file_move_supported(&old_thr->netif, old_s) ) {
    rc = -EINVAL;
    goto fail1;
  }

  /* Lock the second stack.  The old stack lock is dropped and re-taken
   * around each failed attempt; give up after a bounded number of tries. */
  i = 0;
  while( ! ci_netif_trylock(alien_ni) ) {
    ci_netif_unlock(&old_thr->netif);
    if( i++ >= 1000 ) {
      rc = -EBUSY;
      goto fail1_ni_unlocked;
    }
    rc = ci_netif_lock(&old_thr->netif);
    if( rc != 0 )
      goto fail1_ni_unlocked;
  }

  /* Allocate a new socket in the alien_ni stack */
  rc = -ENOMEM;
  if( old_s->b.state == CI_TCP_STATE_UDP ) {
    ci_udp_state *new_us = ci_udp_get_state_buf(alien_ni);
    if( new_us == NULL )
      goto fail2;
    new_s = &new_us->s;
  }
  else {
    ci_tcp_state *new_ts = ci_tcp_get_state_buf(alien_ni);
    if( new_ts == NULL )
      goto fail2;
    new_s = &new_ts->s;
  }

  /* Allocate an intermediate "socket" outside of everything */
  mid_s = ci_alloc(CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));
  if( mid_s == NULL )
    goto fail3;

  OO_DEBUG_TCPH(ci_log("%s: move %d:%d to %d:%d", __func__,
                       old_thr->id, priv->sock_id,
                       new_thr->id, new_s->b.bufid));

  /* Copy TCP/UDP state */
  memcpy(mid_s, old_s, CI_MAX(sizeof(ci_tcp_state), sizeof(ci_udp_state)));

  /* do not copy old_s->b.bufid
   * and other fields in stack address space */
  mid_s->b.sb_aflags |= CI_SB_AFLAG_ORPHAN;
  mid_s->b.bufid = new_s->b.bufid;
  mid_s->b.post_poll_link = new_s->b.post_poll_link;
  mid_s->b.ready_link = new_s->b.ready_link;
  mid_s->reap_link = new_s->reap_link;

  if( old_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *mid_ts = SOCK_TO_TCP(mid_s);

    /* Keep the links and timers that belong to the freshly allocated
     * socket, and reset the packet queues in the intermediate copy. */
    mid_ts->timeout_q_link = new_ts->timeout_q_link;
    mid_ts->tx_ready_link = new_ts->tx_ready_link;
    mid_ts->rto_tid = new_ts->rto_tid;
    mid_ts->delack_tid = new_ts->delack_tid;
    mid_ts->zwin_tid = new_ts->zwin_tid;
    mid_ts->kalive_tid = new_ts->kalive_tid;
    mid_ts->cork_tid = new_ts->cork_tid;
    ci_ip_queue_init(&mid_ts->recv1);
    ci_ip_queue_init(&mid_ts->recv2);
    ci_ip_queue_init(&mid_ts->send);
    ci_ip_queue_init(&mid_ts->retrans);
    mid_ts->send_prequeue = OO_PP_ID_NULL;
    /* Must be reset in the intermediate copy: the whole-struct assignment
     * below overwrites anything stored directly in new_ts. */
    mid_ts->retrans_ptr = OO_PP_NULL;
    mid_ts->tmpl_head = OO_PP_NULL;
    oo_atomic_set(&mid_ts->send_prequeue_in, 0);

    *new_ts = *mid_ts;
    ci_pmtu_state_init(alien_ni, &new_ts->s, &new_ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
#if CI_CFG_FD_CACHING
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_link, sp, "epch");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_link);
    sp = TS_OFF(alien_ni, new_ts);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_state, epcache_fd_link));
    ci_ni_dllist_link_init(alien_ni, &new_ts->epcache_fd_link, sp, "ecfd");
    ci_ni_dllist_self_link(alien_ni, &new_ts->epcache_fd_link);
#endif

    /* free temporary mid_ts storage */
    CI_FREE_OBJ(mid_ts);
  }
  else {
    ci_udp_state *mid_us = SOCK_TO_UDP(mid_s);

    *SOCK_TO_UDP(new_s) = *mid_us;
    CI_FREE_OBJ(mid_us);
  }

  /* Move the filter */
  old_ep = ci_trs_ep_get(old_thr, priv->sock_id);
  new_ep = ci_trs_ep_get(new_thr, new_s->b.bufid);
  rc = tcp_helper_endpoint_move_filters_pre(old_ep, new_ep);
  if( rc != 0 ) {
    rc = -EINVAL;
    goto fail3;
  }

  /* Allocate a new file for the new endpoint */
  rc = onload_alloc_file(new_thr, new_s->b.bufid, priv->_filp->f_flags,
                         priv->fd_type, &old_ep->alien_ref);
  if( rc != 0 )
    goto fail4;
  ci_assert(old_ep->alien_ref);

  /* Copy F_SETOWN_EX, F_SETSIG to the new file */
#ifdef F_SETOWN_EX
  rcu_read_lock();
  __f_setown(old_ep->alien_ref->_filp, priv->_filp->f_owner.pid,
             priv->_filp->f_owner.pid_type, 1);
  rcu_read_unlock();
#endif
  old_ep->alien_ref->_filp->f_owner.signum = priv->_filp->f_owner.signum;
  old_ep->alien_ref->_filp->f_flags |= priv->_filp->f_flags & O_NONBLOCK;

  /* Move os_socket from one ep to another */
  if( tcp_helper_endpoint_set_aflags(new_ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED ) {
    fput(old_ep->alien_ref->_filp);
    rc = -EBUSY;
    goto fail2; /* state & filters are cleared by fput() */
  }

  /********* Point of no return  **********/
  ci_wmb();
  priv->fd_type = CI_PRIV_TYPE_ALIEN_EP;
  priv->_filp->f_op = &linux_tcp_helper_fops_alien;
  ci_wmb();
  oo_file_moved(priv);

  /* Read all already-arrived packets after the filters move but before
   * copying of the receive queue. */
  ci_netif_poll(&old_thr->netif);
  tcp_helper_endpoint_move_filters_post(old_ep, new_ep);
  ci_assert( efab_file_move_supported(&old_thr->netif, old_s));

  /* There's a gap between un-registering the old ep, and registering the
   * the new.  However, the notifications shouldn't be in use for sockets
   * that are in a state that can be moved, so this shouldn't be a problem.
   */
  if( old_ep->os_sock_pt.whead ) {
    pollwait_register = 1;
    efab_tcp_helper_os_pollwait_unregister(old_ep);
  }
  ci_assert_equal(new_ep->os_socket, NULL);
  new_ep->os_socket = oo_file_ref_xchg(&old_ep->os_socket, NULL);
  ci_assert_equal(old_ep->os_socket, NULL);
  if( pollwait_register )
    efab_tcp_helper_os_pollwait_register(new_ep);

  ci_bit_clear(&new_s->b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
  if( new_s->b.state == CI_TCP_ESTABLISHED )
    CI_TCP_STATS_INC_CURR_ESTAB(alien_ni);


  /* Copy recv queue */
  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    ci_tcp_state *old_ts = SOCK_TO_TCP(old_s);

    /* Stop timers */
    ci_ip_timer_clear(&old_thr->netif, &old_ts->kalive_tid);
    ci_ip_timer_clear(&old_thr->netif, &old_ts->delack_tid);

    efab_ip_queue_copy(alien_ni, &new_ts->recv1,
                       &old_thr->netif, &old_ts->recv1);
    efab_ip_queue_copy(alien_ni, &new_ts->recv2,
                       &old_thr->netif, &old_ts->recv2);
    new_ts->recv1_extract = new_ts->recv1.head;

    /* Drop reorder buffer */
    ci_ip_queue_init(&new_ts->rob);
    new_ts->dsack_block = OO_PP_INVALID;
    new_ts->dsack_start = new_ts->dsack_end = 0;
    /* NOTE(review): the inclusive bound assumes last_sack[] has
     * CI_TCP_SACK_MAX_BLOCKS + 1 entries - confirm against the
     * ci_tcp_state declaration. */
    for( i = 0; i <= CI_TCP_SACK_MAX_BLOCKS; i++ )
      new_ts->last_sack[i] = OO_PP_NULL;
  }
  else {
    /* There should not be any recv q, but drop it to be sure */
    ci_udp_recv_q_init(&SOCK_TO_UDP(new_s)->recv_q);
  }

  /* Old stack can be unlocked */
  old_s->b.sb_flags |= CI_SB_FLAG_MOVED;
  ci_netif_unlock(&old_thr->netif);

  ci_assert( efab_file_move_supported(alien_ni, new_s) );

  /* Move done: poll for any new data. */
  ci_netif_poll(alien_ni);

  if( new_s->b.state & CI_TCP_STATE_TCP ) {
    ci_tcp_state *new_ts = SOCK_TO_TCP(new_s);
    /* Timers setup: delack, keepalive */
    if( (new_ts->acks_pending & CI_TCP_ACKS_PENDING_MASK) > 0)
      ci_tcp_timeout_delack(alien_ni, new_ts);
    ci_tcp_kalive_reset(alien_ni, new_ts);
  }


  /* Old ep: we are done. */
  ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_BIT);
  old_s->b.moved_to_stack_id = alien_ni->state->stack_id;
  old_s->b.moved_to_sock_id = new_s->b.bufid;
  if( ! list_empty(&priv->_filp->f_ep_links) )
    ci_bit_set(&old_s->b.sb_aflags, CI_SB_AFLAG_MOVED_AWAY_IN_EPOLL_BIT);

  ci_sock_unlock(&old_thr->netif, &old_s->b);
  ci_sock_unlock(alien_ni, &new_s->b);
  ci_assert(ci_netif_is_locked(alien_ni));
  OO_DEBUG_TCPH(ci_log("%s: -> [%d:%d] %s", __func__,
                       new_thr->id, new_s->b.bufid,
                       ci_tcp_state_str(new_s->b.state)));
  return 0;

  /* Error unwinding: each label undoes one more step and falls through to
   * the next one. */
fail4:
  /* We clear the filters from the new ep.
   * For now, we do not need to re-insert old filters because hw filters
   * are already here (in case of accepted socket) or not needed.
   * We have not removed old sw filters yet. */
  tcp_helper_endpoint_move_filters_undo(old_ep, new_ep);
fail3:
  if( new_s->b.state & CI_TCP_STATE_TCP )
    ci_tcp_state_free(alien_ni, SOCK_TO_TCP(new_s));
  else
    ci_udp_state_free(alien_ni, SOCK_TO_UDP(new_s));
fail2:
  ci_netif_unlock(alien_ni);
fail1:
  ci_netif_unlock(&old_thr->netif);
fail1_ni_unlocked:
  ci_sock_unlock(&old_thr->netif, &old_s->b);
  OO_DEBUG_TCPH(ci_log("%s: rc=%d", __func__, rc));
  return rc;
}
Ejemplo n.º 5
0
/*! Copy into [ts] the socket options and related fields that it should
 * inherit from [s] (generic socket state) and [c] (common TCP state).
 *
 * Per the original notes, options are inherited during EP promotion for
 * unix, during the accept handler in Windows, and as a result of
 * setsockopt:SOL_SOCKET:SO_UPDATE_ACCEPT_CONTEXT.  MUST have a lock on
 * [ts].
 *
 * The non-blocking flags of [s] are inherited only when the stack option
 * accept_inherit_nonblock is set.  The stack option tcp_force_nodelay
 * overrides the inherited NODELAY flag: 1 forces it on, 2 forces it off.
 *
 * [ctxt] is passed through to ci_tcp_init_rcv_wnd().
 */
static void ci_tcp_inherit_options(ci_netif* ni, ci_sock_cmn* s,
                                   ci_tcp_socket_cmn* c, 
                                   ci_tcp_state* ts, const char* ctxt)
{
  ci_assert(ni);
  ci_assert(s);
  ci_assert(c);
  ci_assert(ts);

  /* SOL_SOCKET-level options and device binding. */
  ts->s.so = s->so;
  ts->s.cp.so_bindtodevice = s->cp.so_bindtodevice;
  ts->s.cp.ip_ttl = s->cp.ip_ttl;
  ts->s.rx_bind2dev_ifindex = s->rx_bind2dev_ifindex;
  ts->s.rx_bind2dev_base_ifindex = s->rx_bind2dev_base_ifindex;
  ts->s.rx_bind2dev_vlan = s->rx_bind2dev_vlan;
  ci_tcp_set_sndbuf(ni, ts);      /* eff_mss must be valid */
  ci_tcp_set_rcvbuf(ni, ts);      /* and amss, and rcv_wscl */

  /* Flag words: s_aflags, sb_aflags and s_flags. */
  {
    unsigned sock_aflag_mask = CI_SOCK_AFLAG_TCP_INHERITED;
    unsigned sb_aflag_mask =
      NI_OPTS(ni).accept_inherit_nonblock ?
        (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) : 0;

    /* None of the inherited socket flags may already be set in [ts]. */
    ci_assert((ts->s.s_aflags & sock_aflag_mask) == 0);
    ci_atomic32_or(&ts->s.s_aflags, s->s_aflags & sock_aflag_mask);

    switch( NI_OPTS(ni).tcp_force_nodelay ) {
    case 1:
      ci_bit_set(&ts->s.s_aflags, CI_SOCK_AFLAG_NODELAY_BIT);
      break;
    case 2:
      ci_bit_clear(&ts->s.s_aflags, CI_SOCK_AFLAG_NODELAY_BIT);
      break;
    default:
      break;
    }

    ci_assert((ts->s.b.sb_aflags & sb_aflag_mask) == 0);
    ci_atomic32_or(&ts->s.b.sb_aflags, s->b.sb_aflags & sb_aflag_mask);

    /* [ts] is expected to carry only PMTU_DO among the inherited s_flags;
     * drop it, then take whatever [s] has. */
    ci_assert_equal((ts->s.s_flags & CI_SOCK_FLAG_TCP_INHERITED),
                    CI_SOCK_FLAG_PMTU_DO);
    ts->s.s_flags &= ~CI_SOCK_FLAG_PMTU_DO;
    ts->s.s_flags |= s->s_flags & CI_SOCK_FLAG_TCP_INHERITED;
  }

  /* Bug1861: while not defined as such, various SOL_TCP/SOL_IP sockopts
   * are inherited in Linux. */
  /* TCP_KEEPIDLE, TCP_KEEPINTVL, TCP_KEEPCNT */
  ts->c.t_ka_time          = c->t_ka_time;
  ts->c.t_ka_time_in_secs  = c->t_ka_time_in_secs;
  ts->c.t_ka_intvl         = c->t_ka_intvl;
  ts->c.t_ka_intvl_in_secs = c->t_ka_intvl_in_secs;
  ts->c.ka_probe_th        = c->ka_probe_th;

  /* IP header template: TTL and TOS come from [s]. */
  ci_ip_hdr_init_fixed(&ts->s.pkt.ip, IPPROTO_TCP,
                        s->pkt.ip.ip_ttl,
                        s->pkt.ip.ip_tos);
  ts->s.cmsg_flags = s->cmsg_flags;
  ts->s.timestamping_flags = s->timestamping_flags;

  /* Must have set up so.sndbuf */
  ci_tcp_init_rcv_wnd(ts, ctxt);
}
Ejemplo n.º 6
0
/* Put the TCP socket [ep] into the listening state.
 *
 * [fd] is the file descriptor of the socket; [backlog] is the value given
 * by the caller of listen().  A negative [backlog] selects the stack's
 * max_ep_bufs option; a value below the acceptq_min_backlog option is
 * raised to it.
 *
 * Returns:
 *   - CI_SOCKET_HANDOVER if the stack options or an alien-bound address
 *     require the socket to be handed over;
 *   - 0 on success, including the case of a socket that is already
 *     listening (only its backlog is updated);
 *   - an error (via CI_SET_ERROR) if the socket is neither closed nor
 *     listening, or if the implicit bind fails;
 *   - on later failures the socket is reverted to a non-listening state
 *     and rc (driver build) or CI_SOCKET_ERROR (otherwise) is returned.
 */
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /* 
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), 
             backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( !NI_OPTS(netif).tcp_server_loopback) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  /* Test the signed parameter: the unsigned copy can never be negative, so
   * a check on it would silently never fire. */
  if( backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  /* Already listening: just update the backlog. */
  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    /* NOTE(review): the result of the OS listen call is ignored here. */
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }


  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them). 
   */

  ts->s.tx_errno = EPIPE;



  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose 
     * a port.  As we haven't been given a hint we let the OS choose. */

    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));

    } else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  } 

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);



  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* install all the filters needed for this connection 
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   *  TODO: handle BINDTODEVICE by setting phys_port parameter to correct 
   *        physical L5 port index
   *  TODO: handle REUSEADDR by setting last parameter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    /* Partial filter installation: tolerated only with the no_fail option,
     * otherwise undone and reported as -ENOBUFS. */
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters  returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }


  /* 
   * Call of system listen() is required for listen any, local host
   * communications server and multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
					 S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}