Exemple #1
0
citp_waitable_obj* citp_waitable_obj_alloc(ci_netif* netif)
{
  citp_waitable_obj* wo;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));

  if( netif->state->deferred_free_eps_head != CI_ILL_END ) {
    ci_uint32 link;
    do
      link = netif->state->deferred_free_eps_head;
    while( ci_cas32_fail(&netif->state->deferred_free_eps_head,
                         link, CI_ILL_END));
    while( link != CI_ILL_END ) {
      citp_waitable* w = ID_TO_WAITABLE(netif, link);
      link = w->next_id;
      CI_DEBUG(w->next_id = CI_ILL_END);
      ci_assert_equal(w->state, CI_TCP_STATE_FREE);
      ci_assert(OO_SP_IS_NULL(w->wt_next));
      w->wt_next = netif->state->free_eps_head;
      netif->state->free_eps_head = W_SP(w);
    }
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) ) {
    ci_tcp_helper_more_socks(netif);

    if( OO_SP_IS_NULL(netif->state->free_eps_head) )
      ci_netif_timeout_reap(netif);
  }

  if( OO_SP_IS_NULL(netif->state->free_eps_head) )
    return NULL;

  LOG_TV(ci_log("%s: allocating %d", __FUNCTION__,
                OO_SP_FMT(netif->state->free_eps_head)));

  ci_assert(IS_VALID_SOCK_P(netif, netif->state->free_eps_head));
#if !defined(__KERNEL__) && !defined (CI_HAVE_OS_NOPAGE)
  ci_netif_mmap_shmbuf(netif,
                       (netif->state->free_eps_head >> EP_BUF_BLOCKSHIFT) + 1);
#endif
  wo = SP_TO_WAITABLE_OBJ(netif, netif->state->free_eps_head);

  ci_assert(OO_SP_EQ(W_SP(&wo->waitable), netif->state->free_eps_head));
  ci_assert_equal(wo->waitable.state, CI_TCP_STATE_FREE);
  ci_assert_equal(wo->waitable.sb_aflags, (CI_SB_AFLAG_ORPHAN | CI_SB_AFLAG_NOT_READY));
  ci_assert_equal(wo->waitable.lock.wl_val, 0);

  netif->state->free_eps_head = wo->waitable.wt_next;
  CI_DEBUG(wo->waitable.wt_next = OO_SP_NULL);
  ci_assert_equal(wo->waitable.state, CI_TCP_STATE_FREE);

  return wo;
}
Exemple #2
0
int
efab_ioctl_get_ep(ci_private_t* priv, oo_sp sockp,
                  tcp_helper_endpoint_t** ep_out)
{
  ci_assert(ep_out != NULL);
  if( priv->thr == NULL || ! IS_VALID_SOCK_P(&priv->thr->netif, sockp) )
    return -EINVAL;
  *ep_out = ci_trs_ep_get(priv->thr, sockp);
  ci_assert(*ep_out != NULL);
  return 0;
}
Exemple #3
0
void citp_waitable_all_fds_gone(ci_netif* ni, oo_sp w_id)
{
  citp_waitable_obj* wo;

  ci_assert(ni);
  ci_assert(IS_VALID_SOCK_P(ni, w_id));
  ci_assert(ci_netif_is_locked(ni));

  wo = SP_TO_WAITABLE_OBJ(ni, w_id);
  ci_assert(wo->waitable.state != CI_TCP_STATE_FREE);

  LOG_NC(ci_log("%s: %d:%d %s", __FUNCTION__, NI_ID(ni), OO_SP_FMT(w_id),
		ci_tcp_state_str(wo->waitable.state)));

  /* listening socket is closed in blocking conext, see
   * efab_tcp_helper_close_endpoint().
   * CI_SB_AFLAG_ORPHAN is set earlier in this case.. */
  CI_DEBUG(if( (wo->waitable.sb_aflags & CI_SB_AFLAG_ORPHAN) &&
               wo->waitable.state != CI_TCP_LISTEN )
	     ci_log("%s: %d:%d already orphan", __FUNCTION__,
                    NI_ID(ni), OO_SP_FMT(w_id)));

  /* It's essential that an ORPHANed socket not be on the deferred
   * socket list, because the same link field is used as timewait
   * list, free list etc.  We must purge the deferred list before
   * setting the orphan flag.
   *
   * NB. This socket cannot now be added to the deferred list, because
   * no-one has a reference to it.
   */
  ci_netif_purge_deferred_socket_list(ni);
  ci_bit_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);

  /* We also need to remove the socket from the post-poll list.  It may
   * have been left there because the stack believes a wakeup is needed.
   */
  ci_ni_dllist_remove_safe(ni, &wo->waitable.post_poll_link);
  ci_ni_dllist_remove_safe(ni, &wo->waitable.ready_link);
  wo->waitable.ready_list_id = 0;

  citp_waitable_cleanup(ni, wo, 1);
}
Exemple #4
0
static int
efab_tcp_helper_pipe_attach(ci_private_t* priv, void *arg)
{
  oo_pipe_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  tcp_helper_endpoint_t* ep = NULL;
  int rc;

  OO_DEBUG_TCPH(ci_log("%s: ep_id=%d", __FUNCTION__, op->ep_id));
  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }

  /* Validate and find the endpoint. */
  if( ! IS_VALID_SOCK_P(&trs->netif, op->ep_id) )
    return -EINVAL;
  ep = ci_trs_get_valid_ep(trs, op->ep_id);
  if( tcp_helper_endpoint_set_aflags(ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED )
    return -EBUSY;

  rc = oo_create_fd(ep, op->flags, CI_PRIV_TYPE_PIPE_READER);
  if( rc < 0 ) {
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }
  op->rfd = rc;

  rc = oo_create_fd(ep, op->flags, CI_PRIV_TYPE_PIPE_WRITER);
  if( rc < 0 ) {
    efab_linux_sys_close(op->rfd);
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }
  op->wfd = rc;

  return 0;
}
Exemple #5
0
static int
efab_tcp_helper_sock_attach(ci_private_t* priv, void *arg)
{
  oo_sock_attach_t* op = arg;
  tcp_helper_resource_t* trs = priv->thr;
  tcp_helper_endpoint_t* ep = NULL;
  citp_waitable_obj *wo;
  int rc, flags, type = op->type;

/* SOCK_CLOEXEC and SOCK_NONBLOCK exist from 2.6.27 both */
#ifdef SOCK_TYPE_MASK
  BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
  flags = type & (SOCK_CLOEXEC | SOCK_NONBLOCK);
  type &= SOCK_TYPE_MASK;
# ifdef SOCK_NONBLOCK
    if( SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK) )
      flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
# endif
#else
  flags = 0;
#endif

  OO_DEBUG_TCPH(ci_log("%s: ep_id=%d", __FUNCTION__, op->ep_id));
  if( trs == NULL ) {
    LOG_E(ci_log("%s: ERROR: not attached to a stack", __FUNCTION__));
    return -EINVAL;
  }

  /* Validate and find the endpoint. */
  if( ! IS_VALID_SOCK_P(&trs->netif, op->ep_id) )
    return -EINVAL;
  ep = ci_trs_get_valid_ep(trs, op->ep_id);
  if( tcp_helper_endpoint_set_aflags(ep, OO_THR_EP_AFLAG_ATTACHED) &
      OO_THR_EP_AFLAG_ATTACHED )
    return -EBUSY;
  wo = SP_TO_WAITABLE_OBJ(&trs->netif, ep->id);

  /* create OS socket */
  if( op->domain != AF_UNSPEC ) {
    struct socket *sock;
    struct file *os_file;

    rc = sock_create(op->domain, type, 0, &sock);
    if( rc < 0 ) {
      LOG_E(ci_log("%s: ERROR: sock_create(%d, %d, 0) failed (%d)",
                   __FUNCTION__, op->domain, type, rc));
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return rc;
    }
    os_file = sock_alloc_file(sock, flags, NULL);
    if( IS_ERR(os_file) ) {
      LOG_E(ci_log("%s: ERROR: sock_alloc_file failed (%ld)",
                   __FUNCTION__, PTR_ERR(os_file)));
      sock_release(sock);
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return PTR_ERR(os_file);
    }
    rc = efab_attach_os_socket(ep, os_file);
    if( rc < 0 ) {
      LOG_E(ci_log("%s: ERROR: efab_attach_os_socket failed (%d)",
                   __FUNCTION__, rc));
      /* NB. efab_attach_os_socket() consumes [os_file] even on error. */
      tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
      return rc;
    }
    wo->sock.domain = op->domain;
    wo->sock.ino = ep->os_socket->file->f_dentry->d_inode->i_ino;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
    wo->sock.uid = ep->os_socket->file->f_dentry->d_inode->i_uid;
#else
    wo->sock.uid = __kuid_val(ep->os_socket->file->f_dentry->d_inode->i_uid);
#endif
  }

  /* Create a new file descriptor to attach the stack to. */
  ci_assert((wo->waitable.state & CI_TCP_STATE_TCP) ||
            wo->waitable.state == CI_TCP_STATE_UDP);
  rc = oo_create_fd(ep, flags,
                    (wo->waitable.state & CI_TCP_STATE_TCP) ?
                    CI_PRIV_TYPE_TCP_EP : CI_PRIV_TYPE_UDP_EP);
  if( rc < 0 ) {
    ci_irqlock_state_t lock_flags;
    struct oo_file_ref* os_socket;
    ci_irqlock_lock(&ep->thr->lock, &lock_flags);
    os_socket = ep->os_socket;
    ep->os_socket = NULL;
    ci_irqlock_unlock(&ep->thr->lock, &lock_flags);
    if( os_socket != NULL )
      oo_file_ref_drop(os_socket);
    tcp_helper_endpoint_clear_aflags(ep, OO_THR_EP_AFLAG_ATTACHED);
    return rc;
  }

  op->fd = rc;
#ifdef SOCK_NONBLOCK
  if( op->type & SOCK_NONBLOCK )
    ci_bit_mask_set(&wo->waitable.sb_aflags, CI_SB_AFLAG_O_NONBLOCK);
#endif

  /* Re-read the OS socket buffer size settings.  This ensures we'll use
   * up-to-date values for this new socket.
   */
  efab_get_os_settings(&NI_OPTS_TRS(trs));
  return 0;
}
Exemple #6
0
/*
** promote a synrecv structure to an established socket
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* grab a tcp_state structure that will go onto the accept queue.  We take
     * from the cache of EPs if any are available
     */
    ts = get_ts_from_cache (netif, tsr, tls); 
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached */
        ts = get_ts_from_cache(netif, tsr, tls);
        if (ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }


      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if (rc < 0) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if (ts->c.user_mss && ts->c.user_mss < ts->smss)
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss>0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited. 
     * Note: Windows does not inherit rcvbuf until the call to accept 
     * completes. The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }