Example 1
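/* Walk the probe sequence for a filter being removed: starting at [hash1]
 * and stepping by [hash2], drop the route count on each of the [hops]
 * intermediate entries (reclaiming tombstoned entries whose count reaches
 * zero), then clear or tombstone the final entry at [last_tbl_i] depending
 * on whether other routes still pass through it. */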
static void
__ci_netif_filter_remove(ci_netif* ni, unsigned hash1,
                         unsigned hash2, int hops, unsigned last_tbl_i)
{
  ci_netif_filter_table* tbl = ni->filter_table;
  ci_netif_filter_table_entry* entry;
  unsigned tbl_i;
  int i;

  tbl_i = hash1;
  for( i = 0; i < hops; ++i ) {
    entry = &tbl->table[tbl_i];
    ci_assert(entry->id != EMPTY);
    ci_assert(entry->route_count > 0);
    if( --entry->route_count == 0 && entry->id == TOMBSTONE ) {
      CITP_STATS_NETIF(--ni->state->stats.table_n_slots);
      entry->id = EMPTY;
    }
    tbl_i = (tbl_i + hash2) & tbl->table_size_mask;
  }
  ci_assert(tbl_i == last_tbl_i);

  CITP_STATS_NETIF(--ni->state->stats.table_n_entries);
  entry = &tbl->table[tbl_i];
  if( entry->route_count == 0 ) {
    CITP_STATS_NETIF(--ni->state->stats.table_n_slots);
    entry->id = EMPTY;
  }
  else {
    entry->id = TOMBSTONE;
  }
}
Example 2
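/* connect() handler for an accelerated UDP socket.  Performs the connect
 * under the netif lock and hands the socket over to the OS if the
 * connection cannot be accelerated. */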
static int citp_udp_connect(citp_fdinfo* fdinfo,
			    const struct sockaddr* sa, socklen_t sa_len,
                            citp_lib_context_t* lib_context)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  int rc;

  Log_V(log(LPF "connect(%d, sa, %d)", fdinfo->fd, sa_len));

  if( (epi->sock.s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) != 0 ) {
    log("ERROR: connect of socket with SO_REUSEPORT not supported unless "
        "supported by the OS.");
    return -1;
  }

  ci_netif_lock_fdi(epi);
  rc = ci_udp_connect(&epi->sock, fdinfo->fd, sa, sa_len);
  ci_netif_unlock_fdi(epi);

  if( rc == CI_SOCKET_HANDOVER ) {
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_connect);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  citp_fdinfo_release_ref( fdinfo, 0 );
  return rc;
}
Example 3
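/* setsockopt() handler for an accelerated UDP socket.  Passes the option
 * through to ci_udp_setsockopt() and hands the socket over to the OS if
 * the option cannot be supported on the accelerated path. */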
static int citp_udp_setsockopt(citp_fdinfo* fdinfo, int level,
		       int optname, const void* optval, socklen_t optlen)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  citp_socket* ep    = &epi->sock;
  ci_sock_cmn* s     = ep->s;
  int rc;

  Log_VSC(log("%s("EF_FMT", %d, %d)", __FUNCTION__,
	      EF_PRI_ARGS(epi, fdinfo->fd),  level, optname));

  rc = ci_udp_setsockopt(&epi->sock, fdinfo->fd,
			 level, optname, optval, optlen);

  Log_V(log(LPF "setsockopt: fd=%d rc=%d", fdinfo->fd, rc));

  if( rc == CI_SOCKET_HANDOVER ) {
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_setsockopt);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  if( ci_opt_is_setting_reuseport(level, optname, optval, optlen) != 0 &&
      ! CI_SOCK_NOT_BOUND(s) ) {
    ci_log("%s: setting reuseport after binding on udp not supported",
           __FUNCTION__);
    return -ENOSYS;
  }

  citp_fdinfo_release_ref(fdinfo, 0);
  return rc;
}
Example 4
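/* Try to take a tcp_state from the listening socket's endpoint cache.
 * Returns NULL if the cache is empty, or if the cached state was bound to
 * a different local address (in which case it is re-queued on the cache
 * list). */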
ci_inline ci_tcp_state*
get_ts_from_cache(ci_netif *netif, 
                  ci_tcp_state_synrecv* tsr, 
                  ci_tcp_socket_listen* tls)
{
  ci_tcp_state *ts = NULL;
#if CI_CFG_FD_CACHING
  if( ci_ni_dllist_not_empty(netif, &tls->epcache.cache) ) {
    /* Take the entry from the cache */
    ci_ni_dllist_link *link = ci_ni_dllist_pop(netif, &tls->epcache.cache);
    ts = CI_CONTAINER(ci_tcp_state, epcache_link, link);
    ci_assert(ts);
    ci_ni_dllist_self_link(netif, &ts->epcache_link);

    LOG_EP(ci_log("Taking cached fd %d off cached list, (onto acceptq)",
           ts->cached_on_fd));

    if( tcp_laddr_be32(ts) == tsr->l_addr ) {
      ci_tcp_state_init(netif, ts, 1);
      /* Shouldn't have touched these bits of state */
      ci_assert(!(ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
      ci_assert(ci_tcp_is_cached(ts));

      CITP_STATS_NETIF(++netif->state->stats.sockcache_hit);
      CITP_STATS_TCP_LISTEN(++tls->stats.n_sockcache_hit);
    }
    else {
      /* Oh dear -- the tcp-state we cached was using a different local IP
       * address.  This means we've accepted a connection on a different
       * interface than the one the cached state was using, so we can't
       * share the hardware filter after all.  For now, just bung it back
       * on the list.
       */
      LOG_EP(ci_log("changed interface of cached EP, re-queueing"));
      ci_ni_dllist_push_tail(netif, &tls->epcache.cache, &ts->epcache_link);
      ts = NULL;
      CITP_STATS_NETIF(++netif->state->stats.sockcache_miss_intmismatch);
    }
  }
#endif
  return ts;
}
Example 5
/* Conclude the EP's binding.  This function is abstracted from the
 * main bind code to allow implicit binds that occur when sendto() is
 * called on an OS socket.  [lport] and CI_SIN(addr)->sin_port do not
 * have to be the same value. */
static int ci_udp_bind_conclude(citp_socket* ep, const struct sockaddr* addr,
                                ci_uint16 lport )
{
    ci_udp_state* us;
    ci_uint32 addr_be32;
    int rc;

    CHECK_UEP(ep);
    ci_assert(addr != NULL);

    if( ci_udp_should_handover(ep, addr, lport) )
        goto handover;

    addr_be32 = ci_get_ip4_addr(ep->s->domain, addr);

    ci_udp_set_laddr(ep, addr_be32, lport);
    us = SOCK_TO_UDP(ep->s);
    if( addr_be32 != 0 )
        us->s.cp.sock_cp_flags |= OO_SCP_LADDR_BOUND;
    /* reset any rx/tx that have taken place already */
    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);

#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL )
        us->s.ofe_code_start = ofe_socktbl_find(
                                   ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                   udp_laddr_be32(us), udp_raddr_be32(us),
                                   udp_lport_be16(us), udp_rport_be16(us));
#endif

    /* OS source addrs have already been handed-over, so this must be one of
     * our src addresses.
     */
    rc = ci_udp_set_filters(ep, us);
    ci_assert( !UDP_GET_FLAG(us, CI_UDPF_EF_BIND) );
    /*! \todo FIXME isn't the port the thing to be testing here? */
    if( udp_laddr_be32(us) != INADDR_ANY_BE32 )
        UDP_SET_FLAG(us, CI_UDPF_EF_BIND);
    CI_UDPSTATE_SHOW_EP( ep );
    if( rc == CI_SOCKET_ERROR && CITP_OPTS.no_fail) {
        CITP_STATS_NETIF(++ep->netif->state->stats.udp_bind_no_filter);
        goto handover;
    }
    return rc;

handover:
    LOG_UV(log("%s: "SK_FMT" HANDOVER", __FUNCTION__, SK_PRI_ARGS(ep)));
    return CI_SOCKET_HANDOVER;
}
Example 6
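/* Free up space on the listen queue by purging its oldest entry: take the
 * head of the highest-numbered (most-retransmitted) non-empty bucket and
 * drop it.  The caller must ensure the listen queue is not empty. */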
void ci_tcp_listenq_drop_oldest(ci_netif* ni, ci_tcp_socket_listen* tls)
{
  ci_tcp_state_synrecv* tsr;
  int i;

  for( i = CI_CFG_TCP_SYNACK_RETRANS_MAX; i >= 0; --i ) {
    if( ci_ni_dllist_not_empty(ni, &tls->listenq[i]) )
      break;
  }
  ci_assert(ci_ni_dllist_not_empty(ni, &tls->listenq[i]));
  tsr = ci_tcp_link2synrecv(ci_ni_dllist_head(ni, &tls->listenq[i]));
  ci_tcp_listenq_drop(ni, tls, tsr);
  ci_tcp_synrecv_free(ni, tsr);
  CITP_STATS_NETIF(++ni->state->stats.synrecv_purge);
}
Example 7
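/* bind() handler for an accelerated UDP socket.  Handles SO_REUSEPORT
 * clustering (which may move the socket to another stack, forcing a
 * reprobe of the fd) before performing the bind, and hands over to the
 * OS on CI_SOCKET_HANDOVER. */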
static int citp_udp_bind(citp_fdinfo* fdinfo, const struct sockaddr* sa,
			 socklen_t sa_len)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  citp_socket* ep = &epi->sock;
  ci_sock_cmn* s = ep->s;
  int rc;

  Log_V(log(LPF "bind(%d, sa, %d)", fdinfo->fd, sa_len));

  ci_udp_handle_force_reuseport(fdinfo->fd, ep, sa, sa_len);

  if( (s->s_flags & CI_SOCK_FLAG_REUSEPORT) != 0 ) {
    if( (rc = ci_udp_reuseport_bind(ep, fdinfo->fd, sa, sa_len)) == 0 ) {
      /* The socket has moved, so we need to reprobe the fd.  This will
       * also map the new stack into the address space of the executing
       * process.
       */
      fdinfo = citp_fdtable_lookup(fdinfo->fd);
      fdinfo = citp_reprobe_moved(fdinfo, CI_FALSE);
      epi = fdi_to_sock_fdi(fdinfo);
      ep = &epi->sock;
      ci_netif_cluster_prefault(ep->netif);
    }
    else {
      goto done;
    }
  }

  ci_netif_lock_fdi(epi);
  rc = ci_udp_bind(ep, fdinfo->fd, sa, sa_len);
  ci_netif_unlock_fdi(epi);

 done:
  if( rc == CI_SOCKET_HANDOVER ) {
    ci_assert_equal(s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY, 0);
    CITP_STATS_NETIF(++epi->sock.netif->state->stats.udp_handover_bind);
    citp_fdinfo_handover(fdinfo, -1);
    return 0;
  }

  citp_fdinfo_release_ref( fdinfo, 0 );
  return rc;
}
Example 8
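/* Copy [size] bytes of [page], starting at [offset], onto the send queue
 * of [ts]: map the page, wrap it in a single-iovec msghdr and pass it to
 * ci_tcp_sendmsg() with a kernel address space. */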
ci_inline int sendpage_copy(ci_netif* ni, ci_tcp_state* ts, struct page* page,
                            int offset, size_t size, int flags)
{
  struct iovec io;
  struct msghdr m;
  int rc;

  CITP_STATS_NETIF(++ni->state->stats.tcp_sendpages);

  io.iov_base = (char*) kmap(page) + offset;
  io.iov_len = size;
  m.msg_iov = &io;
  m.msg_iovlen = 1;
  m.msg_controllen = 0;
  m.msg_namelen = 0;

  rc = ci_tcp_sendmsg(ni, ts, &m,
                      (ts->s.b.sb_aflags & CI_SB_AFLAG_O_NONBLOCK) | flags,
                      CI_ADDR_SPC_KERNEL);

  kunmap(page);
  return rc;
}
Example 9
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function, so anything that goes
 * wrong in here can be considered an internal error or a failure of
 * onload.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
    const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
    ci_uint32 dst_be32;
    ci_udp_state* us = SOCK_TO_UDP(ep->s);
    int onloadable;
    int rc = 0;

    CHECK_UEP(ep);

    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
    us->s.rx_errno = 0;
    us->s.tx_errno = 0;

    if( IS_DISCONNECTING(serv_sin) ) {
        rc = ci_udp_disconnect(ep, us, os_sock);
        goto out;
    }
#if CI_CFG_FAKE_IPV6
    if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
        LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
        goto handover;
    }
#endif

    dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);
    if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
        LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
                  FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                  CI_BSWAP_BE16(serv_sin->sin_port), errno));
        goto out;
    }

    us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
    ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
    cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

    switch( us->s.pkt.status ) {
    case retrrc_success:
    case retrrc_nomac:
        onloadable = 1;
        break;
    default:
        onloadable = 0;
        if( NI_OPTS(ep->netif).udp_connect_handover ) {
            LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                       ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
            goto handover;
        }
        break;
    }

    if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
        LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
                   FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                   CI_BSWAP_BE16(serv_sin->sin_port)));
        ci_udp_clr_filters(ep);
        return 0;
    }
    if( CI_IP_IS_LOOPBACK(dst_be32) ) {
        /* After connecting via loopback it is not possible to connect anywhere
         * else.
         */
        LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                   ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
        goto handover;
    }

    if( onloadable ) {
#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
            /* Failed to set filters.  Most likely we've run out of h/w filters.
             * Handover to O/S to avoid breaking the app.
             *
             * TODO: Actually we probably won't break the app if we don't
             * handover, as packets will still get delivered via the kernel
             * stack.  Might be worth having a runtime option to choose whether
             * or not to handover in such cases.
             */
            LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                      FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                      CI_BSWAP_BE16(serv_sin->sin_port), rc));
            CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
            goto out;
        }
    }
    else {
        ci_udp_clr_filters(ep);
    }

    LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
               SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
               ip_addr_str(udp_laddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
               ip_addr_str(udp_raddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
    return 0;

out:
    if( rc < 0 && CITP_OPTS.no_fail )
        goto handover;
    return rc;

handover:
    ci_udp_clr_filters(ep);
    return CI_SOCKET_HANDOVER;
}
Example 10
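/* socket() handler for UDP.  Allocates the fdinfo and a netif, constructs
 * the endpoint and inserts it into the fd table.  On failure, falls back
 * to the OS (CI_SOCKET_HANDOVER) unless the error indicates a
 * driver/library mismatch (ELIBACC). */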
static int citp_udp_socket(int domain, int type, int protocol)
{
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ef_driver_handle fd;
  int rc;
  ci_netif* ni;

  Log_V(log(LPF "socket(%d, %d, %d)", domain, type, protocol));

  epi = CI_ALLOC_OBJ(citp_sock_fdi);
  if( ! epi ) {
    Log_U(ci_log(LPF "socket: failed to allocate epi"));
    errno = ENOMEM;
    goto fail1;
  }
  fdi = &epi->fdinfo;
  citp_fdinfo_init(fdi, &citp_udp_protocol_impl);

  rc = citp_netif_alloc_and_init(&fd, &ni);
  if( rc != 0 ) {
    if( rc == CI_SOCKET_HANDOVER ) {
      /* This implies EF_DONT_ACCELERATE is set, so we handover
       * regardless of CITP_OPTS.no_fail */
      CI_FREE_OBJ(epi);
      return rc;
    }
    goto fail2;
  }

  /* Protect the fdtable entry until we're done initialising. */
  if( fdtable_strict() )  CITP_FDTABLE_LOCK();
  if( (fd = ci_udp_ep_ctor(&epi->sock, ni, domain, type)) < 0 ) {
    /*! ?? \TODO unpick the ci_udp_ep_ctor according to how it failed */
    Log_U(ci_log(LPF "socket: udp_ep_ctor failed"));
    errno = -fd;
    goto fail3;
  }

  citp_fdtable_new_fd_set(fd, fdip_busy, fdtable_strict());
  if( fdtable_strict() )  CITP_FDTABLE_UNLOCK();

  CI_DEBUG(epi->sock.s->pid = getpid());

  /* We're ready.  Unleash us onto the world! */
  ci_assert(epi->sock.s->b.sb_aflags & CI_SB_AFLAG_NOT_READY);
  ci_atomic32_and(&epi->sock.s->b.sb_aflags, ~CI_SB_AFLAG_NOT_READY);
  citp_fdtable_insert(fdi, fd, 0);

  Log_VSS(log(LPF "socket(%d, %d, %d) = "EF_FMT, domain, type, protocol,
              EF_PRI_ARGS(epi,fd)));
  return fd;

 fail3:
  if( CITP_OPTS.no_fail && errno != ELIBACC )
    CITP_STATS_NETIF(++ni->state->stats.udp_handover_socket);
  citp_netif_release_ref(ni, 0);
 fail2:
  CI_FREE_OBJ(epi);
 fail1:
  /* BUG1408: Graceful failure. We'll only fail outright if there's a
   * driver/library mismatch */
  if( CITP_OPTS.no_fail && errno != ELIBACC ) {
    Log_U(ci_log("%s: failed (errno:%d) - PASSING TO OS", __FUNCTION__, errno));
    return CI_SOCKET_HANDOVER;
  }
  return -1;
}
Example 11
/*
** Promote a synrecv structure to an established socket.
**
** Assumes that the caller will handle failure if we can't allocate a new
** tcp_state structure due to memory pressure or the like.
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* Grab a tcp_state structure that will go onto the accept queue.  We
     * take from the cache of EPs if any are available.
     */
    ts = get_ts_from_cache(netif, tsr, tls);
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did that result in any sockets being cached? */
        ts = get_ts_from_cache(netif, tsr, tls);
        if( ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }


      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if( rc < 0 ) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if( ts->c.user_mss && ts->c.user_mss < ts->smss )
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss > 0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited.
     * Note: Windows does not inherit rcvbuf until the call to accept
     * completes.  The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }