Example #1
void citp_waitable_print(citp_waitable* w)
{
  /* Output socket using netstat style output:
   *   TCP 2 0 0.0.0.0:12865 0.0.0.0:0 LISTEN
   *   UDP 0 0 172.16.129.131:57521 0.0.0.0:0 UDP
   */
  if( CI_TCP_STATE_IS_SOCKET(w->state) ) {
    ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w);
    citp_waitable_obj* wo = CI_CONTAINER(citp_waitable_obj, waitable, w);
    int tq = 0;
    int rq = 0;
    
    if( (w->state & CI_TCP_STATE_TCP) &&
       !(w->state & CI_TCP_STATE_NOT_CONNECTED) ) {
      tq = ci_tcp_sendq_n_pkts(&wo->tcp);
      rq = wo->tcp.recv1.num + wo->tcp.recv2.num;
    }
    else if( w->state == CI_TCP_STATE_UDP ) {
      tq = wo->udp.tx_count + oo_atomic_read(&wo->udp.tx_async_q_level);
      rq = ci_udp_recv_q_pkts(&wo->udp.recv_q);
    }
    log("%s %d %d "OOF_IP4PORT" "OOF_IP4PORT" %s",
        citp_waitable_type_str(w), rq, tq,
        OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
        OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)),
        ci_tcp_state_str(w->state));
  }
}
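As a self-contained companion to the netstat-style formatting above, here is a minimal sketch using only libc and the sockets API. struct sock_info and its fields are illustrative stand-ins, not Onload's types; only the output format mirrors the function above.

#include <stdio.h>
#include <arpa/inet.h>

/* Illustrative stand-in for the socket state consulted above. */
struct sock_info {
  const char* type;            /* "TCP" or "UDP" */
  int rq, tq;                  /* receive / transmit queue depths */
  struct in_addr laddr, raddr; /* network byte order */
  unsigned short lport, rport; /* network byte order */
  const char* state;           /* e.g. "LISTEN" */
};

static void print_netstat_style(const struct sock_info* si)
{
  char l[INET_ADDRSTRLEN], r[INET_ADDRSTRLEN];
  inet_ntop(AF_INET, &si->laddr, l, sizeof(l));
  inet_ntop(AF_INET, &si->raddr, r, sizeof(r));
  printf("%s %d %d %s:%u %s:%u %s\n", si->type, si->rq, si->tq,
         l, ntohs(si->lport), r, ntohs(si->rport), si->state);
}

int main(void)
{
  struct sock_info si = { "TCP", 2, 0, { htonl(INADDR_ANY) },
                          { htonl(INADDR_ANY) }, htons(12865), 0, "LISTEN" };
  print_netstat_style(&si);  /* TCP 2 0 0.0.0.0:12865 0.0.0.0:0 LISTEN */
  return 0;
}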
Example #2
ci_inline void ci_tcp_set_addr_on_promote(ci_netif* netif, ci_tcp_state* ts,
                                          ci_tcp_state_synrecv* tsr,
                                          ci_tcp_socket_listen* tls)
{
  /* copy and initialise state */
  ts->s.pkt.ip.ip_saddr_be32 = tsr->l_addr;
  TS_TCP(ts)->tcp_source_be16 = sock_lport_be16(&tls->s);
  ts->s.cp.ip_laddr_be32 = tsr->l_addr;
  ts->s.cp.lport_be16 = sock_lport_be16(&tls->s);
  ci_tcp_set_peer(ts, tsr->r_addr, tsr->r_port);

  /* "filter" equivalent for loopback socket */
  if( OO_SP_NOT_NULL(tsr->local_peer) ) {
    ci_tcp_state *peer = ID_TO_TCP(netif, tsr->local_peer);
    ts->local_peer = tsr->local_peer;
    peer->local_peer = S_SP(ts);
  }
}
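The loopback branch above links the two endpoints symmetrically by stack-local index, so either side can reach its peer without a filter lookup. A self-contained sketch of that linking pattern follows; the endpoint table and index values are illustrative, not Onload's.

#include <stdio.h>

#define EP_INVALID (-1)

/* Illustrative endpoint: just the peer index, as in the loopback case. */
struct endpoint { int local_peer; };

/* Link a and b symmetrically, mirroring ts and peer above. */
static void link_peers(struct endpoint* tbl, int a, int b)
{
  tbl[a].local_peer = b;
  tbl[b].local_peer = a;
}

int main(void)
{
  struct endpoint tbl[4];
  for( int i = 0; i < 4; ++i )
    tbl[i].local_peer = EP_INVALID;
  link_peers(tbl, 1, 3);
  printf("1 <-> %d, 3 <-> %d\n", tbl[1].local_peer, tbl[3].local_peer);
  return 0;
}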
Example #3
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg, ci_uint32* hash_out)
{
  ci_netif_filter_table* tbl;
  unsigned hash1, hash2 = 0;
  unsigned first;

  tbl = ni->filter_table;
  if( hash_out != NULL )
    *hash_out = tcp_hash3(tbl, laddr, lport, raddr, rport, protocol);
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      if( ((laddr    - tbl->table[hash1].laddr) |
           (lport    - sock_lport_be16(s)     ) |
           (raddr    - sock_raddr_be32(s)     ) |
           (rport    - sock_rport_be16(s)     ) |
           (protocol - sock_protocol(s)       )) == 0 )
        if(CI_LIKELY( (s->rx_bind2dev_ifindex == CI_IFID_BAD ||
                       ci_sock_intf_check(ni, s, intf_i, vlan)) ))
          if( callback(s, callback_arg) != 0 )
            return;
    }
    else if( id == EMPTY )
      break;
    /* We defer calculating hash2 until it's needed, just to make the fast
    ** case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                    FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
                    ip_addr_str(raddr), rport, hash1, hash2));
      break;
    }
  }
}
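The iteration contract here is worth spelling out: a non-zero return from the callback stops the walk, which is exactly what the callback(s, callback_arg) != 0 test implements. A generic, self-contained sketch of the same contract, with illustrative names throughout:

#include <stdio.h>

/* Generic form of the iterate-with-callback contract used above: the
 * callback returns non-zero to stop the walk early. */
typedef int (*visit_fn)(int item, void* arg);

static void for_each(const int* items, int n, visit_fn cb, void* arg)
{
  for( int i = 0; i < n; ++i )
    if( cb(items[i], arg) != 0 )
      return;
}

/* Example callback: record the first even item and stop. */
static int find_first_even(int item, void* arg)
{
  if( item % 2 == 0 ) {
    *(int*) arg = item;
    return 1;   /* stop iterating */
  }
  return 0;     /* keep iterating */
}

int main(void)
{
  int items[] = { 3, 5, 8, 9 };
  int found = -1;
  for_each(items, 4, find_first_even, &found);
  printf("first even: %d\n", found);   /* prints 8 */
  return 0;
}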
Example #4
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport, unsigned protocol)
{
  unsigned hash1, hash2 = 0;
  ci_netif_filter_table* tbl;
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
             CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(netif, id);
      if( ((laddr    - tbl->table[hash1].laddr) |
           (lport    - sock_lport_be16(s)     ) |
           (raddr    - sock_raddr_be32(s)     ) |
           (rport    - sock_rport_be16(s)     ) |
           (protocol - sock_protocol(s)       )) == 0 )
        return hash1;
    }
    if( id == EMPTY )  break;
    /* We defer calculating hash2 until it's needed, just to make the fast
     * case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
                   ip_addr_str(raddr), rport, hash1, hash2));
      return -ELOOP;
    }
  }

  return -ENOENT;
}
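Both lookup loops above implement double hashing over a power-of-two table: probe from hash1, step by hash2 (computed lazily, only on the first collision), and give up when the walk returns to the starting slot. The OR of field differences lets a single branch test all five key fields at once. Below is a minimal self-contained sketch of the probing scheme; the hash functions are assumptions for illustration, not Onload's tcp_hash1/tcp_hash2.

#include <stdint.h>
#include <stdio.h>

#define EMPTY (-1)

struct entry { int id; uint32_t key; };

static unsigned hash_1(uint32_t key) { return key * 2654435761u; }
/* The step must be odd so that it is coprime with the power-of-two table
 * size, which guarantees the probe sequence visits every slot once. */
static unsigned hash_2(uint32_t key) { return (key * 40503u) | 1u; }

static int lookup(const struct entry* tbl, unsigned mask, uint32_t key)
{
  unsigned h = hash_1(key) & mask, first = h, step = 0;
  while( 1 ) {
    if( tbl[h].id >= 0 && tbl[h].key == key )
      return (int) h;            /* hit */
    if( tbl[h].id == EMPTY )
      return -1;                 /* miss: probe chain ends at empty slot */
    if( h == first )             /* defer hash_2 until first collision */
      step = hash_2(key);
    h = (h + step) & mask;
    if( h == first )
      return -1;                 /* walked the full cycle: not present */
  }
}

int main(void)
{
  struct entry tbl[8];
  for( int i = 0; i < 8; ++i )
    tbl[i].id = EMPTY;
  unsigned slot = hash_1(42u) & 7u;
  tbl[slot].id = 7;
  tbl[slot].key = 42u;
  printf("lookup(42) -> %d (expect %u)\n", lookup(tbl, 7u, 42u), slot);
  printf("lookup(43) -> %d (expect -1)\n", lookup(tbl, 7u, 43u));
  return 0;
}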
Example #5
/* Log the local and remote address of every allocated socket in the
 * stack's endpoint table. */
static void thc_dump_sockets(ci_netif* netif, oo_dump_log_fn_t log,
                             void* log_arg)
{
  unsigned id;
  for( id = 0; id < netif->state->n_ep_bufs; ++id ) {
    citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(netif, id);
    if( wo->waitable.state != CI_TCP_STATE_FREE ) {
      citp_waitable* w = &wo->waitable;
      ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w);
      log(log_arg, "    %s lcl="OOF_IP4PORT" rmt="OOF_IP4PORT,
          citp_waitable_type_str(w),
          OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
          OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)));
    }
  }
}
Example #6
/* Set a reuseport bind on a socket.
 */
int ci_tcp_reuseport_bind(ci_sock_cmn* sock, ci_fd_t fd)
{
  int rc;
  /* With legacy reuseport we delay the __ci_bind actions to avoid errors
   * when trying to re-use a port for the OS socket, so the PORT_BOUND
   * flag will not have been set yet.
   */
  ci_assert(((sock->s_flags & CI_SOCK_FLAG_PORT_BOUND) != 0) ||
            ((sock->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) != 0));
  ci_assert_nequal(sock->s_flags & CI_SOCK_FLAG_REUSEPORT, 0);
  if( (rc = ci_tcp_ep_reuseport_bind(fd, CITP_OPTS.cluster_name,
                                     CITP_OPTS.cluster_size,
                                     CITP_OPTS.cluster_restart_opt,
                                     sock_laddr_be32(sock),
                                     sock_lport_be16(sock))) != 0 ) {
    errno = -rc;
    return -1;
  }
  return 0;
}
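On failure ci_tcp_ep_reuseport_bind() returns a negative errno value, and the wrapper above converts it to the libc convention of returning -1 with errno set. A minimal sketch of that conversion in isolation; inner_op is a hypothetical stand-in for the kernel-style callee:

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical inner call using the kernel-style convention:
 * 0 on success, -Exxx on failure. */
static int inner_op(void)
{
  return -EADDRINUSE;
}

/* Wrapper converting to the libc convention: -1 plus errno. */
static int outer_op(void)
{
  int rc = inner_op();
  if( rc != 0 ) {
    errno = -rc;
    return -1;
  }
  return 0;
}

int main(void)
{
  if( outer_op() == -1 )
    printf("outer_op failed: %s\n", strerror(errno));
  return 0;
}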
Example #7
static void citp_waitable_dump2(ci_netif* ni, citp_waitable* w, const char* pf,
                                oo_dump_log_fn_t logger, void* log_arg)
{
  unsigned tmp;

  if( CI_TCP_STATE_IS_SOCKET(w->state) ) {
    ci_sock_cmn* s = CI_CONTAINER(ci_sock_cmn, b, w);
    logger(log_arg, "%s%s "NT_FMT"lcl="OOF_IP4PORT" rmt="OOF_IP4PORT" %s",
           pf, citp_waitable_type_str(w), NI_ID(ni), W_FMT(w),
           OOFA_IP4PORT(sock_laddr_be32(s), sock_lport_be16(s)),
           OOFA_IP4PORT(sock_raddr_be32(s), sock_rport_be16(s)),
           ci_tcp_state_str(w->state));
  }
  else
    logger(log_arg, "%s%s "NT_FMT, pf,
           citp_waitable_type_str(w), NI_ID(ni), W_FMT(w));

  if( w->state == CI_TCP_STATE_FREE || w->state == CI_TCP_STATE_AUXBUF )
    return;

  tmp = w->lock.wl_val;
  logger(log_arg, "%s  lock: %x %s%s", pf, tmp,
         (tmp & OO_WAITABLE_LK_LOCKED) ? "LOCKED" : "",
         (tmp & OO_WAITABLE_LK_NEED_WAKE) ? " CONTENDED": "");

  logger(log_arg, "%s  rx_wake=%08x%s tx_wake=%08x%s flags: "CI_SB_FLAGS_FMT,
         pf,
         w->sleep_seq.rw.rx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_RX_B) ? "(RQ)":"    ",
         w->sleep_seq.rw.tx,
         ci_bit_test(&w->wake_request, CI_SB_FLAG_WAKE_TX_B) ? "(RQ)":"    ",
         CI_SB_FLAGS_PRI_ARG(w));

  if( w->spin_cycles == -1 )
    logger(log_arg, "%s  ul_poll: -1 spin cycles -1 usecs", pf);
  else
    logger(log_arg, "%s  ul_poll: %llu spin cycles %u usec", pf,
           w->spin_cycles, oo_cycles64_to_usec(ni, w->spin_cycles));
}
Example #8
/* Dump the occupied slots of the stack's software filter table, together
 * with both hash values used by the double-hashing probe. */
void ci_netif_filter_dump(ci_netif* ni)
{
  int id;
  unsigned i;
  ci_netif_filter_table* tbl;

  ci_assert(ni);
  tbl = ni->filter_table;

  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
#if CI_CFG_STATS_NETIF
  log(FN_FMT "size=%d n_entries=%i n_slots=%i max=%i mean=%i", FN_PRI_ARGS(ni),
      tbl->table_size_mask + 1, ni->state->stats.table_n_entries,
      ni->state->stats.table_n_slots, ni->state->stats.table_max_hops,
      ni->state->stats.table_mean_hops);
#endif

  for( i = 0; i <= tbl->table_size_mask; ++i ) {
    id = tbl->table[i].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      unsigned laddr = tbl->table[i].laddr;
      int lport = sock_lport_be16(s);
      unsigned raddr = sock_raddr_be32(s);
      int rport = sock_rport_be16(s);
      int protocol = sock_protocol(s);
      unsigned hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
      unsigned hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
      log("%010d id=%-10d rt_ct=%d %s "CI_IP_PRINTF_FORMAT":%d "
          CI_IP_PRINTF_FORMAT":%d %010d:%010d",
	  i, id, tbl->table[i].route_count, CI_IP_PROTOCOL_STR(protocol),
          CI_IP_PRINTF_ARGS(&laddr), CI_BSWAP_BE16(lport),
	  CI_IP_PRINTF_ARGS(&raddr), CI_BSWAP_BE16(rport), hash1, hash2);
    }
  }
  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
}
Example #9
/* Fill in the ci_netif_info_t passed from user level, handling the
 * CI_DBG_NETIF_INFO_* sub-operations. */
static int
efab_tcp_helper_get_info(ci_private_t *unused, void *arg)
{
  ci_netif_info_t *info = arg;
  int index, rc=0;
  tcp_helper_resource_t* thr = NULL;
  ci_netif* ni = NULL;
  int flags = EFAB_THR_TABLE_LOOKUP_CHECK_USER | EFAB_THR_TABLE_LOOKUP_NO_WARN; 

#if CI_CFG_EFAB_EPLOCK_RECORD_CONTENTIONS
  int j;
  eplock_resource_t* eplock_rs;
#endif

  info->ni_exists = 0;
  info->ni_no_perms_exists = 0;
  if( info->ni_orphan ) {
    flags |= EFAB_THR_TABLE_LOOKUP_NO_UL;
    info->ni_orphan = 0;
  }
  rc = efab_thr_table_lookup(NULL, info->ni_index, flags, &thr);
  if( rc == 0 ) {
    info->ni_exists = 1;
    info->ni_orphan = (thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND);
    ni = &thr->netif;
    info->mmap_bytes = thr->mem_mmap_bytes;
    info->k_ref_count = thr->k_ref_count;
    info->rs_ref_count = oo_atomic_read(&thr->ref_count);
    memcpy(info->ni_name, ni->state->name, sizeof(ni->state->name));
  } else if( rc == -EACCES ) {
    info->ni_no_perms_id = info->ni_index;
    if( efab_thr_get_inaccessible_stack_info(info->ni_index, 
                                             &info->ni_no_perms_uid,
                                             &info->ni_no_perms_euid,
                                             &info->ni_no_perms_share_with,
                                             info->ni_no_perms_name) == 0 )
      info->ni_no_perms_exists = 1;
  }

  /* sub-ops that do not need the netif to exist */
  if( info->ni_subop == CI_DBG_NETIF_INFO_GET_NEXT_NETIF ) {
    tcp_helper_resource_t* next_thr;

    info->u.ni_next_ni.index = -1;
    for( index = info->ni_index + 1;
         index < 10000 /* FIXME: magic! */;
         ++index ) {
      rc = efab_thr_table_lookup(NULL, index, flags, &next_thr);
      if( rc == 0 ) {
        if( next_thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
          efab_tcp_helper_k_ref_count_dec(next_thr, 1);
        else
          efab_thr_release(next_thr);
        info->u.ni_next_ni.index = index;
        break;
      }
      if( rc == -EACCES ) {
        info->u.ni_next_ni.index = index;
        break;
      }
    }
    rc = 0;
  }
  else if( info->ni_subop == CI_DBG_NETIF_INFO_NOOP ) {
    rc = 0;
  }

  if (!info->ni_exists)
    return 0;

  /* sub-ops that need the netif to exist */
  switch (info->ni_subop)
  {

    case CI_DBG_NETIF_INFO_GET_ENDPOINT_STATE:
      index = info->u.ni_endpoint.index;
      info->u.ni_endpoint.max = thr->netif.ep_tbl_n;
      if ((index < 0) || (index >= (int)thr->netif.ep_tbl_n)) {
        info->u.ni_endpoint.state = CI_TCP_STATE_FREE;
      }
      else {
        citp_waitable_obj* wo = ID_TO_WAITABLE_OBJ(ni, index);

        info->u.ni_endpoint.state = wo->waitable.state;

        if( wo->waitable.state == CI_TCP_STATE_UDP ) {
          ci_udp_state* us = &wo->udp;
          info->u.ni_endpoint.udpstate = us->udpflags;
          info->u.ni_endpoint.rx_pkt_ul = us->recv_q.pkts_delivered;
          info->u.ni_endpoint.rx_pkt_kn = us->stats.n_rx_os;
        }
        else if( wo->waitable.state & CI_TCP_STATE_TCP_CONN ) {
          ci_tcp_state* ts = &wo->tcp;
          info->u.ni_endpoint.tx_pkts_max = ts->so_sndbuf_pkts;
          info->u.ni_endpoint.tx_pkts_num = ts->send.num;
        }
        if( wo->waitable.state & CI_TCP_STATE_SOCKET ) {
          ci_sock_cmn* s = &wo->sock;
          info->u.ni_endpoint.protocol = (int) sock_protocol(s);
          info->u.ni_endpoint.laddr = sock_laddr_be32(s);
          info->u.ni_endpoint.lport = (int) sock_lport_be16(s);
          info->u.ni_endpoint.raddr = sock_raddr_be32(s);
          info->u.ni_endpoint.rport = (int) sock_rport_be16(s);
        }
      }
      break;

    case CI_DBG_NETIF_INFO_GET_NEXT_NETIF:
      /* If the current netif is found, we need to succeed */
      break;

    case CI_DBG_NETIF_INFO_NOOP:
      /* Always succeeds, rc already set */
      break;

    default:
      rc = -EINVAL;
      break;
  }
  if( thr ) {
    /* Lookup needs a matching efab_thr_release() in case of ordinary
     * stack but just a ref_count_dec in case of orphan
     */
    if( thr->k_ref_count & TCP_HELPER_K_RC_NO_USERLAND )
      efab_tcp_helper_k_ref_count_dec(thr, 1);
    else
      efab_thr_release(thr);
  }
  return rc;
}
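The release at the end mirrors the lookup at the top: every successful efab_thr_table_lookup() must be paired with either efab_thr_release() (ordinary stack) or a bare kernel refcount decrement (orphan stack, which has no user-level holder). A generic sketch of that asymmetric put; struct obj and both release helpers are illustrative assumptions, not the Onload types.

#include <assert.h>

/* Illustrative refcounted object with an "orphan" mode, standing in for
 * tcp_helper_resource_t. */
struct obj { int refs; int orphan; };

static void obj_full_release(struct obj* o) { assert(o->refs > 0); --o->refs; }
static void obj_kref_dec(struct obj* o)     { assert(o->refs > 0); --o->refs; }

/* Single put routine choosing the release path, as the tail above does. */
static void obj_put(struct obj* o)
{
  if( o->orphan )
    obj_kref_dec(o);      /* orphan: only the kernel reference exists */
  else
    obj_full_release(o);  /* ordinary stack: full release path */
}

int main(void)
{
  struct obj a = { 1, 0 }, b = { 1, 1 };
  obj_put(&a);
  obj_put(&b);
  return 0;
}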
Example #10
/*
** Promote a synrecv structure to an established socket.
**
** Assumes that the caller will handle failure if we can't allocate a new
** tcp_state structure due to memory pressure or the like.
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;
  
  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* grab a tcp_state structure that will go onto the accept queue.  We take
     * from the cache of EPs if any are available
     */
    ts = get_ts_from_cache(netif, tsr, tls);
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached? */
        ts = get_ts_from_cache(netif, tsr, tls);
        if( ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }

      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting filters */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "borrow" filter from listening socket.  For loopback socket, we
       * do not need filters, but we have to take a reference of the OS
       * socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the hw filter in place for cached
       * EPS. This will probably not have the correct raddr and rport, but as
       * it's sharing the listening socket's filter that's not a problem.  It
       * will be updated if this is still around when the listener is closed.
       */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));

      if( rc < 0 ) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying to
       * insert the sw filter, so we can push the tcp state back onto the
       * cache queue with as few changes as possible if we fail to add the
       * sw filter.
       */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected", ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got SYN via local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    } else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if( ts->c.user_mss && ts->c.user_mss < ts->smss )
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss > 0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited. 
     * Note: Windows does not inherit rcvbuf until the call to accept 
     * completes. The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.).
     */
    ci_tcp_inherit_accept_options(netif, tls, ts, "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);
  
    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts),
               tcp_snd_nxt(ts), ts->snd_max, tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }

  /* Accept queue is full.  (The original tail of this function is not shown
   * in this excerpt; the stat bump and error return below are an assumed
   * reconstruction to keep the function well-formed.) */
  CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_overflow);
  return -ENOSPC;
}
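One detail worth isolating from the promotion path above is the MSS selection: the peer-advertised SMSS is clamped by any user-configured MSS before the effective MSS and initial cwnd are derived. A minimal self-contained sketch of that clamp; the helper name is hypothetical.

#include <assert.h>
#include <stdio.h>

/* Hypothetical helper mirroring the clamp above: prefer the
 * user-configured MSS when it is set and smaller than the
 * peer-advertised one. */
static int clamp_smss(int peer_smss, int user_mss)
{
  int smss = peer_smss;
  if( user_mss && user_mss < smss )
    smss = user_mss;
  assert(smss > 0);
  return smss;
}

int main(void)
{
  printf("%d\n", clamp_smss(1460, 0));     /* no user MSS: 1460 */
  printf("%d\n", clamp_smss(1460, 1200));  /* clamped: 1200 */
  return 0;
}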
Example #11
/* In this bind handler we just check that the address to which we are
 * binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   *       What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );

  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in) 
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif
  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port; 
  sock_laddr_be32(s) = addr_be32;
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}
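The SO_REUSEPORT step above probes the OS socket and falls back to legacy clustering when the kernel lacks the option, which setsockopt() reports as ENOPROTOOPT. The same probe on a plain BSD socket, as a standalone sketch (SO_REUSEPORT is available on Linux 3.9 and later):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  int one = 1;
  if( fd < 0 )
    return 1;
  if( setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) != 0 ) {
    if( errno == ENOPROTOOPT )
      printf("kernel lacks SO_REUSEPORT: legacy fallback\n");
    else
      printf("setsockopt: %s\n", strerror(errno));
  }
  close(fd);
  return 0;
}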