Beispiel #1
0
/* Find the socket that should receive traffic for the given addressing.
 *
 * A fully-specified (laddr, lport, raddr, rport, protocol) lookup is tried
 * first.  If that finds nothing the lookup is repeated with the remote
 * address and port zeroed, i.e. as a wildcard match.
 *
 * Returns the matching socket, or 0 if neither lookup finds an entry.
 */
ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif, unsigned laddr,
                                      unsigned lport, unsigned raddr,
                                      unsigned rport, unsigned protocol)
{
  int slot;

  /* Exact match on the full tuple. */
  slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol);
  LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d",
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             slot));

  if( ! CI_LIKELY( slot >= 0 ) ) {
    /* Nothing found: retry with the remote side wildcarded. */
    raddr = 0;
    rport = 0;
    slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport,
                                  protocol);
    LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d",
               ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
               ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
               slot));

    if( ! CI_LIKELY( slot >= 0 ) )
      return 0;
  }

  /* [slot] indexes the filter table; the entry holds the socket id. */
  return ID_TO_SOCK(netif, netif->filter_table->table[slot].id);
}
Beispiel #2
0
/* Visit every filter-table entry that matches the given addressing.
 *
 * The table is probed starting at tcp_hash1() and stepping by tcp_hash2()
 * (modulo the table size).  For each occupied slot whose socket matches the
 * 5-tuple, and which is either not bound to a device or passes
 * ci_sock_intf_check() for [intf_i]/[vlan], [callback] is invoked with the
 * socket and [callback_arg].  Iteration stops when the callback returns
 * non-zero, when an EMPTY slot is reached, or when the probe sequence wraps
 * back to its starting slot.
 *
 * If [hash_out] is non-NULL it receives tcp_hash3() of the tuple.
 */
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg, ci_uint32* hash_out)
{
  ci_netif_filter_table* tbl;
  unsigned start, slot;
  unsigned step = 0;

  tbl = ni->filter_table;
  if( hash_out != NULL )
    *hash_out = tcp_hash3(tbl, laddr, lport, raddr, rport, protocol);
  start = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  slot = start;

  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             start, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             slot));

  for( ; ; ) {
    int id = tbl->table[slot].id;

    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);

      /* The tuple comparison is kept branch-free (OR of differences); the
      ** device check and the callback are only evaluated on a tuple match. */
      if( ((laddr    - tbl->table[slot].laddr) |
           (lport    - sock_lport_be16(s)    ) |
           (raddr    - sock_raddr_be32(s)    ) |
           (rport    - sock_rport_be16(s)    ) |
           (protocol - sock_protocol(s)      )) == 0 &&
          CI_LIKELY( (s->rx_bind2dev_ifindex == CI_IFID_BAD ||
                      ci_sock_intf_check(ni, s, intf_i, vlan)) ) &&
          callback(s, callback_arg) != 0 )
        return;
    }
    else if( id == EMPTY ) {
      /* End of this probe chain. */
      return;
    }

    /* The secondary hash is only computed once we actually need to step
    ** beyond the first slot, keeping the common case cheap. */
    if( slot == start )
      step = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    slot = (slot + step) & tbl->table_size_mask;
    if( slot == start ) {
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                    FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
                    ip_addr_str(raddr), rport, slot, step));
      return;
    }
  }
}
Beispiel #3
0
/* sendmmsg() handler for an accelerated UDP socket.
 *
 * Sends up to [vlen] messages from [mmsg] in order via ci_udp_sendmsg(),
 * storing each successful call's return value in that entry's msg_len.
 * Sending stops at the first failure.
 *
 * Returns the number of messages sent.  In line with Linux sendmmsg(), if a
 * failure happens after at least one message has gone out, the count of
 * messages already sent is returned rather than the error; the failing
 * call's (negative) return value is propagated only when nothing was sent.
 * Returns 0 immediately when [vlen] is 0.
 *
 * NOTE(review): the count is returned as int; callers are assumed not to
 * pass vlen > INT_MAX (Linux itself caps vlen at UIO_MAXIOV).
 */
static int citp_udp_sendmmsg(citp_fdinfo* fdinfo, struct mmsghdr* mmsg,
                             unsigned vlen, int flags)
{
  citp_sock_fdi* epi = fdi_to_sock_fdi(fdinfo);
  ci_udp_iomsg_args a;
  unsigned n_sent = 0;
  int rc;

  Log_V(log(LPF "sendmmsg(%d, msg, %u, %#x)", fdinfo->fd, vlen, 
            (unsigned) flags));

  if( vlen == 0 ) 
    return 0;

  a.ep = &epi->sock;
  a.fd = fdinfo->fd;
  a.ni = epi->sock.netif;
  a.us = SOCK_TO_UDP(epi->sock.s);

  do {
    rc = ci_udp_sendmsg(&a, &mmsg[n_sent].msg_hdr, flags);
    if(CI_LIKELY( rc >= 0 )) {
      mmsg[n_sent].msg_len = rc;
      ++n_sent;
    }
  } while( rc >= 0 && n_sent < vlen );

  /* A partial send is a success: report how many messages went out so the
   * caller can retry from the right place instead of resending them. */
  if( n_sent > 0 )
    return (int) n_sent;
  return rc;
}
Beispiel #4
0
/* Decide whether a connect() to dst_be32:dport_be16 can be handled here.
 *
 * The destination is written into the socket's cached IP headers and the
 * control plane is asked to resolve it (cicp_user_retrieve()).
 *
 * Returns 0 when the connection can proceed on this socket, filling in the
 * local (source) address if none was set yet.  Returns CI_SOCKET_HANDOVER
 * when the lookup status is neither acceptable nor a local route, or when
 * the loopback options do not permit handling a local route.
 */
static int ci_tcp_connect_check_dest(citp_socket* ep, ci_ip_addr_t dst_be32,
                                     int dport_be16)
{
  ci_ip_cached_hdrs* ipcache = &ep->s->pkt;
  ci_tcp_state* ts;

  ipcache->ip.ip_daddr_be32 = dst_be32;
  ipcache->dport_be16 = dport_be16;
  cicp_user_retrieve(ep->netif, ipcache, &ep->s->cp);

  if(CI_LIKELY( ipcache->status == retrrc_success ||
                ipcache->status == retrrc_nomac   ||
                ipcache->status < 0 )) {
    /* Destination can be handled by this stack. */
    if( ipcache->encap.type & CICP_LLAP_TYPE_XMIT_HASH_LAYER4 ) {
      /* The local port is not known yet, so a layer-4 hashed lookup may
       * have given the wrong answer: force it to be redone later.
       */
      ci_ip_cache_invalidate(ipcache);
    }
    if( ipcache->ip.ip_saddr_be32 == 0 ) {
      /* No source address yet: adopt the one the control plane chose. */
      ipcache->ip.ip_saddr_be32 = ipcache->ip_saddr_be32;
      ep->s->cp.ip_laddr_be32 = ipcache->ip_saddr_be32;
    }
    return 0;
  }

  if( ipcache->status != retrrc_localroute )
    return CI_SOCKET_HANDOVER;

  /* Local route: whether we keep the socket depends on loopback options. */
  ts = SOCK_TO_TCP(ep->s);

  if( NI_OPTS(ep->netif).tcp_client_loopback == CITP_TCP_LOOPBACK_OFF )
    return CI_SOCKET_HANDOVER;

  ep->s->s_flags |= CI_SOCK_FLAG_BOUND_ALIEN;
  if( NI_OPTS(ep->netif).tcp_server_loopback != CITP_TCP_LOOPBACK_OFF ) {
    ts->local_peer = ci_tcp_connect_find_local_peer(ep->netif, dst_be32,
                                                    dport_be16);
  }
  else {
    ts->local_peer = OO_SP_NULL;
  }

  /* With no local peer found and same-stack-only client loopback there is
   * nothing for us to connect to.
   */
  if( ! OO_SP_NOT_NULL(ts->local_peer) &&
      NI_OPTS(ep->netif).tcp_client_loopback ==
      CITP_TCP_LOOPBACK_SAMESTACK )
    return CI_SOCKET_HANDOVER;

  ipcache->flags |= CI_IP_CACHE_IS_LOCALROUTE;
  if( ipcache->ip.ip_saddr_be32 == 0 ) {
    ipcache->ip.ip_saddr_be32 = dst_be32;
    ep->s->cp.ip_laddr_be32 = dst_be32;
  }
  ipcache->ether_offset = 4; /* lo is non-VLAN */
  ipcache->ip_saddr_be32 = dst_be32;
  ipcache->dport_be16 = dport_be16;
  return 0;
}
Beispiel #5
0
/* Look up the given addressing in the netif's software filter table.
 *
 * The table is probed from tcp_hash1(), stepping by tcp_hash2() modulo the
 * table size.  The caller must hold the netif lock (asserted).
 *
 * Returns the index of the matching table slot (>= 0), -ENOENT if an EMPTY
 * slot ends the probe chain without a match, or -ELOOP if the probe
 * sequence wraps round to its starting slot.
 */
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table* tbl;
  unsigned start, slot;
  unsigned step = 0;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  start = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  slot = start;

  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
             CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             start, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             slot));

  for( ; ; ) {
    int id = tbl->table[slot].id;

    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(netif, id);

      /* Branch-free tuple compare: all differences must be zero. */
      if( ((laddr    - tbl->table[slot].laddr) |
           (lport    - sock_lport_be16(s)    ) |
           (raddr    - sock_raddr_be32(s)    ) |
           (rport    - sock_rport_be16(s)    ) |
           (protocol - sock_protocol(s)      )) == 0 )
        return slot;
    }
    if( id == EMPTY )
      return -ENOENT;

    /* Only work out the secondary hash when the first slot did not
     * resolve the lookup, so the fast path avoids the extra cost. */
    if( slot == start )
      step = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    slot = (slot + step) & tbl->table_size_mask;
    if( slot == start ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
                   ip_addr_str(raddr), rport, slot, step));
      return -ELOOP;
    }
  }
}
Beispiel #6
0
/* sendpage file operation for a TCP helper endpoint.
 *
 * Sends [size] bytes of [page] starting at [offset] on the socket attached
 * to [filp] via sendpage_copy() when the socket is in a connected TCP
 * state; otherwise returns the socket's negated tx_errno.  [ppos] is not
 * used.
 */
ssize_t linux_tcp_helper_fop_sendpage(struct file* filp, struct page* page, 
                                      int offset, size_t size,
                                      loff_t* ppos, int flags)
{
  ci_private_t* priv = filp->private_data;
  tcp_helper_resource_t* trs = efab_priv_to_thr(priv);
  ci_sock_cmn* s;

  OO_DEBUG_VERB(ci_log("%s: %d:%d offset=%d size=%d flags=%x", __FUNCTION__,
                       NI_ID(&trs->netif), OO_SP_FMT(priv->sock_id), offset,
                       (int) size, flags));

  ci_assert(page);
  ci_assert_ge(offset, 0);
  ci_assert_gt(size, 0);
  ci_assert_le(offset + size, CI_PAGE_SIZE);

#ifndef MSG_SENDPAGE_NOTLAST
  /* Without MSG_SENDPAGE_NOTLAST, "flags" is really a boolean "more" and
  ** has to be converted to MSG_MORE.  "more" is sometimes set even for the
  ** last page (seen on 2.6.18, but not on 2.6.26 or later), so a send that
  ** stops short of the end of the page is treated as the last one.
  */
  if( flags )
    flags = ( offset + size < CI_PAGE_SIZE ) ? 0 : MSG_MORE;
#endif

  s = SP_TO_SOCK(&trs->netif, priv->sock_id);
  if( ! CI_LIKELY( s->b.state & CI_TCP_STATE_TCP_CONN ) ) {
    /* Closed or listening: report the stored error.  No SIGPIPE is raised
    ** here because Linux will do it for us. */
    return -s->tx_errno;
  }

  return sendpage_copy(&trs->netif, SOCK_TO_TCP(s), page, offset, size,
                       flags);
}
Beispiel #7
0
/* Log the contents of the netif's software filter table.
 *
 * Emits a banner, then (when CI_CFG_STATS_NETIF is enabled) a summary line
 * with the table statistics, then one line per occupied slot giving the
 * slot index, socket id, route count, protocol, local and remote
 * address:port and the two hash values for that entry, then a closing
 * banner.  Slots with a negative id are skipped.
 */
void ci_netif_filter_dump(ci_netif* ni)
{
  int id;
  unsigned i;
  ci_netif_filter_table* tbl;

  ci_assert(ni);
  tbl = ni->filter_table;

  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
#if CI_CFG_STATS_NETIF
  log(FN_FMT "size=%d n_entries=%i n_slots=%i max=%i mean=%i", FN_PRI_ARGS(ni),
      tbl->table_size_mask + 1, ni->state->stats.table_n_entries,
      ni->state->stats.table_n_slots, ni->state->stats.table_max_hops,
      ni->state->stats.table_mean_hops);
#endif

  for( i = 0; i <= tbl->table_size_mask; ++i ) {
    id = tbl->table[i].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      unsigned laddr = tbl->table[i].laddr;
      int lport = sock_lport_be16(s);
      unsigned raddr = sock_raddr_be32(s);
      int rport = sock_rport_be16(s);
      int protocol = sock_protocol(s);
      unsigned hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
      unsigned hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
      /* i, hash1 and hash2 are unsigned, so they are printed with %u to
       * keep the conversion specifiers in step with the argument types. */
      log("%010u id=%-10d rt_ct=%d %s "CI_IP_PRINTF_FORMAT":%d "
          CI_IP_PRINTF_FORMAT":%d %010u:%010u",
          i, id, tbl->table[i].route_count, CI_IP_PROTOCOL_STR(protocol),
          CI_IP_PRINTF_ARGS(&laddr), CI_BSWAP_BE16(lport),
          CI_IP_PRINTF_ARGS(&raddr), CI_BSWAP_BE16(rport), hash1, hash2);
    }
  }
  log("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++");
}
Beispiel #8
0
/* Send a single message on an accelerated UDP socket.
 *
 * Rejects a message that claims a non-zero iovec count but has a NULL
 * iovec pointer (returns -1 with errno set to EFAULT); otherwise hands the
 * message to ci_udp_sendmsg() and returns its result.  [msg] must not be
 * NULL (asserted).
 */
static int citp_udp_send(citp_fdinfo* fdinfo, const struct msghdr * msg,
                         int flags)
{
  citp_sock_fdi *epi = fdi_to_sock_fdi(fdinfo);
  ci_udp_iomsg_args a;

  ci_assert(msg != NULL);

  a.fd = fdinfo->fd;
  a.ep = &epi->sock;
  a.ni = epi->sock.netif;
  a.us = SOCK_TO_UDP(epi->sock.s);

  /* NB. msg_name[len] validated in ci_udp_sendmsg(). */
  if( ! CI_LIKELY( msg->msg_iov != NULL || msg->msg_iovlen == 0 ) ) {
    errno = EFAULT;
    return -1;
  }

  return ci_udp_sendmsg( &a, msg, flags);
}
Beispiel #9
0
/* Copy the next datagram on the UDP receive queue into the caller's iovec.
 *
 * ni     - the netif the socket belongs to
 * us     - the UDP socket state whose receive queue is read
 * piov   - destination iovec pointer, passed to
 *          oo_copy_pkt_to_iovec_no_adv()
 * msg    - message header to fill in; may be NULL (async recv)
 * flags  - recv flags; only MSG_PEEK is examined here
 *
 * Returns -EAGAIN if the receive queue is not readable.  Otherwise returns
 * the result of oo_copy_pkt_to_iovec_no_adv().  On a non-negative result the
 * message header is filled in and, unless MSG_PEEK is set, the packet is
 * marked consumed and the queue's delivered counters are advanced.
 */
static int ci_udp_recvmsg_get(ci_netif* ni, ci_udp_state* us,
                              ci_iovec_ptr* piov, 
                              ci_msghdr* msg, int flags)
{
  ci_ip_pkt_fmt* pkt;
  int rc;

  /* NB. [msg] can be NULL for async recv. */

  if( ci_udp_recv_q_not_readable(ni, us) )
    goto recv_q_is_empty;

  /* Read barrier between the readability check and reading the queue. */
  ci_rmb();

  pkt = PKT_CHK_NNL(ni, us->recv_q.extract);
  if( pkt->pf.udp.rx_flags & CI_IP_PKT_FMT_PREFIX_UDP_RX_CONSUMED ) {
    /* We know that the receive queue is not empty and if a filter is
     * involved that there are some that have passed the filter, so if
     * this pkt is already consumed, the next one must be OK to
     * receive (and already have been filtered)
     */
    us->recv_q.extract = pkt->next;
    pkt = PKT_CHK_NNL(ni, us->recv_q.extract);
    ci_assert( !(pkt->pf.udp.rx_flags & 
                 CI_IP_PKT_FMT_PREFIX_UDP_RX_CONSUMED) );
#if CI_CFG_ZC_RECV_FILTER
    if( us->recv_q_filter )
      /* Filter should have run on this packet and marked it */
      ci_assert( (pkt->pf.udp.rx_flags & 
                  (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED | 
                   CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED)) ); 
    else
      /* Bump this along as we don't have a filter installed, but want
       * to keep the filter pointer in a sane place
       */
      us->recv_q.filter = us->recv_q.extract;
#endif
  }

#if CI_CFG_ZC_RECV_FILTER
  /* Skip any that the filter has dropped.  This must terminate before
   * hitting the tail because we know the queue is readable.
   */
  while( pkt->pf.udp.rx_flags & CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED ) {
    us->recv_q.extract = pkt->next;
    pkt = PKT_CHK_NNL(ni, us->recv_q.extract);
  }
#endif

#if defined(__linux__) && !defined(__KERNEL__)
  /* Control messages are only produced when the caller supplied a control
   * buffer and the socket has cmsg flags set; otherwise the control length
   * is reported as zero.
   */
  if( msg != NULL && msg->msg_controllen != 0 ) {
    if( CI_UNLIKELY(us->s.cmsg_flags != 0 ) )
      ci_ip_cmsg_recv(ni, us, pkt, msg, 0);
    else
      msg->msg_controllen = 0;
  }
#endif
  /* Remember the receive stamp of the packet being returned. */
  us->stamp = pkt->pf.udp.rx_stamp;

  rc = oo_copy_pkt_to_iovec_no_adv(ni, pkt, piov, pkt->pf.udp.pay_len);

  if(CI_LIKELY( rc >= 0 )) {
#if HAVE_MSG_FLAGS
    /* Fewer bytes copied than the payload holds: flag truncation. */
    if(CI_UNLIKELY( rc < pkt->pf.udp.pay_len && msg != NULL ))
      msg->msg_flags |= LOCAL_MSG_TRUNC;
#endif
    ci_udp_recvmsg_fill_msghdr(ni, msg, pkt, &us->s);
    if( ! (flags & MSG_PEEK) ) {
      /* Not peeking: account for the whole payload (not just the bytes
       * copied) and mark the packet as consumed.
       */
      us->recv_q.bytes_delivered += pkt->pf.udp.pay_len;
      us->recv_q.pkts_delivered  += 1;
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_CONSUMED;
#if CI_CFG_ZC_RECV_FILTER
      if( !us->recv_q_filter ) {
        /* Pretend this packet passed the filter, to keep state consistent */
        ++us->recv_q.pkts_filter_passed;
        us->recv_q.bytes_filter_passed += pkt->pf.udp.pay_len;
        pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED;
      }
#endif
    }
    us->udpflags |= CI_UDPF_LAST_RECV_ON;
  }

  return rc;

 recv_q_is_empty:
  return -EAGAIN;
}
Beispiel #10
0
/* Start an active open (connect) on a TCP socket: set MSS/window state,
 * bind if required, install filters, move to SYN_SENT and enqueue the SYN.
 *
 * ni          - the netif the socket belongs to
 * ts          - the TCP socket being connected
 * dst_be32    - destination address, passed to ci_tcp_set_peer()
 * dport_be16  - destination port, passed to ci_tcp_set_peer()
 * fail_rc     - out: error value, written on the failure paths below
 *
 * Returns:
 *   CI_CONNECT_UL_OK            - SYN enqueued for a blocking connect
 *   CI_CONNECT_UL_FAIL          - bind or filter setup failed, or the socket
 *                                 is non-blocking (in which case *fail_rc is
 *                                 set for EINPROGRESS); *fail_rc holds the
 *                                 error
 *   CI_CONNECT_UL_LOCK_DROPPED  - waiting for a packet buffer was
 *                                 interrupted; *fail_rc holds the error
 *   CI_CONNECT_UL_START_AGAIN   - no packet buffer was available at first;
 *                                 the caller should retry
 */
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
				   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU related values.
   * Note, even these values are speculative since the real MTU
   * could change between now and passing the packet to the lower layers
   */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* Send buffer adjusted by ci_tcp_set_eff_mss(), but we want it to stay
   * zero until the connection is established.
   */
  ts->so_sndbuf_pkts = 0;

  /* 
   * 3. State and address are OK. It's address routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;

    /* Bind to the socket's own source address if it was explicitly bound,
     * otherwise to INADDR_ANY.  source_be16 starts as 0 and is written by
     * __ci_bind() with the port that was chosen.
     */
    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else 
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ni, ts),
                 ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    /* A zero source address after the bind cannot be connected from. */
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state. */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s, CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more.
     */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
    if( ni->ofe != NULL )
      ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter.
     */
    ci_assert_nequal(rc, -EFILTERSSOME);
    /* The retry is only attempted for -EBUSY and only if a time-wait filter
     * was actually freed; rc is overwritten by the second attempt.
     */
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts),
                                    ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work.
       */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        /* The source address was not chosen by the user: forget it. */
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
	     ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
	     ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una. */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  /* Replace the socket's TCP option flags with the netif's SYN options. */
  ts->tcpflags &=~ CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl = ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr)+sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
			ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app tries to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno achieve this. The transmit window
  ** is set to 1 byte which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window. 
  */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0; 
  ci_tcp_enqueue_no_data(ts, ni, pkt);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);  

  /* For a non-blocking socket the connect is reported through the failure
   * path with EINPROGRESS, even though the SYN has been enqueued.
   */
  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS",
		LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}
Beispiel #11
0
/* Put a TCP socket into the LISTEN state.
 *
 * ep       - the socket endpoint (netif + socket)
 * fd       - file descriptor, passed on to ci_tcp_helper_listen_os_sock()
 * backlog  - requested accept-queue length.  A negative value selects the
 *            netif's max_ep_bufs; a value below the netif's
 *            acceptq_min_backlog is raised to that minimum.
 *
 * Returns 0 on success.  Returns CI_SOCKET_HANDOVER when the netif options
 * say listening sockets are to be handed over, or when the socket is bound
 * to an alien address and server loopback is off.  If the socket is neither
 * CLOSED nor already LISTENing, returns the value produced by
 * CI_SET_ERROR(rc, EINVAL).  If a port has to be bound here and __ci_bind()
 * fails, returns its result.  On a later failure the socket is reverted to
 * a non-listening state and the function returns rc in driver builds, or
 * CI_SOCKET_ERROR otherwise.
 *
 * If the socket is already listening only the backlog is updated.
 */
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /* 
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), 
             backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( !NI_OPTS(netif).tcp_server_loopback) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  /* The sign test must be made on the signed [backlog]: [ul_backlog] is
   * unsigned, so comparing it with zero could never detect a negative
   * request.
   */
  if( backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  if( s->b.state == CI_TCP_LISTEN ) {
    /* Already listening: just apply the new backlog. */
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }

  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them). 
   */
  ts->s.tx_errno = EPIPE;
  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose 
     * a port.  As we haven't been given a hint we let the OS choose. */

    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));

    } else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  } 

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);
  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* install all the filters needed for this connection 
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   *  TODO: handle BINDTODEVICE by setting phys_port parameter to correct 
   *        physical L5 port index
   *  TODO: handle REUSEADDR by setting last parameter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      /* Only some of the filters were installed: either tolerate that
       * (no_fail option) or undo them and report a failure. */
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters  returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }

  /* 
   * Call of system listen() is required for listen any, local host
   * communications server and multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
					 S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}