示例#1
0
/* Remove from the stack's filter table the entry that belongs to socket
 * [sock_p] and carries local address [laddr].
 *
 * The table is probed from tcp_hash1() in steps of tcp_hash2().  Reaching
 * an EMPTY slot ends the search quietly (removing the same filter more
 * than once is tolerated); probing all the way round to the starting slot
 * is logged as an error.  Nothing is removed in either of those cases.
 */
void
ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p,
		       unsigned laddr, unsigned lport,
		       unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table* table;
  ci_netif_filter_table_entry* slot;
  unsigned start, stride, pos;
  int n_hops;

  ci_assert(ci_netif_is_locked(netif)
#ifdef __KERNEL__
            /* release_ep_tbl might be called without the stack lock.
             * Do not complain about this. */
            || (netif2tcp_helper_resource(netif)->k_ref_count &
                TCP_HELPER_K_RC_DEAD)
#endif
            );

  table = netif->filter_table;
  start = tcp_hash1(table, laddr, lport, raddr, rport, protocol);
  stride = tcp_hash2(table, laddr, lport, raddr, rport, protocol);

  LOG_TC(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u",
                __FUNCTION__, NI_ID(netif), OO_SP_FMT(sock_p),
                CI_IP_PROTOCOL_STR(protocol),
		ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
		ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
		start, stride));

  pos = start;
  n_hops = 0;
  do {
    int owned_by_sock;

    slot = &table->table[pos];
    owned_by_sock = ( slot->id == OO_SP_TO_INT(sock_p) );

    if( owned_by_sock && laddr == slot->laddr ) {
      /* Found it: hand the probe position and hop count to the worker. */
      __ci_netif_filter_remove(netif, start, stride, n_hops, pos);
      return;
    }
    if( ! owned_by_sock && slot->id == EMPTY ) {
      /* End of the probe chain: the filter is not (or no longer) present.
       * Repeated removal is permitted, so this is not an error. */
      return;
    }

    pos = (pos + stride) & table->table_size_mask;
    ++n_hops;
  } while( pos != start );

  /* Wrapped round to the first slot without finding the entry. */
  LOG_E(ci_log(FN_FMT "ERROR: LOOP [%d] %s %s:%u->%s:%u",
               FN_PRI_ARGS(netif), OO_SP_FMT(sock_p),
               CI_IP_PROTOCOL_STR(protocol),
               ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
               ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport)));
}
示例#2
0
/**
 * callback called by nipns_client when the query for the remote peer's
 * inner address completes
 * - this seems used for the iaddr in the stun_t which will soon be obsolete
 *   by the abstraction of tunnel and connection
 * - on success the address is copied into tunnel->remote_iaddr and the
 *   pending resp_iaddr_req is closed
 * - if the query failed, or the record can't be read back from the local
 *   database, the tunnel is closed
 *
 * @param userptr	the onet_tunnel_t which issued the request
 * @param result	outcome of the name service query
 */
static void onet_tunnel_resp_iaddr_req_cb( void *userptr, ns_result_t result )
{
	onet_tunnel_t		*tunnel		= userptr;
	onet_resp_iaddr_req_t 	*iaddr_req	= tunnel->resp_iaddr_req;
	ns_db_record_t		*record;
	gen_hd_t		*gen_hd;
	// check if the record has been found. if not, close the tunnel
	if( result != NS_RESULT_FOUND ){
		LOGM_ERR("cant get the inner address of the remote nipid for this connection. ns_result=%s\n", ns_result_str(result));
		onet_tunnel_close( tunnel );
		return;
	}
	// get the found record
	record = ns_util_db_get( NULL, &iaddr_req->remote_nipid, NS_REC_DNS_HOSTNAME );
	DBG_ASSERT( record );
	// the assert above may be compiled out: never dereference a missing
	// record, treat it exactly like a failed query instead
	if( !record ){
		LOGM_ERR("cant get the inner address of the remote nipid for this connection. record missing from the local database\n");
		onet_tunnel_close( tunnel );
		return;
	}
	// copy the remote_iaddr
	gen_hd	= record->data;
	ip_addr_from_payload( &tunnel->remote_iaddr, payl_gen_hd_data(gen_hd) );
	DBG("set the tunnel remote_iaddr to %s\n", ip_addr_str(&tunnel->remote_iaddr) );
	// close the resp_iaddr_req
	onet_tunnel_resp_iaddr_req_close( tunnel );
	// logging
	LOGM_WARNING("tunnel established with %s (%s) as responder\n"
			, stun_get_remote_identity(tunnel->stun)
			, ip_addr_str(&tunnel->remote_iaddr) );
}
示例#3
0
/* Find the socket registered in the filter table for the given addressing.
 *
 * The full 4-tuple is looked up first.  If that finds nothing, the lookup
 * is repeated with the remote address and port set to zero (wildcard
 * entry).  Returns the socket of the matching table entry, or 0 when
 * neither lookup succeeds.
 */
ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, 
				      unsigned lport, unsigned raddr, 
				      unsigned rport, unsigned protocol)
{
  int slot;

  /* Exact match on local and remote endpoints. */
  slot = ci_netif_filter_lookup(netif, laddr, lport,  raddr, rport, protocol);
  LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d",
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     slot));

  if( ! CI_LIKELY( slot >= 0 ) ) {
    /* Fall back to the entry that ignores the remote endpoint. */
    raddr = 0;
    rport = 0;
    slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol);
    LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d",
	      ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	      ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	      slot));

    if( ! CI_LIKELY( slot >= 0 ) )
      return 0;
  }

  return ID_TO_SOCK(netif, netif->filter_table->table[slot].id);
}
示例#4
0
/* Search one level of a listen queue's bucket tree for the synrecv state
 * that matches the received packet [rxp].
 *
 * The slot to inspect is derived from rxp->hash and [level].  A slot that
 * refers to another bucket is followed recursively with level + 1;
 * otherwise the slot heads a chain of synrecv entries linked through
 * bucket_link, which is walked comparing remote address, local address and
 * remote port.  Returns the matching entry or NULL.
 *
 * In kernel builds the depth and the chain length are bounded; exceeding
 * either bound reports CI_NETIF_ERROR_SYNRECV_TABLE and gives up.
 */
static ci_tcp_state_synrecv*
ci_tcp_listenq_bucket_lookup(ci_netif* ni, ci_tcp_listen_bucket* bucket,
                             ciip_tcp_rx_pkt* rxp,
                             int level)
{
  int slot = ci_tcp_listenq_hash2idx(rxp->hash, level);
  ci_ni_aux_mem* node;
  ci_tcp_state_synrecv* cand;
  unsigned src_addr, dst_addr, src_port;
#ifdef __KERNEL__
  int n_walked = 0;

  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif

  LOG_TV(ci_log("%s([%d] level=%d hash:%x l:%s r:%s:%d)", __func__,
                NI_ID(ni), level, rxp->hash,
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
                CI_BSWAP_BE16(rxp->tcp->tcp_source_be16)));

  /* Nothing hangs off this slot. */
  if( OO_P_IS_NULL(bucket->bucket[slot]) )
    return NULL;

  node = ci_ni_aux_p2aux(ni, bucket->bucket[slot]);

  /* Interior node: descend one level. */
  if( node->type == CI_TCP_AUX_TYPE_BUCKET )
    return ci_tcp_listenq_bucket_lookup(ni, &node->u.bucket, rxp, level + 1);

  src_addr = oo_ip_hdr(rxp->pkt)->ip_saddr_be32;
  dst_addr = oo_ip_hdr(rxp->pkt)->ip_daddr_be32;
  src_port = rxp->tcp->tcp_source_be16;

  /* Leaf: walk the chain of synrecv entries. */
  for( cand = &node->u.synrecv; ; cand = &node->u.synrecv ) {
    /* All three differences are zero only when every field is equal. */
    if( ((src_addr - cand->r_addr) | (dst_addr - cand->l_addr) |
         (src_port - cand->r_port)) == 0 )
      return cand;

    if( OO_P_IS_NULL(cand->bucket_link) )
      return NULL;

    node = ci_ni_aux_p2aux(ni, cand->bucket_link);
#ifdef __KERNEL__
    if( n_walked++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return NULL;
    }
#endif
  }

  /* unreachable */
  return NULL;
}
示例#5
0
/* Invoke [callback] on every socket in the filter table that matches the
 * given address tuple.
 *
 * A table entry matches when its local address, and the socket's local
 * port, remote address, remote port and protocol, all equal the arguments.
 * A matching socket is passed to the callback only if it is not bound to a
 * device (rx_bind2dev_ifindex == CI_IFID_BAD) or ci_sock_intf_check()
 * accepts [intf_i]/[vlan].  Iteration stops when the callback returns
 * non-zero, when an EMPTY slot is reached, or when the probe sequence
 * returns to its starting slot.
 *
 * If [hash_out] is not NULL it receives tcp_hash3() of the tuple.
 */
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg, ci_uint32* hash_out)
{
  ci_netif_filter_table* table = ni->filter_table;
  unsigned start, pos;
  unsigned stride = 0;

  if( hash_out != NULL )
    *hash_out = tcp_hash3(table, laddr, lport, raddr, rport, protocol);

  start = tcp_hash1(table, laddr, lport, raddr, rport, protocol);
  pos = start;

  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     start, tcp_hash2(table, laddr, lport, raddr, rport, protocol),
	     pos));

  for( ; ; ) {
    int id = table->table[pos].id;

    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* sk = ID_TO_SOCK(ni, id);

      /* Tuple match, then device check, then the callback's verdict --
       * evaluated strictly in that order. */
      if( ((laddr    - table->table[pos].laddr) |
	   (lport    - sock_lport_be16(sk)    ) |
	   (raddr    - sock_raddr_be32(sk)    ) |
	   (rport    - sock_rport_be16(sk)    ) |
	   (protocol - sock_protocol(sk)      )) == 0 &&
          CI_LIKELY( (sk->rx_bind2dev_ifindex == CI_IFID_BAD ||
                      ci_sock_intf_check(ni, sk, intf_i, vlan)) ) &&
          callback(sk, callback_arg) != 0 )
        return;
    }
    else if( id == EMPTY ) {
      return;
    }

    /* The second hash is only needed once we have to move off the first
    ** slot, so it is computed lazily to keep the common case cheap. */
    if( pos == start )
      stride = tcp_hash2(table, laddr, lport, raddr, rport, protocol);
    pos = (pos + stride) & table->table_size_mask;

    if( pos == start ) {
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
		   ip_addr_str(raddr), rport, pos, stride));
      return;
    }
  }
}
示例#6
0
/* Format the local and remote address:port of UDP socket [us] as
 * "L[addr:port] R[addr:port]".
 *
 * The result lives in a function-local static buffer, so it is overwritten
 * by the next call.  Output is bounded to the size of that buffer.
 */
static char * ci_udp_addr_str( ci_udp_state* us )
{
    static char buf[128];

    ci_assert(us);
    /* snprintf rather than sprintf: never write past the end of buf. */
    snprintf( buf, sizeof(buf), "L[%s:%d] R[%s:%d]",
              ip_addr_str( udp_laddr_be32(us)),
              CI_BSWAP_BE16(udp_lport_be16(us)),
              ip_addr_str( udp_raddr_be32(us)),
              CI_BSWAP_BE16(udp_rport_be16(us)) );
    return buf;
}
示例#7
0
/**
 * httpd handler rendering the router status page into buf
 * - a header with the local inner address, netmask, cnxid and device name
 * - a table with one row per tunnel of onet_main->tunnel_list
 * - always returns 0
 */
static int onet_httpd_handler_cb(char *path, httpd_var_t *httpd_var, GByteArray *buf )
{
	onet_t	*onet	= onet_main;
	GList	*node;

	// general information about the local router
	httpd_printf_page_title(buf, "NeoIp Router" );
	httpd_printf(buf,"<b>Local ip address:</b> %s<br>", ip_addr_str(&onet->ip_iaddr) );
	httpd_printf(buf,"<b>Local ip netmask:</b> %s<br>", ip_netmask_str(&onet->ip_netmask) );
	httpd_printf(buf,"<b>Local cnxid:</b> %s<br>", nipid_str(&onet->local_cnxid) );
	httpd_printf(buf,"<b>virtual network device:</b> %s<br>", onet->vdev.dev_name );

	// title and column headers of the tunnel table
	httpd_printf(buf,"<hr><h3><div align=\"center\">Tunnel List:</div></h3>" );
	httpd_printf_table_start(buf);
	httpd_printf_tr_start(buf);
	httpd_printf_th(buf, "remote iaddr");
	httpd_printf_th(buf, "state");
	httpd_printf_th(buf, "remote identity");
	httpd_printf_th(buf, "info on connection");
	httpd_printf_th_title(buf, "number of packets per sec estimated of the last 5-sec", "packet rate");
	httpd_printf_th_title(buf, "number of kbyte per sec estimated of the last 5-sec", "throughput");
	httpd_printf_tr_end(buf);

	// one row per tunnel
	node = onet->tunnel_list;
	while( node != NULL ){
		onet_tunnel_t	*cur	= node->data;

		httpd_printf_tr_start(buf);
		if( cur->itor != NULL ){
			// still initiating: the remote identity is not known yet
			httpd_printf_td(buf, "%s", ip_addr_str(&cur->remote_iaddr));
			httpd_printf_td(buf, "Initiating");
			httpd_printf_td(buf, "none yet");
			httpd_printf_td(buf, "<a href=\"%s\" title=\"Provide details on this connection's initiator\">X</a>"
								, itor_httpd_get_link(cur->itor, "disp_single"));
		}else if( cur->resp_iaddr_req != NULL ){
			// responder still waiting for the remote inner address
			httpd_printf_td(buf, "none yet");
			httpd_printf_td(buf, "Responding");
			httpd_printf_td(buf, "none yet");
			httpd_printf_td(buf, "none yet");
		}else{
			// fully established tunnel
			httpd_printf_td(buf, "%s", ip_addr_str(&cur->remote_iaddr));
			httpd_printf_td(buf, "Established");
			DBG_ASSERT( cur->stun );
			httpd_printf_td(buf, "%s", stun_get_remote_identity(cur->stun) );
			httpd_printf_td(buf, "<a href=\"%s\" title=\"Provide details on this established connection\">X</a>"
								, stun_httpd_get_link(cur->stun, "disp_single"));
		}
		// rate columns are common to every state
		httpd_printf_td(buf, "%.2lf pkt/sec", rate_estim_get_avg_delay(cur->pkt_rate, 5*1000, 1000) );
		httpd_printf_td(buf, "%.2lf kbyte/sec", rate_estim_get_avg_delay(cur->throughput, 5*1000, 1000)/1024 );
		httpd_printf_tr_end(buf);

		node = g_list_next( node );
	}
	httpd_printf_table_end(buf);
	return 0;
}
示例#8
0
/* Look up the filter-table slot for the given address tuple.
 *
 * Probing starts at tcp_hash1() and advances by tcp_hash2().  A slot
 * matches when its local address, and the owning socket's local port,
 * remote address, remote port and protocol, all equal the arguments.
 *
 * Returns the index of the matching slot (>= 0), -ENOENT if an EMPTY slot
 * is reached first, or -ELOOP if the probe sequence returns to its
 * starting slot.  The stack lock must be held (asserted).
 */
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
			   unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table* table;
  unsigned start, pos;
  unsigned stride = 0;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  table = netif->filter_table;
  start = tcp_hash1(table, laddr, lport, raddr, rport, protocol);
  pos = start;

  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
	     CI_IP_PROTOCOL_STR(protocol),
	     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
	     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
	     start, tcp_hash2(table, laddr, lport, raddr, rport, protocol),
	     pos));

  for( ; ; ) {
    int id = table->table[pos].id;

    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* sk = ID_TO_SOCK(netif, id);

      /* The OR of the differences is zero only if every field is equal. */
      if( ((laddr    - table->table[pos].laddr) |
	   (lport    - sock_lport_be16(sk)    ) |
	   (raddr    - sock_raddr_be32(sk)    ) |
	   (rport    - sock_rport_be16(sk)    ) |
	   (protocol - sock_protocol(sk)      )) == 0 )
      	return pos;
    }
    if( id == EMPTY )
      return -ENOENT;

    /* Compute the step lazily: it is only needed when the first slot did
     * not settle the lookup. */
    if( pos == start )
      stride = tcp_hash2(table, laddr, lport, raddr, rport, protocol);
    pos = (pos + stride) & table->table_size_mask;

    if( pos == start ) {
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
		   ip_addr_str(raddr), rport, pos, stride));
      return -ELOOP;
    }
  }
}
示例#9
0
/**
 * callback called by itor to report the outcome of a connection attempt
 * - on ITOR_RESULT_CONNECTED the itor is stopped, the stun is attached to
 *   the tunnel and the pending trigger packet is flushed
 * - on any other result the destination is added to the negcache and the
 *   tunnel is closed
 */
static void onet_itor_cnx_result_cb( void *userptr, itor_cnx_result_t result, stun_t *stun )
{
	onet_tunnel_t	*tunnel = userptr;
	onet_t		*onet	= onet_main;

	DBG("enter result=%d stun=%p\n", result, stun);

	if( result == ITOR_RESULT_CONNECTED ){
		// both the itor and the new stun must exist at this point
		DBG_ASSERT( tunnel->itor );
		DBG_ASSERT( stun );
		// the itor has done its job, release it
		itor_stop( tunnel->itor );
		tunnel->itor = NULL;
		// the tunnel now owns the established stun
		tunnel->stun	= stun;
		stun_set_inerr_cb( tunnel->stun, onet_stun_inerr_cb, tunnel );
		// finish the initialization shared with the responder side
		onet_tunnel_init_common( tunnel );
		// send the packet which triggered this connection
		onet_tunnel_itor_trigger_pkt_flush( tunnel );
		LOGM_WARNING("tunnel established with %s (%s) as itor\n"
				, stun_get_remote_identity(tunnel->stun)
				, ip_addr_str(&tunnel->remote_iaddr) );
		return;
	}

	// connection failed: remember the destination in the negcache
	dst_iaddr_negcache_add( onet->dst_iaddr_negcache, &tunnel->remote_iaddr, 0 );
	// report the reason, then drop the tunnel
	LOGM_ERR("tunnel initialization failed because %s\n", itor_cnx_result_str(result) );
	onet_tunnel_close( tunnel );
}
示例#10
0
/**
 * test if an address belongs to the local netmask (onet_main->ip_netmask)
 * - return 1 if it does, 0 otherwise
 */
int onet_is_local_addr( ip_addr_t *ip_addr )
{
	int	is_local;
	// TODO if ip_netmask is null, return 0;
	DBG("is %s belong to %s\n", ip_addr_str( ip_addr ), ip_netmask_str( &onet_main->ip_netmask) );
	// normalize whatever the test returns to a strict 0/1
	is_local = ip_addr_belong_to_netmask( ip_addr, &onet_main->ip_netmask ) ? 1 : 0;
	return is_local;
}
示例#11
0
/*
** Search the listen queue of [tls] for a synrecv object matching the
** received packet [rxp].  The search starts at the listener's top-level
** bucket, level 0.  Returns the matching object, or NULL (after verbose
** logging of the packet's addresses) when there is none.
*/
ci_tcp_state_synrecv*
ci_tcp_listenq_lookup(ci_netif* netif, ci_tcp_socket_listen* tls,
                      ciip_tcp_rx_pkt* rxp)
{
  ci_tcp_state_synrecv* match =
    ci_tcp_listenq_bucket_lookup(netif,
                                 ci_ni_aux_p2bucket(netif, tls->bucket),
                                 rxp, 0);

  if( match != NULL )
    return match;

  LOG_TV(log(LPF "no match for %s:%d->%s:%d",
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_source_be16),
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_dest_be16)));
  return NULL;
}
示例#12
0
/**
 * record the netmask in onet_main and add a route for it on the vdev
 * - the route is installed by running "ip route add" thru system()
 * - nothing is executed when the debug:tun_stub option is set
 * - always returns 0
 */
static int onet_add_route_netmask( ip_netmask_t *netmask )
{
	char		route_cmd[300];
	ip_addr_t	net_addr;

	// the netmask is recorded even when the route itself is stubbed out
	onet_main->ip_netmask	= *netmask;

	// honor the tun_stub user option
	if( prop_get_bool_dfl( "neoip_router", "debug:tun_stub", 0 ) ){
		return 0;
	}

	// TODO this function is really lame
	// - the system()
	// - what if the ip addr already exists
	// NOTE(review): the results of snprintf() and system() are both ignored,
	// so a truncated command or a failing "ip route" goes unnoticed
	ip_netmask_any_addr(netmask, &net_addr);
	snprintf(route_cmd, sizeof(route_cmd), "ip route add %s/%d dev %s"
					, ip_addr_str(&net_addr)
					, ip_netmask_get_prefix_len(netmask)
					, onet_main->vdev.dev_name );
	DBG("exec a cmd <%s> LAME change it to syscall\n", route_cmd );
	system( route_cmd );
	return 0;
}
示例#13
0
/**
 * test if the local database holds a NS_REC_DNS_IP_ADDR record for dst_iaddr
 * - return 0 if the record is present
 * - return 1 otherwise; in this case a ns query is launched for it, unless
 *   one is already pending for this dst_iaddr
 */
static int onet_ns_req_dst_iaddr_test( ip_addr_t *dst_iaddr )
{
	onet_t			*onet	= onet_main;
	onet_ns_req_dst_iaddr_t	*req;
	nipid_t			rec_id;

	// derive the record id matching this inner address
	nipid_build_dns_ip_addr( &rec_id, dst_iaddr );

	// record already known locally: success
	if( ns_util_db_get( NULL, &rec_id, NS_REC_DNS_IP_ADDR ) ){
		return 0;
	}
	// a query is already running for it: just report the faillure
	if( onet_ns_req_dst_iaddr_get( dst_iaddr ) ){
		return 1;
	}

	// no record and no pending query: start a new one
	req		= nipmem_zalloc(sizeof(*req));
	req->dst_iaddr	= *dst_iaddr;
	req->ns_req	= ns_query_req_open( NULL, &rec_id, NS_REC_DNS_IP_ADDR, ONET_DELAY_B4_ICMP
					, onet_ns_req_dns_ip_addr_ns_query_cb, req );
	// keep track of the pending request
	onet->ns_req_dst_iaddr_list = g_list_append(onet->ns_req_dst_iaddr_list, req );
	DBG("launch a ns_req_dst_iaddr for %s\n", ip_addr_str(dst_iaddr) );

	// the record isn't available yet
	return 1;
}
示例#14
0
/**
 * callback called by the iaddr_log_client with the result of the allocation
 * - on success the allocated address and netmask are stored in onet_main,
 *   the client is closed and the onet initialization is completed
 * - on any other result nothing is done
 */
static void onet_iaddr_log_client_cb( void *userptr, iaddr_log_client_state_t result,
				 ip_addr_t *ip_addr, ip_netmask_t *ip_netmask )
{
	onet_t	*onet	= onet_main;

	DBG("enter result=%d\n", result);

	if( result == IADDR_LOG_CLIENT_STATE_SUCCEED ){
		// sanity check
		DBG_ASSERT( result == IADDR_LOG_CLIENT_STATE_SUCCEED );
		DBG("SUCCEED ip_addr=%s ip_netmask=%s\n", ip_addr_str(ip_addr), ip_netmask_str(ip_netmask) );
		// remember what has been allocated
		onet->ip_iaddr	 	= *ip_addr;
		onet->ip_netmask	= *ip_netmask;
		// the client isn't needed anymore
		iaddr_log_client_close(	&onet->iaddr_log_client );
		// the inner address is known, the remaining init can proceed
		onet_init_post_iaddr();
		return;
	}

	// TODO what do i do here ?!?!?!?! i close the tunnel ?
	// - i retry in 10sec ? YES!!! anyway you cant do anything without it
}
示例#15
0
/* Put TCP socket [ep] into the listening state.
 *
 * [fd] is the descriptor passed on to ci_tcp_helper_listen_os_sock() in
 * user-level builds.  [backlog] is the caller's backlog: a negative value
 * selects NI_OPTS(netif).max_ep_bufs, and anything smaller than
 * NI_OPTS(netif).acceptq_min_backlog is raised to that minimum.
 *
 * Returns 0 on success (including when the socket is already listening, in
 * which case only the backlog is updated), CI_SOCKET_HANDOVER when the
 * tcp_listen_handover option is set or when the socket is bound to an
 * alien address and tcp_server_loopback is off, and an error otherwise.
 * On the failure paths taken after the socket was switched to LISTEN the
 * state is reverted with __ci_tcp_listen_to_normal().
 */
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /* 
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__, SK_PRI_ARGS(ep), 
             backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( !NI_OPTS(netif).tcp_server_loopback) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  /* The sign must be tested on the signed [backlog]: [ul_backlog] is
   * unsigned, so comparing it against zero can never be true and a
   * negative backlog would otherwise turn into a huge accept queue limit. */
  if( backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  /* Already listening: just update the backlog. */
  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }


  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them). 
   */

  ts->s.tx_errno = EPIPE;



  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose 
     * a port.  As we haven't been given a hint we let the OS choose. */

    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));

    } else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  } 

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);



  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* install all the filters needed for this connection 
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   *  TODO: handle BINDTODEVICE by setting phys_port paramter to correct 
   *        physical L5 port index
   *  TODO: handle REUSEADDR by setting last paramter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    /* Only some filters installed: accept that if no_fail is set,
     * otherwise undo them and fail with -ENOBUFS. */
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters  returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }


  /* 
   * Call of system listen() is required for listen any, local host
   * communications server and multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
					 S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
示例#16
0
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be consider an internal error or failing of onload.
 *
 * [serv_addr] is the address that was connected to and [os_sock] the OS
 * socket used to read back the local name.  [fd] is only used for logging
 * here; [addrlen] is not used.
 *
 * Returns 0 when the connect has been recorded (with filters either
 * installed or cleared), CI_SOCKET_HANDOVER when the socket should be
 * handed over (filters are cleared first), or a non-zero rc from the step
 * that failed.  A negative rc is converted into a handover when
 * CITP_OPTS.no_fail is set.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
    const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
    ci_uint32 dst_be32;
    ci_udp_state* us = SOCK_TO_UDP(ep->s);
    int onloadable;
    int rc = 0;

    CHECK_UEP(ep);

    /* Start from a clean slate: drop the EF_SEND flag and any stored
     * rx/tx error. */
    UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
    us->s.rx_errno = 0;
    us->s.tx_errno = 0;

    /* A disconnecting address is handled entirely by ci_udp_disconnect(). */
    if( IS_DISCONNECTING(serv_sin) ) {
        rc = ci_udp_disconnect(ep, us, os_sock);
        goto out;
    }
#if CI_CFG_FAKE_IPV6
    /* An IPv6 socket is only kept when the address is really IPv4. */
    if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
        LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
        goto handover;
    }
#endif

    dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);
    if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
        LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
                  FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                  CI_BSWAP_BE16(serv_sin->sin_port), errno));
        goto out;
    }

    /* Record the remote endpoint, then ask the control plane about it;
     * the outcome is read from us->s.pkt.status below. */
    us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
    ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
    cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

    switch( us->s.pkt.status ) {
    case retrrc_success:
    case retrrc_nomac:
        onloadable = 1;
        break;
    default:
        /* Any other status: not onloadable.  Hand over if the
         * udp_connect_handover option asks for it, otherwise carry on
         * without filters. */
        onloadable = 0;
        if( NI_OPTS(ep->netif).udp_connect_handover ) {
            LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                       ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
            goto handover;
        }
        break;
    }

    /* Wildcard destination address or zero port: clear the filters and
     * report success. */
    if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
        LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
                   FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                   CI_BSWAP_BE16(serv_sin->sin_port)));
        ci_udp_clr_filters(ep);
        return 0;
    }
    if( CI_IP_IS_LOOPBACK(dst_be32) ) {
        /* After connecting via loopback it is not possible to connect anywhere
         * else.
         */
        LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                   ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
        goto handover;
    }

    if( onloadable ) {
#ifdef ONLOAD_OFE
        if( ep->netif->ofe != NULL )
            us->s.ofe_code_start = ofe_socktbl_find(
                                       ep->netif->ofe, OFE_SOCKTYPE_UDP,
                                       udp_laddr_be32(us), udp_raddr_be32(us),
                                       udp_lport_be16(us), udp_rport_be16(us));
#endif

        if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
            /* Failed to set filters.  Most likely we've run out of h/w filters.
             * Handover to O/S to avoid breaking the app.
             *
             * TODO: Actually we probably won't break the app if we don't
             * handover, as packets will still get delivered via the kernel
             * stack.  Might be worth having a runtime option to choose whether
             * or not to handover in such cases.
             */
            LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                      FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                      CI_BSWAP_BE16(serv_sin->sin_port), rc));
            CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
            goto out;
        }
    }
    else {
        /* Not onloadable and no handover requested: make sure no filters
         * remain installed. */
        ci_udp_clr_filters(ep);
    }

    LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
               SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
               ip_addr_str(udp_laddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
               ip_addr_str(udp_raddr_be32(us)),
               (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
    return 0;

out:
    /* Common exit: with the no_fail option a negative rc becomes a
     * handover instead of being returned to the caller. */
    if( rc < 0 && CITP_OPTS.no_fail )
        goto handover;
    return rc;

handover:
    /* The filters are cleared before handing the socket over. */
    ci_udp_clr_filters(ep);
    return CI_SOCKET_HANDOVER;
}
示例#17
0
/* In this bind handler we just check that the address to which
 * are binding is either "any" or one of ours. 
 * In the Linux kernel version [fd] is unused.
 *
 * The socket must be in CI_TCP_CLOSED state, must not carry
 * CI_TCPT_FLAG_WAS_ESTAB, and [my_addr] must be non-NULL, of the socket's
 * address family and at least as long as that family's address structure;
 * otherwise the function leaves through RET_WITH_ERRNO.  With
 * CI_CFG_FAKE_IPV6 an IPv6 address that is not really IPv4 yields
 * CI_SOCKET_HANDOVER.
 *
 * On success the local address and port are stored in the socket, the
 * remote address and port are zeroed, CI_SOCK_FLAG_BOUND is set and 0 is
 * returned.
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   *       What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );


  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  /* Family mismatch: EAFNOSUPPORT for a PF_INET socket, EINVAL otherwise. */
  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in) 
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif
  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);
 
  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* If the requested port appears in the CITP_OPTS.tcp_reuseports list,
   * set SO_REUSEPORT on the OS socket and flag the socket accordingly. */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        /* setsockopt failing with ENOPROTOOPT additionally marks the
         * socket as REUSEPORT_LEGACY. */
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* __ci_bind() is skipped for sockets flagged REUSEPORT_LEGACY.
   * NOTE(review): CI_LOGLEVEL_TRY_RET presumably returns from this
   * function when __ci_bind() fails -- confirm against the macro. */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) ) 
    CI_LOGLEVEL_TRY_RET(LOG_TV,
		        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port; 
  sock_laddr_be32(s) = addr_be32;
  /* A multicast bind address is stored as 0 in cp.ip_laddr_be32. */
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u", ip_addr_str(addr_be32),
	     (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
	     CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s)))); 

  return 0;
}
示例#18
0
/**
 * callback to received data from the vdev
 * - if a established tunnel exist, send the packet to it
 * - if a tunnel is currently in establishment, set the triggering packet
 * - if no tunnel exist, start the establishement on one
 * - packets too short to hold their ipv4 header (including a negative
 *   pkt_len) are logged and discarded
 *
 * @param userptr	unused
 * @param ethertype	ethertype of the packet, only ETHERTYPE_IP is expected
 * @param pkt		the packet, starting at the ipv4 header
 * @param pkt_len	the length of pkt in bytes
 */
static void onet_vdev_in( void *userptr, int ethertype, char *pkt, int pkt_len )
{
	struct 	iphdr	*iph	= (struct iphdr *)pkt;
	onet_t		*onet	= onet_main;	
	onet_tunnel_t	*tunnel;
	ip_addr_t	dst_iaddr;
	
	DBG("enter ethertype=0x%x\n", ethertype );
	// handle only ipv4 for now
	EXP_ASSERT( ethertype == ETHERTYPE_IP );
	// sanity check
// TODO put the basic check of the ipv4 packet in a function
	// - the comparison is done in signed arithmetic: comparing the int
	//   pkt_len directly against sizeof() converts a negative length to a
	//   huge unsigned value which would pass this test
	if( pkt_len < (int)sizeof(*iph) ){
		LOG(0,"received bogus packet of %d-byte. not even big enought for an ipv4 header\n", pkt_len );
		return;
	}
	if( pkt_len < iph->ihl*4 ){
		LOG(0,"received bogus packet of %d-byte with ipv4_hd->ihl=%d\n", pkt_len, iph->ihl*4 );
		return;
	}
	// get the destination ip address from the packet
	ip_addr_v4_set( &dst_iaddr, ntohl(iph->daddr) );
	// find a existing tunnel if there is any
	tunnel = onet_tunnel_from_remote_iaddr( &dst_iaddr );
	// if there is a tunnel and the connection is already established, send it thru it
	if( tunnel && tunnel->stun ){
		DBG("there is already a establish link for this packet \n");
		// TMP: just to test the limitor
		if( rate_limit_exceeded( tunnel->thput_limit) ){
//			LOGM_ERR("packet discarded due to rate limiter\n");
			return;
		}
		// update the pkt_rate and throughput
		rate_estim_add( tunnel->pkt_rate, 1 );
		rate_estim_add( tunnel->throughput, pkt_len );
		// send the packet
		stun_out_data( tunnel->stun, ethertype, pkt, pkt_len );
		return;
	}

	// if the ipaddr is in the dst_iaddr_negcache, return
	if( dst_iaddr_negcache_is_present( onet->dst_iaddr_negcache, &dst_iaddr) ){
		// return an ICMP if the ip record is in the dst_iaddr_negcache
		// - apply the concept of not replying a icmp immediatly to let
		//   the time to resolve the address
		// - similar to the time to solve the hw address with ARP
		// - as in rfc2461.7.2.2, ICMP must be replied after 3sec
		//   - it is ONET_DELAY_B4_ICMP
		// TODO the timer aspect isnt well respected now
		//      - itor has its own timer see bug 359
		//      - onet_ns_req_dst_iaddr_* honor it tho
		raw_icmp_reply_send( ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, pkt, pkt_len );
		return;		
	}
	// if there no record for this dst_iaddr in the local database, discard the packet
	if( onet_ns_req_dst_iaddr_test( &dst_iaddr ) ){
		DBG("received packet for which im unable to find a rdvpoint\n" );
		return;
	}

	// if a establishing tunnel exists, update the trigerring packet
	if( tunnel ){
		DBG_ASSERT( tunnel->itor );
		// update the trigger packet
		onet_tunnel_itor_trigger_pkt_set( tunnel, ethertype, pkt, pkt_len );
		DBG("tunnel is currently in establishement for this packet\n");
		return;
	}
	DBG_ASSERT( !tunnel );

	// create a tunnel as itor
	tunnel = onet_tunnel_open_itor( &dst_iaddr );
	if( !tunnel ){
		LOGM_ERR("can't initiate a tunnel toward the iaddr %s\n", ip_addr_str( &dst_iaddr ) );
		return;
	}
	// set the trigger packet
	onet_tunnel_itor_trigger_pkt_set( tunnel, ethertype, pkt, pkt_len );
}
示例#19
0
/* Insert for either TCP or UDP.
 *
 * Adds an entry for socket [tcp_id] to the netif's software filter table,
 * keyed by the hash of (laddr, lport, raddr, rport, protocol).  The table
 * is probed starting at hash1 with a step of hash2; the first slot whose
 * id is negative is taken.  Every occupied slot stepped over on the way
 * has its route_count incremented.
 *
 * The caller must hold the netif lock (asserted).
 *
 * Returns 0 on success, or -ENOBUFS if the probe sequence wraps round to
 * its starting slot without finding a free one.  In the failure case the
 * route_count increments made by this call are undone, so a failed insert
 * leaves the table as it found it.
 */
int ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id,
			   unsigned laddr, unsigned lport,
			   unsigned raddr, unsigned rport, unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2;
  ci_netif_filter_table* tbl;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
  unsigned hops = 1;
#endif
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);
  tbl = netif->filter_table;

  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  /* Find a free slot. */
  while( 1 ) {
    entry = &tbl->table[hash1];
    if( entry->id < 0 )  break;

    ++entry->route_count;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
    ++hops;
#endif

    /* A socket can only have multiple entries in the filter table if each
     * entry has a different [laddr].
     */
    ci_assert(
      !((entry->id == OO_SP_TO_INT(tcp_id)) && (laddr == entry->laddr)) );

    hash1 = (hash1 + hash2) & tbl->table_size_mask;

    if( hash1 == first ) {
      ci_sock_cmn *s = SP_TO_SOCK_CMN(netif, tcp_id);

      /* No free slot on this probe sequence.  Nothing is going to be
       * inserted, so no later remove will walk this path on our behalf:
       * take back the route_count we added to each slot we stepped over.
       * [hash1] is back at [first], so this retraces exactly the same
       * cycle as the loop above.
       */
      do {
        --tbl->table[hash1].route_count;
        hash1 = (hash1 + hash2) & tbl->table_size_mask;
      } while( hash1 != first );

      /* Only log the first time this socket hits a full table. */
      if( ! (s->s_flags & CI_SOCK_FLAG_SW_FILTER_FULL) ) {
        LOG_E(ci_log(FN_FMT "%d FULL %s %s:%u->%s:%u hops=%u",
                     FN_PRI_ARGS(netif),
                     OO_SP_FMT(tcp_id), CI_IP_PROTOCOL_STR(protocol),
                     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                     hops));
        s->s_flags |= CI_SOCK_FLAG_SW_FILTER_FULL;
      }

      CITP_STATS_NETIF_INC(netif, sw_filter_insert_table_full);
      return -ENOBUFS;
    }
  }

  /* Now insert the new entry. */
  LOG_TC(ci_log(FN_FMT "%d INSERT %s %s:%u->%s:%u hash=%u:%u at=%u "
		"over=%d hops=%u", FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id),
                CI_IP_PROTOCOL_STR(protocol),
		ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
		ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
		first, hash2, hash1, entry->id, hops));

#if CI_CFG_STATS_NETIF
  if( hops > netif->state->stats.table_max_hops )
    netif->state->stats.table_max_hops = hops;
  /* Keep a rolling average of the number of hops per entry. */
  if( netif->state->stats.table_mean_hops == 0 )
    netif->state->stats.table_mean_hops = 1;
  netif->state->stats.table_mean_hops =
    (netif->state->stats.table_mean_hops * 9 + hops) / 10;

  /* A slot is only newly occupied if it was EMPTY; any other negative id
   * (presumably a tombstone -- confirm against the remove path) is already
   * counted in table_n_slots.
   */
  if( entry->id == EMPTY )
    ++netif->state->stats.table_n_slots;
  ++netif->state->stats.table_n_entries;
#endif

  entry->id = OO_SP_TO_INT(tcp_id);
  entry->laddr = laddr;
  return 0;
}
示例#20
0
/* Start an active open on [ts] towards dst_be32:dport_be16.
 *
 * In order, this: derives the MSS/PMTU related values from ts->s.pkt.mtu,
 * binds a local port if CI_SOCK_FLAG_CONNECT_MUST_BIND is set, records the
 * peer, allocates the packet for the SYN, installs filters, initialises
 * the send sequence space and TCP options, moves the socket to
 * CI_TCP_SYN_SENT and enqueues the SYN.
 *
 * Returns one of:
 *   CI_CONNECT_UL_OK           - SYN enqueued on a blocking socket.
 *   CI_CONNECT_UL_FAIL         - *fail_rc has been set.  This is also the
 *                                return for a non-blocking socket once the
 *                                SYN is enqueued, with EINPROGRESS.
 *   CI_CONNECT_UL_LOCK_DROPPED - waiting for a packet buffer was
 *                                interrupted; *fail_rc has been set.
 *   CI_CONNECT_UL_START_AGAIN  - no packet buffer was available at first;
 *                                the caller is expected to retry.
 *
 * NOTE(review): presumably called with the netif lock held (see the
 * CI_SLEEP_NETIF_LOCKED flag below) -- confirm against the caller.
 */
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
				   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU related values.
   * Note, even these values are speculative since the real MTU
   * could change between now and passing the packet to the lower layers
   */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* Send buffer adjusted by ci_tcp_set_eff_mss(), but we want it to stay
   * zero until the connection is established.
   */
  ts->so_sndbuf_pkts = 0;

  /* 
   * 3. State and address are OK. It's address routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;

    /* source_be16 is passed in as 0 and read back after the call; bind to
     * the socket's source address only if the address was explicitly
     * bound, otherwise to INADDR_ANY.
     */
    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else 
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      /* NOTE(review): the address logged here is always INADDR_ANY, even
       * when the bind above used ip_saddr_be32.
       */
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u", 
                 LNT_PRI_ARGS(ni, ts),
                 ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    /* Release-build counterpart of the assertion above: refuse to connect
     * from an unset source address.
     */
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state. */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s, CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more.
     */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
    if( ni->ofe != NULL )
      ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter.
     */
    ci_assert_nequal(rc, -EFILTERSSOME);
    /* The retry is only attempted for -EBUSY and only if a timewait filter
     * was actually freed; the short-circuit order below matters.
     */
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts),
                                    ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work.
       */
      /* Forget the local address again unless it was explicitly bound. */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      /* The SYN packet allocated above is not going to be sent. */
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
	     ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
	     ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
	     (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una. */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  /* Replace any previously selected TCP options with the netif's
   * configured SYN options.
   */
  ts->tcpflags &=~ CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  /* Receive window scale is only chosen when the window-scale option is
   * among the SYN options; otherwise it is zero.
   */
  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl = ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr)+sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
			ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app tries to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno achieves this. The transmit window
  ** is set to 1 byte which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window. 
  */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0; 
  ci_tcp_enqueue_no_data(ts, ni, pkt);
  /* The SYN is queued; switch the flags to ACK afterwards (presumably for
   * the packets that follow -- confirm against ci_tcp_set_flags()).
   */
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);  

  /* For a non-blocking socket the connect has been started, not failed:
   * the CI_CONNECT_UL_FAIL return is reused to hand EINPROGRESS back to
   * the caller through *fail_rc.
   */
  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS",
		LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}