/* Remove the software filter-table entry mapping the 5-tuple
 * (laddr:lport -> raddr:rport, protocol) to socket [sock_p].
 *
 * Walks the same double-hash probe sequence used at insertion time
 * (start at hash1, stride hash2) until the matching entry is found,
 * then hands the slot, hop count and hashes to __ci_netif_filter_remove()
 * to do the actual removal.  Removing an entry that is not present is
 * silently tolerated (early return on an EMPTY slot).
 */
void ci_netif_filter_remove(ci_netif* netif, oo_sp sock_p,
                            unsigned laddr, unsigned lport,
                            unsigned raddr, unsigned rport,
                            unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2, tbl_i;
  ci_netif_filter_table* tbl;
  int hops = 0;          /* number of probe steps taken past the first slot */
  unsigned first;        /* starting slot; used to detect full wrap-around */

  ci_assert(ci_netif_is_locked(netif)
#ifdef __KERNEL__
            /* release_ep_tbl might be called without the stack lock.
             * Do not complain about this. */
            || (netif2tcp_helper_resource(netif)->k_ref_count &
                TCP_HELPER_K_RC_DEAD)
#endif
            );

  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  LOG_TC(ci_log("%s: [%d:%d] REMOVE %s %s:%u->%s:%u hash=%u:%u",
                __FUNCTION__, NI_ID(netif), OO_SP_FMT(sock_p),
                CI_IP_PROTOCOL_STR(protocol),
                ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                hash1, hash2));

  tbl_i = hash1;
  while( 1 ) {
    entry = &tbl->table[tbl_i];
    if( entry->id == OO_SP_TO_INT(sock_p) ) {
      /* Same socket: also require [laddr] to match, since one socket may
       * own several entries differing only in local address. */
      if( laddr == entry->laddr )
        break;
    }
    else if( entry->id == EMPTY ) {
      /* We allow multiple removes of the same filter -- helps avoid some
       * complexity in the filter module. */
      return;
    }
    tbl_i = (tbl_i + hash2) & tbl->table_size_mask;
    ++hops;
    if( tbl_i == first ) {
      /* Probed every reachable slot without finding the entry or an EMPTY
       * terminator: table corrupt or pathologically full. */
      LOG_E(ci_log(FN_FMT "ERROR: LOOP [%d] %s %s:%u->%s:%u",
                   FN_PRI_ARGS(netif), OO_SP_FMT(sock_p),
                   CI_IP_PROTOCOL_STR(protocol),
                   ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                   ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport)));
      return;
    }
  }

  __ci_netif_filter_remove(netif, hash1, hash2, hops, tbl_i);
}
/**
 * Callback invoked by nipns_client once the name-service lookup for the
 * remote peer's inner address completes.
 * - used for the iaddr in the stun_t, which will soon be obsoleted by the
 *   abstraction of tunnel and connection
 *
 * On failure the tunnel is torn down; on success the remote inner address
 * is copied out of the found record and the resp_iaddr_req is closed.
 */
static void onet_tunnel_resp_iaddr_req_cb( void *userptr, ns_result_t result )
{
  onet_tunnel_t        *tun     = userptr;
  onet_resp_iaddr_req_t *req    = tun->resp_iaddr_req;

  /* lookup failed: no inner address for the remote nipid => kill the tunnel */
  if( result != NS_RESULT_FOUND ){
    LOGM_ERR("cant get the inner address of the remote nipid for this connection. ns_result=%s\n", ns_result_str(result));
    onet_tunnel_close( tun );
    return;
  }

  /* fetch the record that the resolver just put in the local database */
  ns_db_record_t *rec = ns_util_db_get( NULL, &req->remote_nipid, NS_REC_DNS_HOSTNAME );
  DBG_ASSERT( rec );

  /* extract the remote inner address from the record payload */
  gen_hd_t *hd = rec->data;
  ip_addr_from_payload( &tun->remote_iaddr, payl_gen_hd_data(hd) );
  DBG("set the tunnel remote_iaddr to %s\n", ip_addr_str(&tun->remote_iaddr) );

  /* the pending request is now satisfied */
  onet_tunnel_resp_iaddr_req_close( tun );

  LOGM_WARNING("tunnel established with %s (%s) as responder\n"
               , stun_get_remote_identity(tun->stun)
               , ip_addr_str(&tun->remote_iaddr) );
}
/* Resolve a 5-tuple to a socket: try an exact (full-match) lookup first,
 * then fall back to a wildcard lookup with the remote address and port
 * zeroed (listening-socket style).  Returns the socket, or 0 when neither
 * lookup succeeds. */
ci_sock_cmn* __ci_netif_filter_lookup(ci_netif* netif,
                                      unsigned laddr, unsigned lport,
                                      unsigned raddr, unsigned rport,
                                      unsigned protocol)
{
  int slot;

  /* Exact-match attempt. */
  slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol);
  LOG_NV(log(LPF "FULL LOOKUP %s:%u->%s:%u rc=%d",
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             slot));
  if(CI_LIKELY( slot >= 0 ))
    return ID_TO_SOCK(netif, netif->filter_table->table[slot].id);

  /* Wildcard attempt: remote side zeroed. */
  raddr = rport = 0;
  slot = ci_netif_filter_lookup(netif, laddr, lport, raddr, rport, protocol);
  LOG_NV(log(LPF "WILD LOOKUP %s:%u->%s:%u rc=%d",
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             slot));
  if(CI_LIKELY( slot >= 0 ))
    return ID_TO_SOCK(netif, netif->filter_table->table[slot].id);

  return 0;
}
/* Search the listen-queue hash structure for a SYN-RECEIVED state matching
 * the incoming packet [rxp].
 *
 * The structure is a tree of buckets: each bucket slot either points at a
 * deeper bucket (recurse with level+1) or at a linked list of synrecv
 * entries (scan it).  Match is on (remote addr, local addr, remote port);
 * the local port is implied by the listening socket that owns the tree.
 *
 * Returns the matching synrecv state, or NULL.  In-kernel builds bound both
 * the recursion depth and the list length and flag table corruption via
 * ci_netif_error_detected() rather than looping forever.
 */
static ci_tcp_state_synrecv* ci_tcp_listenq_bucket_lookup(ci_netif* ni,
                                                          ci_tcp_listen_bucket* bucket,
                                                          ciip_tcp_rx_pkt* rxp,
                                                          int level)
{
  ci_ni_aux_mem* aux;
  int idx = ci_tcp_listenq_hash2idx(rxp->hash, level);
  ci_tcp_state_synrecv* tsr;
  unsigned saddr, daddr, sport;
#ifdef __KERNEL__
  int i = 0;
  /* Guard against a corrupt tree: depth beyond the configured maximum. */
  if( level > CI_LISTENQ_BUCKET_MAX_DEPTH(ni) ) {
    ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                            __FUNCTION__);
    return 0;
  }
#endif
  LOG_TV(ci_log("%s([%d] level=%d hash:%x l:%s r:%s:%d)", __func__,
                NI_ID(ni), level, rxp->hash,
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
                ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
                CI_BSWAP_BE16(rxp->tcp->tcp_source_be16)));

  if( OO_P_IS_NULL(bucket->bucket[idx]) )
    return NULL;

  level++;
  aux = ci_ni_aux_p2aux(ni, bucket->bucket[idx]);
  /* Interior node: descend one level. */
  if( aux->type == CI_TCP_AUX_TYPE_BUCKET )
    return ci_tcp_listenq_bucket_lookup(ni, &aux->u.bucket, rxp, level);

  saddr = oo_ip_hdr(rxp->pkt)->ip_saddr_be32;
  daddr = oo_ip_hdr(rxp->pkt)->ip_daddr_be32;
  sport = rxp->tcp->tcp_source_be16;

  /* Leaf: walk the chained synrecv list. */
  tsr = &aux->u.synrecv;
  do {
    /* Branch-free equality test: all three differences must be zero. */
    if( ! ((saddr - tsr->r_addr) |
           (daddr - tsr->l_addr) |
           (sport - tsr->r_port)) )
      return tsr;
    if( OO_P_IS_NULL(tsr->bucket_link) )
      return NULL;
    aux = ci_ni_aux_p2aux(ni, tsr->bucket_link);
    tsr = &aux->u.synrecv;
#ifdef __KERNEL__
    /* Guard against a corrupt (cyclic or over-long) list. */
    if( i++ > CI_LISTENQ_BUCKET_LIST_LIMIT(ni) ) {
      ci_netif_error_detected(ni, CI_NETIF_ERROR_SYNRECV_TABLE,
                              __FUNCTION__);
      return NULL;
    }
#endif
  } while(1);

  /* unreachable */
  return NULL;
}
/* Invoke [callback] for every filter-table entry matching the given
 * 5-tuple (subject to an optional interface/vlan check).  Iteration stops
 * early if the callback returns non-zero, or when an EMPTY slot or a full
 * wrap of the probe sequence is reached.
 *
 * If [hash_out] is non-NULL it receives tcp_hash3() of the tuple; this is
 * computed regardless of whether any entry matches.
 */
void ci_netif_filter_for_each_match(ci_netif* ni, unsigned laddr,
                                    unsigned lport, unsigned raddr,
                                    unsigned rport, unsigned protocol,
                                    int intf_i, int vlan,
                                    int (*callback)(ci_sock_cmn*, void*),
                                    void* callback_arg,
                                    ci_uint32* hash_out)
{
  ci_netif_filter_table* tbl;
  unsigned hash1, hash2 = 0;
  unsigned first;

  tbl = ni->filter_table;
  if( hash_out != NULL )
    *hash_out = tcp_hash3(tbl, laddr, lport, raddr, rport, protocol);
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;
  LOG_NV(log("%s: %s %s:%u->%s:%u hash=%u:%u at=%u",
             __FUNCTION__, CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if(CI_LIKELY( id >= 0 )) {
      ci_sock_cmn* s = ID_TO_SOCK(ni, id);
      /* Branch-free 5-tuple comparison: OR of differences is zero only
       * when every field matches. */
      if( ((laddr    - tbl->table[hash1].laddr ) |
           (lport    - sock_lport_be16(s)      ) |
           (raddr    - sock_raddr_be32(s)      ) |
           (rport    - sock_rport_be16(s)      ) |
           (protocol - sock_protocol(s)        )) == 0 )
        /* Only deliver if the socket is unbound-to-device, or bound to the
         * interface/vlan the packet arrived on. */
        if(CI_LIKELY( (s->rx_bind2dev_ifindex == CI_IFID_BAD ||
                       ci_sock_intf_check(ni, s, intf_i, vlan)) ))
          if( callback(s, callback_arg) != 0 )
            return;
    }
    else if( id == EMPTY )
      break;
    /* We defer calculating hash2 until it's needed, just to make the fast
    ** case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      /* Probe sequence wrapped: table is full for this tuple. */
      LOG_NV(ci_log(FN_FMT "ITERATE FULL %s:%u->%s:%u hash=%u:%u",
                    FN_PRI_ARGS(ni), ip_addr_str(laddr), lport,
                    ip_addr_str(raddr), rport, hash1, hash2));
      break;
    }
  }
}
/* Render the local and remote address:port pairs of a UDP socket as a
 * string for logging, e.g. "L[1.2.3.4:53] R[5.6.7.8:1234]".
 *
 * Returns a pointer to a static buffer: NOT reentrant and the result is
 * overwritten by the next call.
 * NOTE(review): ip_addr_str() is called twice in one format call; if it
 * also returns a single static buffer both addresses may render
 * identically -- confirm against its implementation.
 *
 * Fix: use snprintf() rather than sprintf() so the 128-byte buffer can
 * never be overrun regardless of what ip_addr_str() returns.
 */
static char * ci_udp_addr_str( ci_udp_state* us )
{
  static char buf[128];
  ci_assert(us);
  snprintf( buf, sizeof(buf), "L[%s:%d] R[%s:%d]",
            ip_addr_str( udp_laddr_be32(us)),
            CI_BSWAP_BE16(udp_lport_be16(us)),
            ip_addr_str( udp_raddr_be32(us)),
            CI_BSWAP_BE16(udp_rport_be16(us)) );
  return buf;
}
/* HTTP status-page handler: renders the router's local configuration and a
 * table of all tunnels (one row per tunnel) into [buf].
 *
 * Each tunnel row shows remote inner address, connection state
 * (Initiating / Responding / Established), remote identity, a details link,
 * and packet/throughput rate estimates.  Always returns 0.
 *
 * [path] and [httpd_var] are part of the handler signature but unused here.
 */
static int onet_httpd_handler_cb(char *path, httpd_var_t *httpd_var,
                                 GByteArray *buf )
{
  onet_t *onet = onet_main;
  GList *elem;

  /* Page header and local node information. */
  httpd_printf_page_title(buf, "NeoIp Router" );
  httpd_printf(buf,"<b>Local ip address:</b> %s<br>", ip_addr_str(&onet->ip_iaddr) );
  httpd_printf(buf,"<b>Local ip netmask:</b> %s<br>", ip_netmask_str(&onet->ip_netmask) );
  httpd_printf(buf,"<b>Local cnxid:</b> %s<br>", nipid_str(&onet->local_cnxid) );
  httpd_printf(buf,"<b>virtual network device:</b> %s<br>", onet->vdev.dev_name );
  httpd_printf(buf,"<hr><h3><div align=\"center\">Tunnel List:</div></h3>" );

  /* Tunnel table header. */
  httpd_printf_table_start(buf);
  httpd_printf_tr_start(buf);
  httpd_printf_th(buf, "remote iaddr");
  httpd_printf_th(buf, "state");
  httpd_printf_th(buf, "remote identity");
  httpd_printf_th(buf, "info on connection");
  httpd_printf_th_title(buf, "number of packets per sec estimated of the last 5-sec", "packet rate");
  httpd_printf_th_title(buf, "number of kbyte per sec estimated of the last 5-sec", "throughput");
  httpd_printf_tr_end(buf);

  /* One row per tunnel; the tunnel's fields tell us which phase it is in:
   * itor set => still initiating, resp_iaddr_req set => responding and
   * waiting for the remote inner address, otherwise established. */
  for( elem = onet->tunnel_list; elem; elem = g_list_next( elem ) ){
    onet_tunnel_t *tunnel = elem->data;
    httpd_printf_tr_start(buf);
    if( tunnel->itor ){
      httpd_printf_td(buf, "%s", ip_addr_str(&tunnel->remote_iaddr));
      httpd_printf_td(buf, "Initiating");
      httpd_printf_td(buf, "none yet");
      httpd_printf_td(buf, "<a href=\"%s\" title=\"Provide details on this connection's initiator\">X</a>"
                      , itor_httpd_get_link(tunnel->itor, "disp_single"));
    }else if( tunnel->resp_iaddr_req ){
      httpd_printf_td(buf, "none yet");
      httpd_printf_td(buf, "Responding");
      httpd_printf_td(buf, "none yet");
      httpd_printf_td(buf, "none yet");
    }else{
      httpd_printf_td(buf, "%s", ip_addr_str(&tunnel->remote_iaddr));
      httpd_printf_td(buf, "Established");
      DBG_ASSERT( tunnel->stun );
      httpd_printf_td(buf, "%s", stun_get_remote_identity(tunnel->stun) );
      httpd_printf_td(buf, "<a href=\"%s\" title=\"Provide details on this established connection\">X</a>"
                      , stun_httpd_get_link(tunnel->stun, "disp_single"));
    }
    /* Rate columns: averages over the last 5 seconds. */
    httpd_printf_td(buf, "%.2lf pkt/sec",
                    rate_estim_get_avg_delay(tunnel->pkt_rate, 5*1000, 1000) );
    httpd_printf_td(buf, "%.2lf kbyte/sec",
                    rate_estim_get_avg_delay(tunnel->throughput, 5*1000, 1000)/1024 );
    httpd_printf_tr_end(buf);
  }
  httpd_printf_table_end(buf);
  return 0;
}
/* Look up the filter-table slot holding an exact match for the 5-tuple
 * (laddr:lport -> raddr:rport, protocol).
 *
 * Uses double hashing: start at tcp_hash1(), step by tcp_hash2().  Returns
 * the slot index (>= 0) on success, -ENOENT when an EMPTY slot terminates
 * the probe sequence, or -ELOOP if the sequence wraps without finding
 * either (table corrupt or completely full).
 *
 * Caller must hold the stack lock.
 */
int ci_netif_filter_lookup(ci_netif* netif, unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport,
                           unsigned protocol)
{
  unsigned hash1, hash2 = 0;
  ci_netif_filter_table* tbl;
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);

  tbl = netif->filter_table;
  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;
  LOG_NV(log("tbl_lookup: %s %s:%u->%s:%u hash=%u:%u at=%u",
             CI_IP_PROTOCOL_STR(protocol),
             ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
             ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
             first, tcp_hash2(tbl, laddr, lport, raddr, rport, protocol),
             hash1));

  while( 1 ) {
    int id = tbl->table[hash1].id;
    if( CI_LIKELY(id >= 0) ) {
      ci_sock_cmn* s = ID_TO_SOCK(netif, id);
      /* Branch-free 5-tuple comparison: OR of differences is zero only
       * when every field matches. */
      if( ((laddr    - tbl->table[hash1].laddr ) |
           (lport    - sock_lport_be16(s)      ) |
           (raddr    - sock_raddr_be32(s)      ) |
           (rport    - sock_rport_be16(s)      ) |
           (protocol - sock_protocol(s)        )) == 0 )
        return hash1;
    }
    if( id == EMPTY )
      break;
    /* We defer calculating hash2 until it's needed, just to make the fast
     * case that little bit faster. */
    if( hash1 == first )
      hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
    hash1 = (hash1 + hash2) & tbl->table_size_mask;
    if( hash1 == first ) {
      /* Probe sequence wrapped without hitting EMPTY: report and bail. */
      LOG_E(ci_log(FN_FMT "ERROR: LOOP %s:%u->%s:%u hash=%u:%u",
                   FN_PRI_ARGS(netif), ip_addr_str(laddr), lport,
                   ip_addr_str(raddr), rport, hash1, hash2));
      return -ELOOP;
    }
  }

  return -ENOENT;
}
/**
 * Callback invoked by the initiator (itor) when the outgoing connection
 * attempt completes, successfully or not.
 *
 * On failure: the destination is recorded in the negative cache and the
 * tunnel is closed.  On success: the itor is retired, the established
 * stun_t is attached to the tunnel, common initialisation is completed and
 * the packet that originally triggered the connection is flushed.
 */
static void onet_itor_cnx_result_cb( void *userptr, itor_cnx_result_t result,
                                     stun_t *stun )
{
  onet_t        *onet = onet_main;
  onet_tunnel_t *tun  = userptr;

  DBG("enter result=%d stun=%p\n", result, stun);

  /* connection attempt failed => negcache the destination and tear down */
  if( result != ITOR_RESULT_CONNECTED ){
    dst_iaddr_negcache_add( onet->dst_iaddr_negcache, &tun->remote_iaddr, 0 );
    LOGM_ERR("tunnel initialization failed because %s\n", itor_cnx_result_str(result) );
    onet_tunnel_close( tun );
    return;
  }

  DBG_ASSERT( tun->itor );
  DBG_ASSERT( stun );

  /* the initiator has done its job: retire it before attaching the stun */
  itor_stop( tun->itor );
  tun->itor = NULL;

  /* attach the established connection and hook its error callback */
  tun->stun = stun;
  stun_set_inerr_cb( tun->stun, onet_stun_inerr_cb, tun );

  /* finish initialisation shared with the responder path */
  onet_tunnel_init_common( tun );

  /* send the packet that caused this tunnel to be opened */
  onet_tunnel_itor_trigger_pkt_flush( tun );

  LOGM_WARNING("tunnel established with %s (%s) as itor\n"
               , stun_get_remote_identity(tun->stun)
               , ip_addr_str(&tun->remote_iaddr) );
}
/**
 * Return non-zero if [ip_addr] falls inside the local virtual network's
 * netmask, zero otherwise.
 */
int onet_is_local_addr( ip_addr_t *ip_addr )
{
  // TODO if ip_netmask is null, return 0;
  DBG("is %s belong to %s\n", ip_addr_str( ip_addr ), ip_netmask_str( &onet_main->ip_netmask) );
  return ip_addr_belong_to_netmask( ip_addr, &onet_main->ip_netmask ) ? 1 : 0;
}
/*
** See if there is a synrecv object that matches this syn request already.
** Thin wrapper over the bucket walk; logs when no match is found.
*/
ci_tcp_state_synrecv* ci_tcp_listenq_lookup(ci_netif* netif,
                                            ci_tcp_socket_listen* tls,
                                            ciip_tcp_rx_pkt* rxp)
{
  ci_tcp_state_synrecv* match =
    ci_tcp_listenq_bucket_lookup(netif,
                                 ci_ni_aux_p2bucket(netif, tls->bucket),
                                 rxp, 0);
  if( match != NULL )
    return match;

  LOG_TV(log(LPF "no match for %s:%d->%s:%d",
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_saddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_source_be16),
             ip_addr_str(oo_ip_hdr(rxp->pkt)->ip_daddr_be32),
             (int) CI_BSWAP_BE16(rxp->tcp->tcp_dest_be16)));
  return NULL;
}
/**
 * Install a route for [netmask] via the virtual network device, and record
 * the netmask in onet_main.
 *
 * Returns 0 on success (or when the tun_stub debug option suppresses the
 * route), non-zero if the command could not be built or failed.
 *
 * Fixes: the snprintf() result is now checked for truncation before the
 * command is executed, and the system() return value is no longer ignored.
 * TODO this function is really lame
 * - the system() should become a netlink/ioctl syscall
 * - what if the ip addr already exists
 */
static int onet_add_route_netmask( ip_netmask_t *netmask )
{
  ip_addr_t ip_addr;
  char      cmd[300];
  int       len;

  onet_main->ip_netmask = *netmask;
  /* honor the tun_stub user option: skip touching the routing table */
  if( prop_get_bool_dfl( "neoip_router", "debug:tun_stub", 0 ) )
    return 0;

  /* derive the network ("any") address of the netmask for the route */
  ip_netmask_any_addr(netmask, &ip_addr);
  len = snprintf(cmd, sizeof(cmd), "ip route add %s/%d dev %s"
                 , ip_addr_str(&ip_addr)
                 , ip_netmask_get_prefix_len(netmask)
                 , onet_main->vdev.dev_name );
  /* refuse to run a truncated (hence wrong) command */
  if( len < 0 || len >= (int)sizeof(cmd) )
    return -1;

  DBG("exec a cmd <%s> LAME change it to syscall\n", cmd );
  if( system( cmd ) != 0 )
    return -1;
  return 0;
}
/**
 * Return 0 if the local database contains a NS_REC_DNS_IP_ADDR record for
 * this dst_iaddr, non-zero otherwise.
 * - if there is no record and no pending query, launch a query for it.
 *
 * Ownership: the allocated ns_req_dst_iaddr is appended to
 * onet->ns_req_dst_iaddr_list; it is expected to be released by the
 * query-completion path (onet_ns_req_dns_ip_addr_ns_query_cb) -- TODO
 * confirm, the release is outside this view.
 */
static int onet_ns_req_dst_iaddr_test( ip_addr_t *dst_iaddr )
{
  onet_t *onet = onet_main;
  nipid_t recordid;
  onet_ns_req_dst_iaddr_t *ns_req_dst_iaddr;

  /* derive the record id used to index DNS_IP_ADDR records */
  nipid_build_dns_ip_addr( &recordid, dst_iaddr );

  // if the record is already in the database, do nothing
  if( ns_util_db_get( NULL, &recordid, NS_REC_DNS_IP_ADDR ) )
    return 0;
  // if there is a pending ns_query for it, do nothing but return faillure
  if( onet_ns_req_dst_iaddr_get( dst_iaddr ) )
    return 1;

  // if there is no query, launch one
  ns_req_dst_iaddr = nipmem_zalloc(sizeof(*ns_req_dst_iaddr));
  // fill the struct
  ns_req_dst_iaddr->dst_iaddr = *dst_iaddr;
  ns_req_dst_iaddr->ns_req = ns_query_req_open( NULL, &recordid,
                                                NS_REC_DNS_IP_ADDR,
                                                ONET_DELAY_B4_ICMP ,
                                                onet_ns_req_dns_ip_addr_ns_query_cb,
                                                ns_req_dst_iaddr );
  // update to the list
  onet->ns_req_dst_iaddr_list = g_list_append(onet->ns_req_dst_iaddr_list,
                                              ns_req_dst_iaddr );
  DBG("launch a ns_req_dst_iaddr for %s\n", ip_addr_str(dst_iaddr) );
  // return faillure
  return 1;
}
/**
 * Callback fired when the inner-address allocation completes.
 *
 * On success the allocated address/netmask are stored, the allocation
 * client is closed and the remainder of onet initialisation proceeds.
 * On failure nothing is done yet (see TODO).
 */
static void onet_iaddr_log_client_cb( void *userptr,
                                      iaddr_log_client_state_t result,
                                      ip_addr_t *ip_addr,
                                      ip_netmask_t *ip_netmask )
{
  onet_t *router = onet_main;

  DBG("enter result=%d\n", result);

  if( result != IADDR_LOG_CLIENT_STATE_SUCCEED ){
    // TODO what do i do here ?!?!?!?! i close the tunnel ?
    // - i retry in 10sec ? YES!!! anyway you cant do anything without it
    return;
  }

  DBG_ASSERT( result == IADDR_LOG_CLIENT_STATE_SUCCEED );
  DBG("SUCCEED ip_addr=%s ip_netmask=%s\n", ip_addr_str(ip_addr), ip_netmask_str(ip_netmask) );

  /* record the allocated inner address and netmask */
  router->ip_iaddr   = *ip_addr;
  router->ip_netmask = *ip_netmask;

  /* the allocation client is no longer needed */
  iaddr_log_client_close( &router->iaddr_log_client );

  /* continue initialisation now that the inner address is known */
  onet_init_post_iaddr();
}
/* Put a TCP socket into the LISTEN state.
 *
 * Handles: handover decisions, implicit bind when unbound, transition to
 * the listen state, listen-queue timer setup, software/hardware filter
 * installation, and the companion OS-socket listen().
 *
 * Returns 0 on success, CI_SOCKET_HANDOVER to pass the socket to the OS,
 * or an error via CI_SET_ERROR / CI_SOCKET_ERROR.
 *
 * Fix: the old code tested `ul_backlog < 0` on an *unsigned* variable,
 * which is always false -- a negative backlog therefore became a huge
 * unsigned acceptq_max instead of being clamped to max_ep_bufs.  Test the
 * signed parameter instead (matches listen(2) semantics, where a negative
 * backlog means "maximum").
 *
 * ?? error handling on possible fails not handled robustly...
 * ?? Need to check port number is valid TODO
 */
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__,
             SK_PRI_ARGS(ep), backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( !NI_OPTS(netif).tcp_server_loopback) {
    /* We should handover if the socket is bound to alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  /* Clamp the backlog: negative means "maximum"; otherwise enforce the
   * configured minimum.  NB: test the signed parameter -- ul_backlog is
   * unsigned, so `ul_backlog < 0` would never be true. */
  if( backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  /* Already listening: just update the accept-queue limit. */
  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }

  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if socket used for a previous, failed, connect then the error
   * numbers will not be as expected.  Only seen when not using listening
   * netifs (as moving the EP to the new netif resets them). */
  ts->s.tx_errno = EPIPE;
  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose
     * a port.  As we haven't been given a hint we let the OS choose. */
    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32,
                   &source_be16);
    if (CI_LIKELY( rc==0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u",
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  }

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);
  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* setup listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on error path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* install all the filters needed for this connection
   * - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   * TODO: handle BINDTODEVICE by setting phys_port paramter to correct
   * physical L5 port index
   * TODO: handle REUSEADDR by setting last paramter to TRUE */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      /* Partial filter install: tolerate only if no_fail is configured. */
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters returned %d", __FUNCTION__, rc));
    if (rc < 0) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }

  /*
   * Call of system listen() is required for listen any, local host
   * communications server and multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
                                         S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if ( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* Above function sets orphan flag but we are attached to an FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be consider an internal error or failing of onload.
 *
 * Outcomes: 0 on success (socket connected through onload or explicitly
 * routed via the OS socket), CI_SOCKET_HANDOVER to hand the socket to the
 * OS, or a negative error.  Errors are converted to handover when the
 * no_fail option is set.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
  const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
  ci_uint32 dst_be32;
  ci_udp_state* us = SOCK_TO_UDP(ep->s);
  int onloadable;
  int rc = 0;

  CHECK_UEP(ep);
  UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
  us->s.rx_errno = 0;
  us->s.tx_errno = 0;

  /* connect(AF_UNSPEC)-style disconnect request. */
  if( IS_DISCONNECTING(serv_sin) ) {
    rc = ci_udp_disconnect(ep, us, os_sock);
    goto out;
  }
#if CI_CFG_FAKE_IPV6
  /* Genuine (non-v4-mapped) IPv6 destinations are not accelerated. */
  if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
    LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
    goto handover;
  }
#endif

  dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);

  /* Pick up the local address/port the OS assigned during sys connect(). */
  if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
    LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
              FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
              CI_BSWAP_BE16(serv_sin->sin_port), errno));
    goto out;
  }

  us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
  ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
  /* Resolve the route to decide whether this destination is reachable
   * through an accelerated interface. */
  cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

  switch( us->s.pkt.status ) {
  case retrrc_success:
  case retrrc_nomac:
    onloadable = 1;
    break;
  default:
    onloadable = 0;
    if( NI_OPTS(ep->netif).udp_connect_handover ) {
      LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                 ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
      goto handover;
    }
    break;
  }

  /* Wildcard destination: traffic will flow via the OS socket instead. */
  if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
    LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
               FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
               CI_BSWAP_BE16(serv_sin->sin_port)));
    ci_udp_clr_filters(ep);
    return 0;
  }

  if( CI_IP_IS_LOOPBACK(dst_be32) ) {
    /* After connecting via loopback it is not possible to connect anywhere
     * else. */
    LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
               ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
    goto handover;
  }

  if( onloadable ) {
#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL )
      us->s.ofe_code_start = ofe_socktbl_find(
                        ep->netif->ofe, OFE_SOCKTYPE_UDP,
                        udp_laddr_be32(us), udp_raddr_be32(us),
                        udp_lport_be16(us), udp_rport_be16(us));
#endif

    if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
      /* Failed to set filters.  Most likely we've run out of h/w filters.
       * Handover to O/S to avoid breaking the app.
       *
       * TODO: Actually we probably won't break the app if we don't
       * handover, as packets will still get delivered via the kernel
       * stack.  Might be worth having a runtime option to choose whether
       * or not to handover in such cases.
       */
      LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                CI_BSWAP_BE16(serv_sin->sin_port), rc));
      CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
      goto out;
    }
  }
  else {
    ci_udp_clr_filters(ep);
  }

  LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
             SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
             ip_addr_str(udp_laddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
             ip_addr_str(udp_raddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
  return 0;

 out:
  if( rc < 0 && CITP_OPTS.no_fail )
    goto handover;
  return rc;
 handover:
  ci_udp_clr_filters(ep);
  return CI_SOCKET_HANDOVER;
}
/* In this bind handler we just check that the address to which
 * are binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 *
 * Validates the sockaddr, optionally applies (legacy) SO_REUSEPORT when the
 * port is on the configured tcp_reuseports list, performs the actual bind
 * via __ci_bind(), and records the bound address/port in the socket and its
 * control-plane state.  Returns 0, CI_SOCKET_HANDOVER, or sets errno via
 * RET_WITH_ERRNO.
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   * What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );

  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );
  /* Genuine (non-v4-mapped) IPv6 binds are handed over to the OS. */
  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif
  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* If the port is on the configured reuseport list, apply SO_REUSEPORT on
   * the backing OS socket; fall back to the legacy emulation when the
   * kernel does not support the option (ENOPROTOOPT). */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* With legacy reuseport the real bind is deferred; otherwise bind now
   * (may also pick an ephemeral port into new_port). */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  /* Record the bound 4-tuple in both the socket and control-plane state. */
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u",
             ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}
/**
 * callback to received data from the vdev
 * - if a established tunnel exist, send the packet to it
 * - if a tunnel is currently in establishment, set the triggering packet
 * - if no tunnel exist, start the establishement on one
 *
 * Only IPv4 is handled.  Packets to negcached destinations get an ICMP
 * host-unreachable; packets whose destination cannot be resolved yet are
 * discarded while a name-service query runs.
 */
static void onet_vdev_in( void *userptr, int ethertype, char *pkt,
                          int pkt_len )
{
  struct iphdr *iph = (struct iphdr *)pkt;
  onet_t *onet = onet_main;
  onet_tunnel_t *tunnel;
  ip_addr_t dst_iaddr;

  DBG("enter ethertype=0x%x\n", ethertype );
  // handle only ipv4 for now
  EXP_ASSERT( ethertype == ETHERTYPE_IP );

  // sanity check
  // TODO put the basic check of the ipv4 packet in a function
  if( pkt_len < sizeof(*iph) ){
    LOG(0,"received bogus packet of %d-byte. not even big enought for an ipv4 header\n", pkt_len );
    return;
  }
  /* ihl is in 32-bit words; reject packets shorter than their own header */
  if( pkt_len < iph->ihl*4 ){
    LOG(0,"received bogus packet of %d-byte with ipv4_hd->ihl=%d\n", pkt_len, iph->ihl*4 );
    return;
  }

  // get the destination ip address from the packet
  ip_addr_v4_set( &dst_iaddr, ntohl(iph->daddr) );

  // find a existing tunnel if there is any
  tunnel = onet_tunnel_from_remote_iaddr( &dst_iaddr );

  // if there is a tunnel and the connection is already established, send it thru it
  if( tunnel && tunnel->stun ){
    DBG("there is already a establish link for this packet \n");
    // TMP: just to test the limitor
    if( rate_limit_exceeded( tunnel->thput_limit) ){
      // LOGM_ERR("packet discarded due to rate limiter\n");
      return;
    }
    // update the pkt_rate and throughput
    rate_estim_add( tunnel->pkt_rate, 1 );
    rate_estim_add( tunnel->throughput, pkt_len );
    // send the packet
    stun_out_data( tunnel->stun, ethertype, pkt, pkt_len );
    return;
  }

  // if the ipaddr is in the dst_iaddr_negcache, return
  if( dst_iaddr_negcache_is_present( onet->dst_iaddr_negcache, &dst_iaddr) ){
    // return an ICMP if the ip record is in the dst_iaddr_negcache
    // - apply the concept of not replying a icmp immediatly to let
    //   the time to resolve the address
    //   - similar to the time to solve the hw address with ARP
    //   - as in rfc2461.7.2.2, ICMP must be replied after 3sec
    //   - it is ONET_DELAY_B4_ICMP
    // TODO the timer aspect isnt well respected now
    // - itor has its own timer see bug 359
    // - onet_ns_req_dst_iaddr_* honor it tho
    raw_icmp_reply_send( ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, pkt, pkt_len );
    return;
  }

  // if there no record for this dst_iaddr in the local database, discard the packet
  if( onet_ns_req_dst_iaddr_test( &dst_iaddr ) ){
    DBG("received packet for which im unable to find a rdvpoint\n" );
    return;
  }

  // if a establishing tunnel exists, update the trigerring packet
  if( tunnel ){
    DBG_ASSERT( tunnel->itor );
    // update the trigger packet
    onet_tunnel_itor_trigger_pkt_set( tunnel, ethertype, pkt, pkt_len );
    DBG("tunnel is currently in establishement for this packet\n");
    return;
  }
  DBG_ASSERT( !tunnel );

  // create a tunnel as itor
  tunnel = onet_tunnel_open_itor( &dst_iaddr );
  if( !tunnel ){
    LOGM_ERR("can't initiate a tunnel toward the iaddr %s\n", ip_addr_str( &dst_iaddr ) );
    return;
  }
  // set the trigger packet
  onet_tunnel_itor_trigger_pkt_set( tunnel, ethertype, pkt, pkt_len );
}
/* Insert for either TCP or UDP.
 *
 * Adds a filter-table entry mapping the 5-tuple to [tcp_id], probing with
 * double hashing (start tcp_hash1, stride tcp_hash2).  Every occupied slot
 * hopped over has its route_count incremented to record that some entry's
 * probe path passes through it.  Returns 0 on success or -ENOBUFS when the
 * probe sequence wraps without finding a free slot (table full).
 *
 * Caller must hold the stack lock.
 */
int ci_netif_filter_insert(ci_netif* netif, oo_sp tcp_id,
                           unsigned laddr, unsigned lport,
                           unsigned raddr, unsigned rport,
                           unsigned protocol)
{
  ci_netif_filter_table_entry* entry;
  unsigned hash1, hash2;
  ci_netif_filter_table* tbl;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
  unsigned hops = 1;
#endif
  unsigned first;

  ci_assert(netif);
  ci_assert(ci_netif_is_locked(netif));
  ci_assert(netif->filter_table);
  tbl = netif->filter_table;

  hash1 = tcp_hash1(tbl, laddr, lport, raddr, rport, protocol);
  hash2 = tcp_hash2(tbl, laddr, lport, raddr, rport, protocol);
  first = hash1;

  /* Find a free slot. */
  while( 1 ) {
    entry = &tbl->table[hash1];
    if( entry->id < 0 )  break;

    /* Occupied: this entry now lies on our probe path. */
    ++entry->route_count;
#if !defined(NDEBUG) || CI_CFG_STATS_NETIF
    ++hops;
#endif

    /* A socket can only have multiple entries in the filter table if each
     * entry has a different [laddr]. */
    ci_assert(
      !((entry->id == OO_SP_TO_INT(tcp_id)) && (laddr == entry->laddr)) );

    hash1 = (hash1 + hash2) & tbl->table_size_mask;

    if( hash1 == first ) {
      /* Wrapped the whole probe sequence: no free slot.  Log once per
       * socket (guarded by CI_SOCK_FLAG_SW_FILTER_FULL). */
      ci_sock_cmn *s = SP_TO_SOCK_CMN(netif, tcp_id);
      if( ! (s->s_flags & CI_SOCK_FLAG_SW_FILTER_FULL) ) {
        LOG_E(ci_log(FN_FMT "%d FULL %s %s:%u->%s:%u hops=%u",
                     FN_PRI_ARGS(netif),
                     OO_SP_FMT(tcp_id), CI_IP_PROTOCOL_STR(protocol),
                     ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                     ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                     hops));
        s->s_flags |= CI_SOCK_FLAG_SW_FILTER_FULL;
      }

      CITP_STATS_NETIF_INC(netif, sw_filter_insert_table_full);
      return -ENOBUFS;
    }
  }

  /* Now insert the new entry. */
  LOG_TC(ci_log(FN_FMT "%d INSERT %s %s:%u->%s:%u hash=%u:%u at=%u "
                "over=%d hops=%u", FN_PRI_ARGS(netif), OO_SP_FMT(tcp_id),
                CI_IP_PROTOCOL_STR(protocol),
                ip_addr_str(laddr), (unsigned) CI_BSWAP_BE16(lport),
                ip_addr_str(raddr), (unsigned) CI_BSWAP_BE16(rport),
                first, hash2, hash1, entry->id, hops));

#if CI_CFG_STATS_NETIF
  if( hops > netif->state->stats.table_max_hops )
    netif->state->stats.table_max_hops = hops;
  /* Keep a rolling average of the number of hops per entry. */
  if( netif->state->stats.table_mean_hops == 0 )
    netif->state->stats.table_mean_hops = 1;
  netif->state->stats.table_mean_hops =
    (netif->state->stats.table_mean_hops * 9 + hops) / 10;

  if( entry->id == EMPTY )
    ++netif->state->stats.table_n_slots;
  ++netif->state->stats.table_n_entries;
#endif
  entry->id = OO_SP_TO_INT(tcp_id);
  entry->laddr = laddr;
  return 0;
}
/* Start an active-open (connect) on TCP state [ts]: finalise MSS/window
 * parameters from the chosen route, bind a local port if still required,
 * allocate the SYN packet, install hardware/software filters, move the
 * socket to SYN_SENT and enqueue the SYN.
 *
 * Params:  dst_be32/dport_be16 -- destination address/port, network order.
 *          fail_rc -- out-parameter receiving the errno-style result on
 *          the failure paths (set via CI_SET_ERROR or directly).
 * Returns: CI_CONNECT_UL_OK on success;
 *          CI_CONNECT_UL_FAIL with *fail_rc set (including the
 *            non-blocking EINPROGRESS case, which is a "failure" only in
 *            the sense that connect() must return to the caller);
 *          CI_CONNECT_UL_LOCK_DROPPED if waiting for a packet buffer was
 *            interrupted (caller must re-acquire state);
 *          CI_CONNECT_UL_START_AGAIN if a buffer became available and the
 *            whole operation should be retried from the top (the socket
 *            may have been connected meanwhile).
 *
 * Statement order matters throughout: the packet is allocated before any
 * irreversible state change, and snd_una is initialised before
 * ci_tcp_clear_rtt_timing() (see comment below).
 */
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
                                   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU related values.
   * Note, even these values are speculative since the real MTU
   * could change between now and passing the packet to the lower layers
   */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* Send buffer adjusted by ci_tcp_set_eff_mss(), but we want it to stay
   * zero until the connection is established.
   */
  ts->so_sndbuf_pkts = 0;

  /*
   * 3. State and address are OK. It's address routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    /* Socket has no local port yet: auto-bind now.  If an address was
     * explicitly bound earlier, keep it; otherwise bind to INADDR_ANY. */
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;
    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      /* Record the chosen ephemeral port in both the TCP header template
       * and the control-plane copy. */
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u",
                 LNT_PRI_ARGS(ni, ts),
                 ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      /* Bind succeeded but we still have no usable source address. */
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state.
   */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s, CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more.
     */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
  /* Onload Filter Engine: look up the bytecode start for this 4-tuple. */
  if( ni->ofe != NULL )
    ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter.
     */
    ci_assert_nequal(rc, -EFILTERSSOME);
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts),
                                    ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work.
       */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        /* Undo the implicit source-address selection so a later connect
         * attempt can pick afresh. */
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
             ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
             ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  /* Window of exactly one byte: only the SYN may be sent until the peer's
   * ACK opens the window (see the big comment below). */
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una.
   */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  /* Reset option flags, then enable those configured for SYNs. */
  ts->tcpflags &=~ CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl =
      ci_tcp_wscl_by_buff(ni, ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr)+sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
                        ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app trys to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno acheive this.  The transmit window
  ** is set to 1 byte which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window.
  */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0;
  /* Enqueue the SYN (a data-less segment) on the send queue. */
  ci_tcp_enqueue_no_data(ts, ni, pkt);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    /* Non-blocking socket: report EINPROGRESS now; the handshake
     * continues asynchronously. */
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS",
                LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}