static int ci_udp_set_filters(citp_socket* ep, ci_udp_state* us)
{
  int rc;

  ci_assert(ep);
  ci_assert(us);

  if( udp_lport_be16(us) == 0 )
    return 0;

  rc = ci_tcp_ep_set_filters(ep->netif, S_SP(us), us->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc == -EFILTERSSOME ) {
    if( CITP_OPTS.no_fail )
      rc = 0;
    else {
      ci_tcp_ep_clear_filters(ep->netif, S_SP(us), 0);
      rc = -ENOBUFS;
    }
  }
  if( rc < 0 ) {
    LOG_UC(log(FNS_FMT "ci_tcp_ep_set_filters failed (%d)",
               FNS_PRI_ARGS(ep->netif, ep->s), -rc));
    CI_SET_ERROR(rc, -rc);
    return rc;
  }

  UDP_SET_FLAG(us, CI_UDPF_FILTERED);
  return 0;
}
static void ci_udp_clr_filters(citp_socket* ep)
{
  ci_udp_state* us = SOCK_TO_UDP(ep->s);

  if( UDP_GET_FLAG(us, CI_UDPF_FILTERED) ) {
    ci_tcp_ep_clear_filters(ep->netif, S_SP(us), 0);
    UDP_CLR_FLAG(us, CI_UDPF_FILTERED);
  }
}
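/* Usage sketch (illustrative only; the callers are not shown in this
 * excerpt).  The pair above is typically driven from the bind and close
 * paths; my_udp_bind_done() is a hypothetical name:
 *
 *   static int my_udp_bind_done(citp_socket* ep, ci_udp_state* us)
 *   {
 *     int rc = ci_udp_set_filters(ep, us);  // claim h/w + s/w filters
 *     if( rc < 0 )
 *       return rc;                          // errno already set
 *     return 0;
 *   }
 *
 * ci_udp_clr_filters(ep) undoes this on teardown, and is safe to call even
 * if no filters were installed, since it checks CI_UDPF_FILTERED first.
 */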
ci_inline void ci_tcp_set_addr_on_promote(ci_netif* netif, ci_tcp_state* ts,
                                          ci_tcp_state_synrecv* tsr,
                                          ci_tcp_socket_listen* tls)
{
  /* copy and initialise state */
  ts->s.pkt.ip.ip_saddr_be32 = tsr->l_addr;
  TS_TCP(ts)->tcp_source_be16 = sock_lport_be16(&tls->s);
  ts->s.cp.ip_laddr_be32 = tsr->l_addr;
  ts->s.cp.lport_be16 = sock_lport_be16(&tls->s);
  ci_tcp_set_peer(ts, tsr->r_addr, tsr->r_port);

  /* "filter" equivalent for loopback socket */
  if( OO_SP_NOT_NULL(tsr->local_peer) ) {
    ci_tcp_state *peer = ID_TO_TCP(netif, tsr->local_peer);
    ts->local_peer = tsr->local_peer;
    peer->local_peer = S_SP(ts);
  }
}
ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_tcp_state* ts;
  ci_fd_t fd;

  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  ts = ci_tcp_get_state_buf(netif);
  if( ts == NULL ) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers",
                 __FUNCTION__, NI_ID(netif)));
    return -ENOMEM;
  }

  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif),
                                 S_SP(ts), domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach"
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach"
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
  }
  else {
    ci_assert(~ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);

    /* Apply default sockbuf sizes now we've updated them from the kernel
    ** defaults. */
    ts->s.so.sndbuf = NI_OPTS(netif).tcp_sndbuf_def;
    ts->s.so.rcvbuf = NI_OPTS(netif).tcp_rcvbuf_def;
    ep->netif = netif;
    ep->s = &ts->s;
    CHECK_TEP(ep);
  }

  ci_netif_unlock(netif);
  return fd;
}
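/* Usage sketch (illustrative only): a socket() intercept would construct
 * the endpoint and hand the fd back to the application, e.g.
 *
 *   citp_socket ep;
 *   ci_fd_t fd = ci_tcp_ep_ctor(&ep, netif, AF_INET, SOCK_STREAM);
 *   if( fd < 0 )
 *     return fd;   // -ENOMEM, or the attach failure code
 *
 * Note the return value doubles as an error code: negative values are
 * -errno, otherwise it is the attached file descriptor.
 */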
void ci_udp_all_fds_gone(ci_netif* netif, oo_sp sock_id, int do_free)
{
  /* All process references to this socket have gone.  So we should
   * shutdown() if necessary, and arrange for all resources to eventually
   * get cleaned up.
   *
   * This is called by the driver only.  [sock_id] is trusted.
   */
  ci_udp_state* us = SP_TO_UDP(netif, sock_id);

  ci_assert(ci_netif_is_locked(netif));
  ci_assert(us->s.b.state == CI_TCP_STATE_UDP);

  LOG_UC(ci_log("ci_udp_all_fds_gone: "NTS_FMT, NTS_PRI_ARGS(netif, us)));

  if( UDP_GET_FLAG(us, CI_UDPF_FILTERED) ) {
    UDP_CLR_FLAG(us, CI_UDPF_FILTERED);
    ci_tcp_ep_clear_filters(netif, S_SP(us), 0);
  }
  ci_udp_recv_q_drop(netif, &us->recv_q);
  ci_ni_dllist_remove(netif, &us->s.reap_link);

  if( OO_PP_NOT_NULL(us->zc_kernel_datagram) ) {
    ci_netif_pkt_release_rx(netif, PKT_CHK(netif, us->zc_kernel_datagram));
    us->zc_kernel_datagram = OO_PP_NULL;
    us->zc_kernel_datagram_count = 0;
  }

  /* Only free state if no outstanding tx packets: otherwise it'll get
   * freed by the tx completion event.
   */
  if( do_free ) {
    if( us->tx_count == 0 )
      ci_udp_state_free(netif, us);
    else
      CITP_STATS_NETIF_INC(netif, udp_free_with_tx_active);
  }
}
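/* Call-site sketch (illustrative, inferred from the asserts above): the
 * driver invokes this with the stack lock held once the last reference to
 * [sock_id] has gone, roughly:
 *
 *   ci_netif_lock(netif);
 *   ci_udp_all_fds_gone(netif, sock_id, do_free);
 *   ci_netif_unlock(netif);
 */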
/*
** Promote a synrecv structure to an established socket.
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like.
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;

  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* Grab a tcp_state structure that will go onto the accept queue.  We
     * take from the cache of EPs if any are available. */
    ts = get_ts_from_cache(netif, tsr, tls);
    if( ! ts ) {
      /* None on cache; try allocating a new ts. */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached? */
        ts = get_ts_from_cache(netif, tsr, tls);
        if( ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }

      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting
       * filters. */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "Borrow" the filter from the listening socket.  For a loopback
       * socket we do not need filters, but we have to take a reference to
       * the OS socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it. */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the h/w filter in place for
       * cached EPs.  This will probably not have the correct raddr and
       * rport, but as it's sharing the listening socket's filter that's
       * not a problem.  It will be updated if this is still around when
       * the listener is closed. */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s),
                                  tsr->r_addr, tsr->r_port,
                                  tcp_protocol(ts));
      if( rc < 0 ) {
        /* Bung it back on the cache list. */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying
       * to insert the s/w filter, so we can push the tcp state back onto
       * the cache queue with as few changes as possible if we fail to add
       * the s/w filter. */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d from cached to connected",
                    ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus, CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                                         + sizeof(ci_ip4_hdr)));

    /* If we've got a SYN via a local route, we can handle it. */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);

    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    }
    else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL(ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if( ts->c.user_mss && ts->c.user_mss < ts->smss )
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss > 0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited.
     * Note: Windows does not inherit rcvbuf until the call to accept
     * completes.  The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.). */
    ci_tcp_inherit_accept_options(netif, tls, ts,
                                  "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);

    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
    ** buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts), tcp_snd_nxt(ts), ts->snd_max,
               tcp_enq_nxt(ts)));

    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
    *ts_out = ts;
    return 0;
  }

  /* Accept queue is full.  The remainder of this path is elided in this
   * excerpt; -ENOSPC is an assumed return value. */
  return -ENOSPC;
}
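/* Caller sketch (illustrative only): the receive path promotes a synrecv
 * entry when the handshake-completing ACK arrives, roughly:
 *
 *   ci_tcp_state* ts;
 *   int rc = ci_tcp_listenq_try_promote(netif, tls, tsr, ipcache, &ts);
 *   if( rc == 0 )
 *     ...   // ts is ESTABLISHED and already on the accept queue
 *   else
 *     ...   // e.g. -ENOMEM: per the header comment, the caller handles
 *           // the failure (typically the ACK is dropped and the peer
 *           // retransmits)
 */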
static int ci_tcp_connect_ul_syn_sent(ci_netif *ni, ci_tcp_state *ts)
{
  int rc = 0;

  if( ts->s.b.state == CI_TCP_SYN_SENT ) {
    ci_netif_poll(ni);
    if( OO_SP_NOT_NULL(ts->local_peer) ) {
      /* No reason to sleep.  Evidently the listener has dropped our SYN
       * for some reason.  Go away! */
      ci_tcp_drop(ni, ts, EBUSY);
      RET_WITH_ERRNO(EBUSY);
    }
    CI_TCP_SLEEP_WHILE(ni, ts, CI_SB_FLAG_WAKE_RX,
                       ts->s.so.sndtimeo_msec,
                       ts->s.b.state == CI_TCP_SYN_SENT, &rc);
  }

  if( rc == -EAGAIN ) {
    LOG_TC(log(LNT_FMT "timeout on sleep: %d",
               LNT_PRI_ARGS(ni, ts), -rc));
    if( ! (ts->tcpflags & CI_TCPT_FLAG_NONBLOCK_CONNECT) ) {
      ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
      CI_SET_ERROR(rc, EINPROGRESS);
    }
    else
      CI_SET_ERROR(rc, EALREADY);
    return rc;
  }
  else if( rc == -EINTR ) {
    LOG_TC(log(LNT_FMT "connect() was interrupted by a signal",
               LNT_PRI_ARGS(ni, ts)));
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    CI_SET_ERROR(rc, EINTR);
    return rc;
  }

  /*! \TODO propagate the correct error code: CONNREFUSED, NOROUTE, etc. */
  if( ts->s.b.state == CI_TCP_CLOSED ) {
    /* Bug 3558:
     * Set OS socket state to allow/disallow the next bind().
     * This is a Linux-specific hack. */
#ifdef __ci_driver__
    CI_TRY(efab_tcp_helper_set_tcp_close_os_sock(
                                         netif2tcp_helper_resource(ni),
                                         S_SP(ts)));
#else
    CI_TRY(ci_tcp_helper_set_tcp_close_os_sock(ni, S_SP(ts)));
#endif

    /* We should re-bind the socket on the next use if the port was
     * determined by the OS. */
    if( ! (ts->s.s_flags & CI_SOCK_FLAG_PORT_BOUND) )
      ts->s.s_flags |= CI_SOCK_FLAG_CONNECT_MUST_BIND;

    /* - if SO_ERROR is set, handle it and return this value;
     * - else if rx_errno is set, return it;
     * - else (TCP_RX_ERRNO==0, socket is CI_SHUT_RD) return ECONNABORTED */
    if( (rc = ci_tcp_connect_handle_so_error(&ts->s)) == 0 )
      rc = TCP_RX_ERRNO(ts) ? TCP_RX_ERRNO(ts) : ECONNABORTED;
    CI_SET_ERROR(rc, rc);

    if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
      ts->s.pkt.ip.ip_saddr_be32 = 0;
      ts->s.cp.ip_laddr_be32 = 0;
    }
    return rc;
  }

  return 0;
}
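/* Note on the error convention above (illustrative): each failure path
 * calls CI_SET_ERROR() to record the application-visible errno
 * (EINPROGRESS, EALREADY, EINTR, ...), so a connect() intercept can
 * simply propagate the result:
 *
 *   rc = ci_tcp_connect_ul_syn_sent(ni, ts);
 *   if( rc != 0 )
 *     return rc;   // errno has already been set appropriately
 */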
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
                                   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU-related values.
   * Note, even these values are speculative since the real MTU could
   * change between now and passing the packet to the lower layers. */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* The send buffer is adjusted by ci_tcp_set_eff_mss(), but we want it
   * to stay zero until the connection is established. */
  ts->so_sndbuf_pkts = 0;

  /*
   * 3. State and address are OK.  The address is routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;

    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u",
                 LNT_PRI_ARGS(ni, ts), ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state. */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s,
                           CI_SLEEP_NETIF_LOCKED | CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more. */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
  if( ni->ofe != NULL )
    ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter. */
    ci_assert_nequal(rc, -EFILTERSSOME);
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work. */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
             ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
             ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una. */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  ts->tcpflags &= ~CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl = ci_tcp_wscl_by_buff(ni,
                                ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
                        ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app tries to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives.
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno achieves this.  The transmit window
  ** is set to 1 byte which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window. */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0;

  ci_tcp_enqueue_no_data(ts, ni, pkt);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log(LNT_FMT "Non-blocking connect - return EINPROGRESS",
               LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}
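/* Caller sketch (illustrative only; the connect() wrapper is not shown in
 * this excerpt).  The CI_CONNECT_UL_* codes are designed to be driven
 * from a loop of roughly this shape:
 *
 *   again:
 *     rc = ci_tcp_connect_ul_start(ni, ts, dst_be32, dport_be16, &fail_rc);
 *     switch( rc ) {
 *       case CI_CONNECT_UL_OK:           break;       // SYN sent
 *       case CI_CONNECT_UL_START_AGAIN:  goto again;  // re-check state
 *       case CI_CONNECT_UL_LOCK_DROPPED: ...          // re-validate after
 *                                                     // reacquiring lock
 *       case CI_CONNECT_UL_FAIL:         return fail_rc;
 *     }
 */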
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /*
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__,
             SK_PRI_ARGS(ep), backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;
  if( ! NI_OPTS(netif).tcp_server_loopback ) {
    /* We should hand over if the socket is bound to an alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  if( (int) ul_backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }

  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if the socket was used for a previous, failed, connect then
   * the error numbers will not be as expected.  Only seen when not using
   * listening netifs (as moving the EP to the new netif resets them). */
  ts->s.tx_errno = EPIPE;
  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( ! (ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose a port.
     * As we haven't been given a hint we let the OS choose. */
    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32,
                   &source_be16);
    if( CI_LIKELY( rc == 0 ) ) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u",
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  }

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);
  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* Set up the listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on the error
   * path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* Install all the filters needed for this connection.
   * - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   * TODO: handle BINDTODEVICE by setting the phys_port parameter to the
   * correct physical L5 port index
   * TODO: handle REUSEADDR by setting the last parameter to TRUE */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters returned %d", __FUNCTION__, rc));
    if( rc < 0 ) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }

  /*
   * A call to the system listen() is required for a listen-any socket,
   * for local-host communication servers, and for multi-homed servers
   * (to accept connections to L5-assigned address(es), but incoming from
   * other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock(netif2tcp_helper_resource(netif),
                                        S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if( rc < 0 ) {
    /* Clear the filter we've just set. */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* Revert TCP state to a non-listening socket format. */
  __ci_tcp_listen_to_normal(netif, tls);
  /* The function above sets the orphan flag, but we are attached to an
   * FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
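/* Caller sketch (illustrative only): a listen() intercept is expected to
 * treat CI_SOCKET_HANDOVER as "give the socket to the kernel stack":
 *
 *   rc = ci_tcp_listen(ep, fd, backlog);
 *   if( rc == CI_SOCKET_HANDOVER )
 *     ...   // hand the fd over to the OS and listen there instead
 *   else if( rc < 0 )
 *     ...   // CI_SOCKET_ERROR: errno has already been set
 */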