static int ci_udp_disconnect(citp_socket* ep, ci_udp_state* us,
                             ci_fd_t os_sock)
{
  int rc;

  if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
    LOG_E(log(FNS_FMT "ERROR: sys_getsockname failed (%d)",
              FNS_PRI_ARGS(ep->netif, ep->s), errno));
    return rc;
  }

  ci_udp_set_raddr(us, 0, 0);

  /* TODO: We shouldn't really clear then set here; instead we should
   * insert wildcard filters before removing the full-match ones.  i.e. The
   * reverse of what we do in connect().  But probably not worth worrying
   * about in this case.
   */
  ci_udp_clr_filters(ep);

#ifdef ONLOAD_OFE
  if( ep->netif->ofe != NULL )
    us->s.ofe_code_start = ofe_socktbl_find(
                        ep->netif->ofe, OFE_SOCKTYPE_UDP,
                        udp_laddr_be32(us), udp_raddr_be32(us),
                        udp_lport_be16(us), udp_rport_be16(us));
#endif

  if( (rc = ci_udp_set_filters(ep, us)) != 0 )
    /* Not too bad -- should still get packets via OS socket. */
    LOG_U(log(FNS_FMT "ERROR: ci_udp_set_filters failed (%d)",
              FNS_PRI_ARGS(ep->netif, ep->s), errno));

  us->s.cp.sock_cp_flags &= ~OO_SCP_CONNECTED;
  return 0;
}
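/* ci_udp_disconnect() is reached when an application re-connects a UDP
 * socket to an address with family AF_UNSPEC (the IS_DISCONNECTING() check
 * in ci_udp_connect_conclude() below).  A minimal application-side sketch
 * of that POSIX convention -- plain sockets, not Onload-specific, and the
 * peer address is illustrative only:
 *
 *   #include <arpa/inet.h>
 *   #include <netinet/in.h>
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include <sys/socket.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *     int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *     struct sockaddr_in peer;
 *
 *     // Associate with a peer: sets the default destination and filters
 *     // received datagrams to that peer.
 *     memset(&peer, 0, sizeof(peer));
 *     peer.sin_family = AF_INET;
 *     peer.sin_port = htons(5000);
 *     inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);
 *     if( connect(fd, (struct sockaddr*) &peer, sizeof(peer)) < 0 )
 *       perror("connect");
 *
 *     // Disconnect: connect() again with AF_UNSPEC.  This is the path
 *     // that ends up in ci_udp_disconnect() above, which reverts to
 *     // wildcard filters and clears OO_SCP_CONNECTED.
 *     struct sockaddr unspec;
 *     memset(&unspec, 0, sizeof(unspec));
 *     unspec.sa_family = AF_UNSPEC;
 *     if( connect(fd, &unspec, sizeof(unspec)) < 0 )
 *       perror("disconnect");
 *
 *     close(fd);
 *     return 0;
 *   }
 */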
/* unpick the ci_ip_timer structure to actually do the callback */
static void ci_ip_timer_docallback(ci_netif *netif, ci_ip_timer* ts)
{
  ci_assert( TIME_LE(ts->time, ci_ip_time_now(netif)) );
  ci_assert( ts->time == IPTIMER_STATE(netif)->sched_ticks );

  switch( ts->fn ) {
  case CI_IP_TIMER_TCP_RTO:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_rto(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_DELACK:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_delack(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_ZWIN:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_zwin(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_KALIVE:
    CHECK_TS(netif, SP_TO_TCP(netif, ts->param1));
    ci_tcp_timeout_kalive(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_LISTEN:
    ci_tcp_timeout_listen(netif, SP_TO_TCP_LISTEN(netif, ts->param1));
    break;
  case CI_IP_TIMER_TCP_CORK:
    ci_tcp_timeout_cork(netif, SP_TO_TCP(netif, ts->param1));
    break;
  case CI_IP_TIMER_NETIF_TIMEOUT:
    ci_netif_timeout_state(netif);
    break;
  case CI_IP_TIMER_PMTU_DISCOVER:
    ci_pmtu_timeout_pmtu(netif, SP_TO_TCP(netif, ts->param1));
    break;
#if CI_CFG_TCP_SOCK_STATS
  case CI_IP_TIMER_TCP_STATS:
    ci_tcp_stats_action(netif, SP_TO_TCP(netif, ts->param1),
                        CI_IP_STATS_FLUSH, CI_IP_STATS_OUTPUT_NONE,
                        NULL, NULL );
    break;
#endif
#if CI_CFG_SUPPORT_STATS_COLLECTION
  case CI_IP_TIMER_NETIF_STATS:
    ci_netif_stats_action(netif, CI_IP_STATS_FLUSH,
                          CI_IP_STATS_OUTPUT_NONE, NULL, NULL );
    break;
#endif
#if CI_CFG_IP_TIMER_DEBUG
  case CI_IP_TIMER_DEBUG_HOOK:
    ci_ip_timer_debug_fn(netif, ts->link.addr, ts->param1);
    break;
#endif
  default:
    LOG_U(log( LPF "unknown timer callback code:%x param1:%d",
               ts->fn, OO_SP_FMT(ts->param1)));
    CI_DEBUG(ci_fail_stop_fn());
  }
}
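/* The dispatcher keys off an enum code (ts->fn) plus a socket index
 * (ts->param1) rather than storing a function pointer, which keeps the
 * timer state meaningful when the shared netif state is mapped into more
 * than one address space.  A self-contained sketch of the same
 * tagged-dispatch pattern -- names invented for illustration, not the
 * Onload API:
 *
 *   #include <assert.h>
 *   #include <stdio.h>
 *
 *   enum timer_fn { TIMER_RTO, TIMER_DELACK, TIMER_KALIVE };
 *
 *   struct ip_timer {
 *     unsigned time;       // absolute expiry, in ticks
 *     enum timer_fn fn;    // which handler to run
 *     int param1;          // socket index the handler operates on
 *   };
 *
 *   static void timeout_rto(int sock)    { printf("RTO on %d\n", sock); }
 *   static void timeout_delack(int sock) { printf("delack on %d\n", sock); }
 *   static void timeout_kalive(int sock) { printf("kalive on %d\n", sock); }
 *
 *   // Dispatch on the stored code, as ci_ip_timer_docallback() does.
 *   static void timer_docallback(struct ip_timer* ts)
 *   {
 *     switch( ts->fn ) {
 *     case TIMER_RTO:    timeout_rto(ts->param1);    break;
 *     case TIMER_DELACK: timeout_delack(ts->param1); break;
 *     case TIMER_KALIVE: timeout_kalive(ts->param1); break;
 *     default:           assert(0);
 *     }
 *   }
 *
 *   int main(void)
 *   {
 *     struct ip_timer t = { 0, TIMER_DELACK, 3 };
 *     timer_docallback(&t);
 *     return 0;
 *   }
 *
 * The registration side of the real pattern appears in ci_tcp_listen()
 * below, which fills in listenq_tid.fn and listenq_tid.param1 before
 * arming the timer.
 */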
/*!
 * Tests for valid sockaddr & sockaddr length & AF_INET or AF_INET6.
 */
static int ci_tcp_validate_sa( sa_family_t domain,
                               const struct sockaddr* sa, socklen_t sa_len )
{
  /*
   * Linux deviates from documented behaviour here;
   * On Linux we return EINVAL if sa and sa_len are NULL and 0 respectively,
   * and we return EFAULT if sa is NULL and sa_len != 0....
   */
  if( !sa ) {
    LOG_U(ci_log(LPF "invalid sockaddr : sa = %lx, sa_len = %d",
                 (long) sa, sa_len));
    if( sa_len == 0 )
      RET_WITH_ERRNO( EINVAL );
    else
      RET_WITH_ERRNO( EFAULT );
  }

  if( sa_len < sizeof(struct sockaddr_in)
#if CI_CFG_FAKE_IPV6
      || (domain == AF_INET6 && sa_len < SIN6_LEN_RFC2133)
#endif
      ) {
    LOG_U( ci_log(LPF "struct too short to be sockaddr_in(6)" ));
    RET_WITH_ERRNO( EINVAL );
  }

  /* It should be sa->sa_family, but MS wdm does not understand it,
   * so let's use CI_SIN(sa)->sin_family. */
  if( CI_SIN(sa)->sin_family != domain &&
      CI_SIN(sa)->sin_family != AF_UNSPEC ) {
    LOG_U(ci_log(LPF "address family %d does not match "
                 "with socket domain %d",
                 CI_SIN(sa)->sin_family, domain));
    RET_WITH_ERRNO(EAFNOSUPPORT);
  }

#if CI_CFG_FAKE_IPV6
  if( sa->sa_family == AF_INET6 && !ci_tcp_ipv6_is_ipv4(sa) ) {
    LOG_TC(ci_log(LPF "Pure IPv6 address is not supported"));
    RET_WITH_ERRNO(EAFNOSUPPORT);
  }
#endif

  return 0;
}
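/* The NULL-handling the comment above describes is observable with plain
 * sockets; a quick sketch of the Linux behaviour being matched (not
 * Onload-specific, return values deliberately ignored):
 *
 *   #include <errno.h>
 *   #include <netinet/in.h>
 *   #include <stdio.h>
 *   #include <sys/socket.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *     int fd = socket(AF_INET, SOCK_STREAM, 0);
 *
 *     // NULL address with zero length: expect EINVAL, per the comment.
 *     errno = 0;
 *     connect(fd, NULL, 0);
 *     printf("connect(fd, NULL, 0)  -> errno=%d (EINVAL=%d)\n",
 *            errno, EINVAL);
 *
 *     // NULL address with non-zero length: expect EFAULT.
 *     errno = 0;
 *     connect(fd, NULL, sizeof(struct sockaddr_in));
 *     printf("connect(fd, NULL, 16) -> errno=%d (EFAULT=%d)\n",
 *            errno, EFAULT);
 *
 *     close(fd);
 *     return 0;
 *   }
 */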
/* Kill an orphan stack in the thc
 *
 * You must hold the thc_mutex before calling this function.
 *
 * You cannot hold the THR_TABLE.lock when calling this function.
 */
static void thc_kill_an_orphan(tcp_helper_cluster_t* thc)
{
  tcp_helper_resource_t* thr = NULL;
  int rc;

  rc = thc_get_an_orphan(thc, &thr);
  ci_assert_equal(rc, 0);

  /* This is generally called when the stack is being freed.  But as
   * we are holding the thc_mutex, we would deadlock if we took that
   * path.  So we remove thr from the thc now. */
  thc_remove_thr(thc, thr);

  LOG_U(ci_log("Clustering: Killing orphan stack %d", thr->id));
  rc = tcp_helper_kill_stack_by_id(thr->id);
#ifndef NDEBUG
  if( rc != 0 && rc != -EBUSY )
    LOG_U(ci_log("%s: tcp_helper_kill_stack_by_id(%d): failed %d",
                 __FUNCTION__, thr->id, rc));
#endif
}
ci_fd_t ci_udp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_udp_state* us;
  ci_fd_t fd;

  VERB( log(LPFIN "ctor( )" ) );
  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  us = ci_udp_get_state_buf(netif);
  if( !us ) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers",
                 __FUNCTION__, NI_ID(netif)));
    return -ENOMEM;
  }

  /* The protocol must be set before ci_tcp_helper_sock_attach(), since it
   * is used to determine whether TCP or UDP file operations should be
   * attached to the file descriptor in the kernel. */
  sock_protocol(&us->s) = IPPROTO_UDP;

  /* NB: this attach will close the os_sock_fd */
  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif),
                                 SC_SP(&us->s), domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach (domain=%d, type=%d) "
                   "failed %d", __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach (domain=%d, type=%d) "
                   "failed %d", __FUNCTION__, domain, type, fd));
    ci_netif_unlock(netif);
    return fd;
  }

  ci_assert(~us->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);

  us->s.rx_errno = 0;
  us->s.tx_errno = 0;
  us->s.so_error = 0;
  us->s.cp.sock_cp_flags |= OO_SCP_UDP_WILD;

  ep->s = &us->s;
  ep->netif = netif;
  CHECK_UEP(ep);
  ci_netif_unlock(netif);
  return fd;
}
/* create a pt->pt association with a server
 * This uses the OS to do all the work so that we don't have to emulate
 * some of the more unpleasant "tricks" of Linux.
 *
 * When we're either handing-over OS-dest connects or when we're "no
 * failing" connects we may return -2 (unhandled).  In this case the
 * OS socket _has_ been connected & we therefore are handing-over to
 * a socket in the right state.
 *
 * NOTE: on Windows the WSPConnect() API is quite a lot more complex than
 * the BSD one.  Therefore, to stop polluting the core code with masses
 * of Windows frippery, the backing socket connection is successfully
 * established _before_ this function is called.  This function will use
 * the state of the backing socket to configure the Efab socket - so the
 * end result is the same (right down to the race between the OS socket
 * connection being established and our filters being inserted).
 */
int ci_udp_connect(citp_socket* ep, ci_fd_t fd,
                   const struct sockaddr* serv_addr, socklen_t addrlen )
{
  int rc;
  ci_fd_t os_sock;

  CHECK_UEP(ep);
  LOG_UC(log("%s("SF_FMT", addrlen=%d)", __FUNCTION__,
             SF_PRI_ARGS(ep,fd), addrlen));

  os_sock = ci_get_os_sock_fd(fd);
  if( !CI_IS_VALID_SOCKET( os_sock ) ) {
    LOG_U(ci_log("%s: no backing socket", __FUNCTION__));
    return -1;
  }

  /* Because we have not handed over the fd to the OS, all calls to bind()
   * and connect() will have been seen by us - therefore our copies of
   * the local/remote address & port will be accurate. */

  /* Let the OS do the connection - it'll also do the data validation
   * for free.  On failure the OS changes nothing - therefore we
   * need to leave the filters in place (if such they were).
   * Because the OS socket and our socket are socket-options-synchronized,
   * the following call will also check the supplied address according to
   * the SO_BROADCAST socket option settings. */
  rc = ci_sys_connect(os_sock, serv_addr, addrlen);
  if( rc != 0 ) {
    LOG_U(log("%s: sys_connect failed errno:%d", __FUNCTION__, errno));
    ci_rel_os_sock_fd(os_sock);
    return -1;
  }

  rc = ci_udp_connect_conclude( ep, fd, serv_addr, addrlen, os_sock);
  ci_rel_os_sock_fd(os_sock);
  return rc;
}
ci_fd_t ci_tcp_ep_ctor(citp_socket* ep, ci_netif* netif, int domain, int type)
{
  ci_tcp_state* ts;
  ci_fd_t fd;

  ci_assert(ep);
  ci_assert(netif);

  ci_netif_lock(netif);
  ts = ci_tcp_get_state_buf(netif);
  if( ts == NULL ) {
    ci_netif_unlock(netif);
    LOG_E(ci_log("%s: [%d] out of socket buffers",
                 __FUNCTION__, NI_ID(netif)));
    return -ENOMEM;
  }

  fd = ci_tcp_helper_sock_attach(ci_netif_get_driver_handle(netif),
                                 S_SP(ts), domain, type);
  if( fd < 0 ) {
    if( fd == -EAFNOSUPPORT )
      LOG_U(ci_log("%s: ci_tcp_helper_sock_attach"
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
    else
      LOG_E(ci_log("%s: ci_tcp_helper_sock_attach"
                   "(domain=%d, type=%d) failed %d",
                   __FUNCTION__, domain, type, fd));
  }
  else {
    ci_assert(~ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN);

    /* Apply default sockbuf sizes now we've updated them from the kernel
    ** defaults. */
    ts->s.so.sndbuf = NI_OPTS(netif).tcp_sndbuf_def;
    ts->s.so.rcvbuf = NI_OPTS(netif).tcp_rcvbuf_def;
    ep->netif = netif;
    ep->s = &ts->s;
    CHECK_TEP(ep);
  }

  ci_netif_unlock(netif);
  return fd;
}
int onload_zc_alloc_buffers(int fd, struct onload_zc_iovec* iovecs,
                            int iovecs_len,
                            enum onload_zc_buffer_type_flags flags)
{
  int rc = 0, i;
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ci_netif* ni;
  ci_ip_pkt_fmt *pkt;
  unsigned max_len;

  Log_CALL(ci_log("%s(%d, %p, %d, %x)", __FUNCTION__, fd, iovecs,
                  iovecs_len, flags));

  citp_enter_lib(&lib_context);

  if( (fdi = citp_fdtable_lookup(fd)) != NULL ) {
    switch( citp_fdinfo_get_type(fdi) ) {
    case CITP_UDP_SOCKET:
    case CITP_TCP_SOCKET:
      epi = fdi_to_sock_fdi(fdi);
      ni = epi->sock.netif;
      ci_netif_lock(ni);
      for( i = 0; i < iovecs_len; ++i ) {
        max_len = CI_CFG_PKT_BUF_SIZE;
        pkt = ci_netif_pkt_tx_tcp_alloc(ni);
        if( pkt == NULL ) {
          while( --i >= 0 )
            ci_netif_pkt_release(ni, (ci_ip_pkt_fmt*)iovecs[i].buf);
          rc = -ENOMEM;
          ci_netif_unlock(ni);
          goto out;
        }
        /* Make sure this is clear as it affects behaviour when freeing */
        pkt->pf.udp.rx_flags = 0;
        iovecs[i].buf = (struct oo_zc_buf *)pkt;
        if( flags & ONLOAD_ZC_BUFFER_HDR_TCP ) {
          if( (citp_fdinfo_get_type(fdi) == CITP_TCP_SOCKET) &&
              (epi->sock.s->b.state & CI_TCP_STATE_TCP_CONN) ) {
            ci_tcp_state* ts = SOCK_TO_TCP(epi->sock.s);
            oo_tx_pkt_layout_init(pkt);
            iovecs[i].iov_base = ((char *)oo_tx_ip_hdr(pkt)) +
              ts->outgoing_hdrs_len;
            max_len = tcp_eff_mss(ts);
          }
          else {
            /* Best guess.  We can fix it up later.  Magic 12 leaves
             * space for time stamp option (common case) */
            oo_tx_pkt_layout_init(pkt);
            iovecs[i].iov_base = (uint8_t*) oo_tx_ip_data(pkt) +
              sizeof(ci_tcp_hdr) + 12;
          }
        }
        else if( flags & ONLOAD_ZC_BUFFER_HDR_UDP ) {
          oo_tx_pkt_layout_init(pkt);
          iovecs[i].iov_base = (uint8_t*) oo_tx_ip_data(pkt) +
            sizeof(ci_udp_hdr);
        }
        else
          iovecs[i].iov_base = PKT_START(pkt);
        iovecs[i].iov_len = CI_CFG_PKT_BUF_SIZE -
          ((char *)iovecs[i].iov_base - (char *)pkt);
        if( iovecs[i].iov_len > max_len )
          iovecs[i].iov_len = max_len;
      }
      ni->state->n_async_pkts += iovecs_len;
      ci_netif_unlock(ni);
      break;
#if CI_CFG_USERSPACE_EPOLL
    case CITP_EPOLL_FD:
      rc = -ENOTSOCK;
      break;
#endif
#if CI_CFG_USERSPACE_PIPE
    case CITP_PIPE_FD:
      rc = -ENOTSOCK;
      break;
#endif
    case CITP_PASSTHROUGH_FD:
      rc = -ESOCKTNOSUPPORT;
      break;
    default:
      LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__,
                citp_fdinfo_get_type(fdi)));
      rc = -EINVAL;
    }
    citp_fdinfo_release_ref(fdi, 0);
  }
  else {
    /* Not onload socket */
    rc = -ESOCKTNOSUPPORT;
  }

 out:
  citp_exit_lib(&lib_context, TRUE);
  Log_CALL_RESULT(rc);
  return rc;
}
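/* A sketch of the calling convention for this allocator, pairing it with
 * onload_zc_release_buffers() below.  The header name is an assumption
 * (the public ZC extensions header), error handling is trimmed, and fd
 * must be an Onload-accelerated socket since buffers come from that
 * socket's stack:
 *
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include <onload/extensions_zc.h>   // assumed header name
 *
 *   static int zc_buffer_demo(int fd)
 *   {
 *     struct onload_zc_iovec iov[4];
 *     onload_zc_handle bufs[4];
 *     int i, rc;
 *
 *     // Reserve room for UDP headers in each buffer.
 *     rc = onload_zc_alloc_buffers(fd, iov, 4, ONLOAD_ZC_BUFFER_HDR_UDP);
 *     if( rc < 0 ) {
 *       fprintf(stderr, "alloc failed: %d\n", rc);
 *       return rc;
 *     }
 *
 *     // iov[i].iov_base points past the reserved protocol headers;
 *     // iov[i].iov_len says how much payload fits.
 *     memcpy(iov[0].iov_base, "hello", 5);
 *
 *     // Buffers not handed to a send call must be released, and must go
 *     // back to the same stack they came from (see the stack_id check in
 *     // onload_zc_release_buffers() below).
 *     for( i = 0; i < 4; ++i )
 *       bufs[i] = iov[i].buf;
 *     return onload_zc_release_buffers(fd, bufs, 4);
 *   }
 */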
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, it can be considered an internal error or failing of Onload.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
  const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
  ci_uint32 dst_be32;
  ci_udp_state* us = SOCK_TO_UDP(ep->s);
  int onloadable;
  int rc = 0;

  CHECK_UEP(ep);

  UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
  us->s.rx_errno = 0;
  us->s.tx_errno = 0;

  if( IS_DISCONNECTING(serv_sin) ) {
    rc = ci_udp_disconnect(ep, us, os_sock);
    goto out;
  }

#if CI_CFG_FAKE_IPV6
  if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
    LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
    goto handover;
  }
#endif

  dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);

  if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
    LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
              FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
              CI_BSWAP_BE16(serv_sin->sin_port), errno));
    goto out;
  }

  us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
  ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
  cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

  switch( us->s.pkt.status ) {
  case retrrc_success:
  case retrrc_nomac:
    onloadable = 1;
    break;
  default:
    onloadable = 0;
    if( NI_OPTS(ep->netif).udp_connect_handover ) {
      LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                 ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
      goto handover;
    }
    break;
  }

  if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
    LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
               FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
               CI_BSWAP_BE16(serv_sin->sin_port)));
    ci_udp_clr_filters(ep);
    return 0;
  }

  if( CI_IP_IS_LOOPBACK(dst_be32) ) {
    /* After connecting via loopback it is not possible to connect anywhere
     * else. */
    LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
               ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
    goto handover;
  }

  if( onloadable ) {
#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL )
      us->s.ofe_code_start = ofe_socktbl_find(
                        ep->netif->ofe, OFE_SOCKTYPE_UDP,
                        udp_laddr_be32(us), udp_raddr_be32(us),
                        udp_lport_be16(us), udp_rport_be16(us));
#endif

    if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
      /* Failed to set filters.  Most likely we've run out of h/w filters.
       * Handover to O/S to avoid breaking the app.
       *
       * TODO: Actually we probably won't break the app if we don't
       * handover, as packets will still get delivered via the kernel
       * stack.  Might be worth having a runtime option to choose whether
       * or not to handover in such cases.
       */
      LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                CI_BSWAP_BE16(serv_sin->sin_port), rc));
      CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
      goto out;
    }
  }
  else {
    ci_udp_clr_filters(ep);
  }

  LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
             SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
             ip_addr_str(udp_laddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
             ip_addr_str(udp_raddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
  return 0;

 out:
  if( rc < 0 && CITP_OPTS.no_fail )
    goto handover;
  return rc;

 handover:
  ci_udp_clr_filters(ep);
  return CI_SOCKET_HANDOVER;
}
static int ci_udp_filter_kernel_pkt(ci_netif* ni, ci_udp_state* us,
                                    struct msghdr* msg, int *bytes)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  unsigned cb_flags = 0;
  int i = 0, bytes_remaining = *bytes;

  if( msg->msg_iovlen > CI_UDP_ZC_IOVEC_MAX ) {
    LOG_U(log("%s: too many fragments (%d), passing packet unfiltered",
              __FUNCTION__, (int)msg->msg_iovlen));
    return 1;
  }

  zc_msg.iov = zc_iovec;
  zc_msg.msghdr = *msg;
  zc_msg.msghdr.msg_iov = NULL;

  ci_assert_gt(msg->msg_iovlen, 0);

  do {
    zc_msg.iov[i].iov_base = msg->msg_iov[i].iov_base;
    zc_msg.iov[i].iov_len = msg->msg_iov[i].iov_len > bytes_remaining ?
      bytes_remaining : msg->msg_iov[i].iov_len;
    zc_msg.iov[i].buf = ONLOAD_ZC_HANDLE_NONZC;
    zc_msg.iov[i].iov_flags = 0;
    bytes_remaining -= zc_msg.iov[i].iov_len;
  } while( ++i < msg->msg_iovlen && bytes_remaining );
  zc_msg.msghdr.msg_iovlen = i;

  rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
    (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

  ci_assert(!(rc & ONLOAD_ZC_KEEP));

  if( rc & ONLOAD_ZC_TERMINATE )
    return 0;
  else {
    if( rc & ONLOAD_ZC_MODIFIED ) {
      int new_len = 0;
#ifndef NDEBUG
      int found_shortened_iov = 0;
#endif
      for( i = 0; i < zc_msg.msghdr.msg_iovlen; ++i ) {
        new_len += zc_msg.iov[i].iov_len;
#ifndef NDEBUG
        if( found_shortened_iov )
          ci_assert_equal(zc_msg.iov[i].iov_len, 0);
        ci_assert_equal(zc_msg.iov[i].iov_base, msg->msg_iov[i].iov_base);
        if( zc_msg.iov[i].iov_len != msg->msg_iov[i].iov_len ) {
          ci_assert_lt(zc_msg.iov[i].iov_len, msg->msg_iov[i].iov_len);
          found_shortened_iov = 1;
        }
#endif
      }
#ifndef NDEBUG
      if( found_shortened_iov )
        ci_assert_lt(new_len, *bytes);
      else
        ci_assert_equal(new_len, *bytes);
#endif
      *bytes = new_len;
    }
  }
  return 1;
}
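/* A sketch of a filter callback matching the onload_zc_recv_filter_callback
 * contract invoked above (how the filter gets registered is outside this
 * excerpt).  ONLOAD_ZC_TERMINATE, ONLOAD_ZC_MODIFIED and ONLOAD_ZC_KEEP
 * appear in the code above; ONLOAD_ZC_CONTINUE and the header name are
 * assumptions from the public ZC extensions API:
 *
 *   #include <onload/extensions_zc.h>   // assumed header name
 *
 *   // Drop datagrams whose first payload byte is 0xff; truncate others to
 *   // 64 bytes.  Shortening an iovec in place (iov_base unchanged) and
 *   // returning ONLOAD_ZC_MODIFIED is exactly the pattern the debug
 *   // asserts in ci_udp_filter_kernel_pkt() validate.
 *   static enum onload_zc_callback_rc
 *   my_udp_filter(struct onload_zc_msg* msg, void* arg, int flags)
 *   {
 *     if( msg->msghdr.msg_iovlen >= 1 && msg->iov[0].iov_len >= 1 ) {
 *       if( ((unsigned char*) msg->iov[0].iov_base)[0] == 0xff )
 *         return ONLOAD_ZC_TERMINATE;   // discard this datagram
 *       if( msg->iov[0].iov_len > 64 ) {
 *         msg->iov[0].iov_len = 64;     // shorten, keeping iov_base
 *         return ONLOAD_ZC_MODIFIED;
 *       }
 *     }
 *     return ONLOAD_ZC_CONTINUE;        // deliver unchanged
 *   }
 *
 * Note ONLOAD_ZC_KEEP must not be returned for packets delivered via the
 * kernel path: the ci_assert above enforces that.
 */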
int onload_zc_release_buffers(int fd, onload_zc_handle* bufs, int bufs_len)
{
  int rc = 0, i, rx_pkt, released;
  citp_lib_context_t lib_context;
  citp_fdinfo* fdi;
  citp_sock_fdi* epi;
  ci_netif* ni;
  ci_ip_pkt_fmt* pkt;

  Log_CALL(ci_log("%s(%d, %p, %d)", __FUNCTION__, fd, bufs, bufs_len));

  citp_enter_lib(&lib_context);

  if( (fdi = citp_fdtable_lookup(fd)) != NULL ) {
    switch( citp_fdinfo_get_type(fdi) ) {
    case CITP_UDP_SOCKET:
    case CITP_TCP_SOCKET:
      epi = fdi_to_sock_fdi(fdi);
      ni = epi->sock.netif;
      ci_netif_lock(ni);
      for( i = 0; i < bufs_len; ++i ) {
        pkt = (ci_ip_pkt_fmt*)bufs[i];
        if( pkt->stack_id != ni->state->stack_id ) {
          LOG_U(log("%s: attempt to free buffer from stack %d to stack %d",
                    __FUNCTION__, pkt->stack_id, ni->state->stack_id));
          rc = -EINVAL;
          break;
        }
      }
      if( rc == 0 ) {
        for( i = 0; i < bufs_len; ++i ) {
          pkt = (ci_ip_pkt_fmt*)bufs[i];
          /* If we are releasing a packet without the RX flag then the user
           * allocated and then freed the packet (without using it).  We
           * detect this to decrement n_async_pkts.  RX packets (kept via
           * ONLOAD_ZC_KEEP) are counted differently, so don't decrement
           * for those (but they may still be released here). */
          rx_pkt = pkt->flags & CI_PKT_FLAG_RX;
          released = ci_netif_pkt_release_check_keep(ni, pkt);
          if( ! rx_pkt ) {
            ci_assert(released == 1);
            (void) released;
            --ni->state->n_async_pkts;
          }
        }
      }
      ci_netif_unlock(ni);
      break;
#if CI_CFG_USERSPACE_EPOLL
    case CITP_EPOLL_FD:
      rc = -ENOTSOCK;
      break;
#endif
#if CI_CFG_USERSPACE_PIPE
    case CITP_PIPE_FD:
      rc = -ENOTSOCK;
      break;
#endif
    default:
      LOG_U(log("%s: unknown fdinfo type %d", __FUNCTION__,
                citp_fdinfo_get_type(fdi)));
      rc = -EINVAL;
    }
    citp_fdinfo_release_ref(fdi, 0);
  }
  else {
    /* Not onload socket */
    rc = -ESOCKTNOSUPPORT;
  }

  citp_exit_lib(&lib_context, TRUE);
  Log_CALL_RESULT(rc);
  return rc;
}
/*
** Promote a synrecv structure to an established socket.
**
** Assumes that the caller will handle a fail if we can't allocate a new
** tcp_state structure due to memory pressure or the like.
*/
int ci_tcp_listenq_try_promote(ci_netif* netif, ci_tcp_socket_listen* tls,
                               ci_tcp_state_synrecv* tsr,
                               ci_ip_cached_hdrs* ipcache,
                               ci_tcp_state** ts_out)
{
  int rc = 0;

  ci_assert(netif);
  ci_assert(tls);
  ci_assert(tls->s.b.state == CI_TCP_LISTEN);
  ci_assert(tsr);

  if( (int) ci_tcp_acceptq_n(tls) < tls->acceptq_max ) {
    ci_tcp_state* ts;

    /* Grab a tcp_state structure that will go onto the accept queue.  We
     * take from the cache of EPs if any are available. */
    ts = get_ts_from_cache(netif, tsr, tls);
    if( !ts ) {
      /* None on cache; try allocating a new ts */
      ts = ci_tcp_get_state_buf(netif);
#if CI_CFG_FD_CACHING
      if( ts == NULL ) {
        /* We've reaped.  Did this result in any being cached? */
        ts = get_ts_from_cache(netif, tsr, tls);
        if( ts == NULL ) {
          /* No -- try again to allocate. */
          ts = ci_tcp_get_state_buf(netif);
        }
        else {
          CITP_STATS_NETIF(++netif->state->stats.sockcache_hit_reap);
        }
      }
#endif
      if( ts == NULL ) {
        LOG_TV(ci_log("%s: [%d] out of socket buffers",
                      __FUNCTION__, NI_ID(netif)));
        CITP_STATS_TCP_LISTEN(++tls->stats.n_acceptq_no_sock);
        CI_SET_SO_ERROR(&tls->s, ENOMEM);
        citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);
        return -ENOMEM;
      }

      ci_assert(ci_tcp_is_cached(ts) ||
                (ts->s.b.sb_aflags & CI_SB_AFLAG_ORPHAN));
    }

#ifdef ONLOAD_OFE
    ts->s.ofe_code_start = tls->ofe_promote;
#endif

    if( ! ci_tcp_is_cached(ts) ) {
      /* Need to initialise address information for use when setting
       * filters. */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      /* "Borrow" the filter from the listening socket.  For a loopback
       * socket, we do not need filters, but we have to take a reference
       * on the OS socket. */
      rc = ci_tcp_ep_set_filters(netif, S_SP(ts), ts->s.cp.so_bindtodevice,
                                 S_SP(tls));
      if( rc < 0 ) {
        LOG_U(ci_log("%s: Unable to set filters %d", __FUNCTION__, rc));
        /* Either put this back on the list (at the head) or free it */
        ci_tcp_state_free(netif, ts);
        return rc;
      }
    }
#if CI_CFG_FD_CACHING
    else {
      /* Now set the s/w filter.  We leave the h/w filter in place for
       * cached EPs.  This will probably not have the correct raddr and
       * rport, but as it's sharing the listening socket's filter that's
       * not a problem.  It will be updated if this is still around when
       * the listener is closed. */
      rc = ci_netif_filter_insert(netif, S_SP(ts), tsr->l_addr,
                                  sock_lport_be16(&tls->s), tsr->r_addr,
                                  tsr->r_port, tcp_protocol(ts));
      if( rc < 0 ) {
        /* Bung it back on the cache list */
        LOG_EP(ci_log("Unable to create s/w filter!"));
        ci_ni_dllist_push(netif, &tls->epcache.cache, &ts->epcache_link);
        return rc;
      }

      /* Need to initialise address information.  We do this after trying
       * to insert the s/w filter, so we can push the tcp state back onto
       * the cache queue with as few changes as possible if we fail to add
       * the s/w filter. */
      ci_tcp_set_addr_on_promote(netif, ts, tsr, tls);

      LOG_EP(ci_log("Cached fd %d moved from cached to connected",
                    ts->cached_on_fd));
      ci_ni_dllist_push(netif, &tls->epcache_connected, &ts->epcache_link);
    }
#endif

    ci_assert(IS_VALID_SOCK_P(netif, S_SP(ts)));
    ci_assert(ts->s.b.state == CI_TCP_CLOSED);
    ts->s.domain = tls->s.domain;

    cicp_ip_cache_update_from(netif, &ts->s.pkt, ipcache);
    ci_pmtu_state_init(netif, &ts->s, &ts->pmtus,
                       CI_IP_TIMER_PMTU_DISCOVER);
    ci_pmtu_set(netif, &ts->pmtus,
                CI_MIN(ts->s.pkt.mtu,
                       tsr->tcpopts.smss + sizeof(ci_tcp_hdr)
                         + sizeof(ci_ip4_hdr)));

    /* If we've got the SYN via a local route, we can handle it */
    ci_assert_equiv(ts->s.pkt.status == retrrc_localroute,
                    OO_SP_NOT_NULL(tsr->local_peer));
    if( ts->s.pkt.status == retrrc_localroute )
      ts->s.pkt.flags |= CI_IP_CACHE_IS_LOCALROUTE;

    ts->amss = tsr->amss;

    /* options and flags */
    ts->tcpflags = 0;
    ts->tcpflags |= tsr->tcpopts.flags;
    ts->tcpflags |= CI_TCPT_FLAG_PASSIVE_OPENED;
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
    if( ts->tcpflags & CI_TCPT_FLAG_WSCL ) {
      ts->snd_wscl = tsr->tcpopts.wscl_shft;
      ts->rcv_wscl = tsr->rcv_wscl;
    }
    else {
      ts->snd_wscl = ts->rcv_wscl = 0u;
    }
    CI_IP_SOCK_STATS_VAL_TXWSCL( ts, ts->snd_wscl);
    CI_IP_SOCK_STATS_VAL_RXWSCL( ts, ts->rcv_wscl);

    /* Send and receive sequence numbers */
    tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
      tsr->snd_isn + 1;
    ci_tcp_set_snd_max(ts, tsr->rcv_nxt, tcp_snd_una(ts), 0);
    ci_tcp_rx_set_isn(ts, tsr->rcv_nxt);
    tcp_rcv_up(ts) = SEQ_SUB(tcp_rcv_nxt(ts), 1);

    if( ts->tcpflags & CI_TCPT_FLAG_TSO ) {
      ts->incoming_tcp_hdr_len += 12;
      ts->outgoing_hdrs_len += 12;
      ts->tspaws = ci_tcp_time_now(netif);
      ts->tsrecent = tsr->tspeer;
      ts->tslastack = tsr->rcv_nxt;
    }
    else {
      /* Must be after initialising snd_una. */
      ci_tcp_clear_rtt_timing(ts);
      ts->timed_ts = tsr->timest;
    }
    /* SACK has nothing to be done. */

    /* ?? ECN */
    ci_tcp_set_hdr_len(ts, (ts->outgoing_hdrs_len - sizeof(ci_ip4_hdr)));

    ts->smss = tsr->tcpopts.smss;
    ts->c.user_mss = tls->c.user_mss;
    if( ts->c.user_mss && ts->c.user_mss < ts->smss )
      ts->smss = ts->c.user_mss;
#if CI_CFG_LIMIT_SMSS
    ts->smss = ci_tcp_limit_mss(ts->smss, netif, __FUNCTION__);
#endif
    ci_assert(ts->smss > 0);
    ci_tcp_set_eff_mss(netif, ts);
    ci_tcp_set_initialcwnd(netif, ts);

    /* Copy socket options & related fields that should be inherited.
     * Note: Windows does not inherit rcvbuf until the call to accept
     * completes.  The assumption here is that all options can be
     * inherited at the same time (most won't have an effect until there
     * is a socket available for use by the app.). */
    ci_tcp_inherit_accept_options(netif, tls, ts,
                                  "SYN RECV (LISTENQ PROMOTE)");

    /* NB. Must have already set peer (which we have). */
    ci_tcp_set_established_state(netif, ts);
    CITP_STATS_NETIF(++netif->state->stats.synrecv2established);

    ci_assert(ts->ka_probes == 0);
    ci_tcp_kalive_restart(netif, ts, ci_tcp_kalive_idle_get(ts));
    ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

    /* Remove the synrecv structure from the listen queue, and free the
     * buffer. */
    if( tsr->tcpopts.flags & CI_TCPT_FLAG_SYNCOOKIE )
      ci_free(tsr);
    else {
      ci_tcp_listenq_remove(netif, tls, tsr);
      ci_tcp_synrecv_free(netif, tsr);
    }

    ci_bit_set(&ts->s.b.sb_aflags, CI_SB_AFLAG_TCP_IN_ACCEPTQ_BIT);
    ci_tcp_acceptq_put(netif, tls, &ts->s.b);

    LOG_TC(log(LNT_FMT "new ts=%d SYN-RECV->ESTABLISHED flags=0x%x",
               LNT_PRI_ARGS(netif, tls), S_FMT(ts), ts->tcpflags);
           log(LNTS_FMT RCV_WND_FMT " snd=%08x-%08x-%08x enq=%08x",
               LNTS_PRI_ARGS(netif, ts), RCV_WND_ARGS(ts),
               tcp_snd_una(ts), tcp_snd_nxt(ts), ts->snd_max,
               tcp_enq_nxt(ts)));
    citp_waitable_wake(netif, &tls->s.b, CI_SB_FLAG_WAKE_RX);

    *ts_out = ts;
    return 0;
  }
}
static int ci_tcp_connect_ul_start(ci_netif *ni, ci_tcp_state* ts,
                                   ci_uint32 dst_be32, unsigned dport_be16,
                                   int* fail_rc)
{
  ci_ip_pkt_fmt* pkt;
  int rc = 0;

  ci_assert(ts->s.pkt.mtu);

  /* Now that we know the outgoing route, set the MTU related values.
   * Note, even these values are speculative since the real MTU
   * could change between now and passing the packet to the lower layers
   */
  ts->amss = ts->s.pkt.mtu - sizeof(ci_tcp_hdr) - sizeof(ci_ip4_hdr);
#if CI_CFG_LIMIT_AMSS
  ts->amss = ci_tcp_limit_mss(ts->amss, ni, __FUNCTION__);
#endif

  /* Default smss until discovered by MSS option in SYN - RFC1122 4.2.2.6 */
  ts->smss = CI_CFG_TCP_DEFAULT_MSS;

  /* set pmtu, eff_mss, snd_buf and adjust windows */
  ci_pmtu_set(ni, &ts->pmtus, ts->s.pkt.mtu);
  ci_tcp_set_eff_mss(ni, ts);
  ci_tcp_set_initialcwnd(ni, ts);

  /* Send buffer adjusted by ci_tcp_set_eff_mss(), but we want it to stay
   * zero until the connection is established. */
  ts->so_sndbuf_pkts = 0;

  /*
   * 3. State and address are OK.  The address is routed through our NIC.
   *    Do connect().
   */
  ci_assert_nequal(ts->s.pkt.ip.ip_saddr_be32, INADDR_ANY);

  if( ts->s.s_flags & CI_SOCK_FLAG_CONNECT_MUST_BIND ) {
    ci_sock_cmn* s = &ts->s;
    ci_uint16 source_be16 = 0;

    if( s->s_flags & CI_SOCK_FLAG_ADDR_BOUND )
      rc = __ci_bind(ni, &ts->s, ts->s.pkt.ip.ip_saddr_be32, &source_be16);
    else
      rc = __ci_bind(ni, &ts->s, INADDR_ANY, &source_be16);
    if(CI_LIKELY( rc == 0 )) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "connect: our bind returned %s:%u",
                 LNT_PRI_ARGS(ni, ts), ip_addr_str(INADDR_ANY),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      *fail_rc = rc;
      return CI_CONNECT_UL_FAIL;
    }
    if(CI_UNLIKELY( ts->s.pkt.ip.ip_saddr_be32 == 0 )) {
      CI_SET_ERROR(*fail_rc, EINVAL);
      return CI_CONNECT_UL_FAIL;
    }
  }

  ci_tcp_set_peer(ts, dst_be32, dport_be16);

  /* Make sure we can get a buffer before we change state. */
  pkt = ci_netif_pkt_tx_tcp_alloc(ni);
  if( CI_UNLIKELY(! pkt) ) {
    /* NB. We've already done a poll above. */
    rc = ci_netif_pkt_wait(ni, &ts->s,
                           CI_SLEEP_NETIF_LOCKED|CI_SLEEP_NETIF_RQ);
    if( ci_netif_pkt_wait_was_interrupted(rc) ) {
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_LOCK_DROPPED;
    }
    /* OK, there are (probably) packets available - go try again.  Note we
     * jump back to the top of the function because someone may have
     * connected this socket in the mean-time, so we need to check the
     * state once more. */
    return CI_CONNECT_UL_START_AGAIN;
  }

#ifdef ONLOAD_OFE
  if( ni->ofe != NULL )
    ts->s.ofe_code_start = ofe_socktbl_find(
                        ni->ofe, OFE_SOCKTYPE_TCP_ACTIVE,
                        tcp_laddr_be32(ts), tcp_raddr_be32(ts),
                        tcp_lport_be16(ts), tcp_rport_be16(ts));
#endif

  rc = ci_tcp_ep_set_filters(ni, S_SP(ts), ts->s.cp.so_bindtodevice,
                             OO_SP_NULL);
  if( rc < 0 ) {
    /* Perhaps we've run out of filters?  See if we can push a socket out
     * of timewait and steal its filter. */
    ci_assert_nequal(rc, -EFILTERSSOME);
    if( rc != -EBUSY || ! ci_netif_timewait_try_to_free_filter(ni) ||
        (rc = ci_tcp_ep_set_filters(ni, S_SP(ts),
                                    ts->s.cp.so_bindtodevice,
                                    OO_SP_NULL)) < 0 ) {
      ci_assert_nequal(rc, -EFILTERSSOME);
      /* Either a different error, or our efforts to free a filter did not
       * work. */
      if( ! (ts->s.s_flags & CI_SOCK_FLAG_ADDR_BOUND) ) {
        ts->s.pkt.ip.ip_saddr_be32 = 0;
        ts->s.cp.ip_laddr_be32 = 0;
      }
      ci_netif_pkt_release(ni, pkt);
      CI_SET_ERROR(*fail_rc, -rc);
      return CI_CONNECT_UL_FAIL;
    }
  }

  LOG_TC(log(LNT_FMT "CONNECT %s:%u->%s:%u", LNT_PRI_ARGS(ni, ts),
             ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16),
             ip_addr_str(ts->s.pkt.ip.ip_daddr_be32),
             (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_dest_be16)));

  /* We are going to send the SYN - set states appropriately */
  tcp_snd_una(ts) = tcp_snd_nxt(ts) = tcp_enq_nxt(ts) = tcp_snd_up(ts) =
    ci_tcp_initial_seqno(ni);
  ts->snd_max = tcp_snd_nxt(ts) + 1;

  /* Must be after initialising snd_una. */
  ci_tcp_clear_rtt_timing(ts);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_SYN);
  ts->tcpflags &= ~CI_TCPT_FLAG_OPT_MASK;
  ts->tcpflags |= NI_OPTS(ni).syn_opts;

  if( (ts->tcpflags & CI_TCPT_FLAG_WSCL) ) {
    ts->rcv_wscl = ci_tcp_wscl_by_buff(ni,
                                  ci_tcp_rcvbuf_established(ni, &ts->s));
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, ts->rcv_wscl);
  }
  else {
    ts->rcv_wscl = 0;
    CI_IP_SOCK_STATS_VAL_RXWSCL(ts, 0);
  }
  ci_tcp_set_rcvbuf(ni, ts);
  ci_tcp_init_rcv_wnd(ts, "CONNECT");

  /* outgoing_hdrs_len is initialised to include the timestamp option. */
  if( ! (ts->tcpflags & CI_TCPT_FLAG_TSO) )
    ts->outgoing_hdrs_len = sizeof(ci_ip4_hdr) + sizeof(ci_tcp_hdr);
  if( ci_tcp_can_stripe(ni, ts->s.pkt.ip.ip_saddr_be32,
                        ts->s.pkt.ip.ip_daddr_be32) )
    ts->tcpflags |= CI_TCPT_FLAG_STRIPE;
  ci_tcp_set_slow_state(ni, ts, CI_TCP_SYN_SENT);

  /* If the app tries to send data on a socket in SYN_SENT state
  ** then the data is queued for send until the SYN gets ACKed.
  ** (rfc793 p56)
  **
  ** Receive calls on the socket should block until data arrives.
  ** (rfc793 p58)
  **
  ** Clearing tx_errno and rx_errno achieves this.  The transmit window
  ** is set to 1 byte, which ensures that only the SYN packet gets
  ** sent until the ACK is received with more window.
  */
  ci_assert(ts->snd_max == tcp_snd_nxt(ts) + 1);
  ts->s.rx_errno = 0;
  ts->s.tx_errno = 0;
  ci_tcp_enqueue_no_data(ts, ni, pkt);
  ci_tcp_set_flags(ts, CI_TCP_FLAG_ACK);

  if( ts->s.b.sb_aflags & (CI_SB_AFLAG_O_NONBLOCK | CI_SB_AFLAG_O_NDELAY) ) {
    ts->tcpflags |= CI_TCPT_FLAG_NONBLOCK_CONNECT;
    LOG_TC(log( LNT_FMT "Non-blocking connect - return EINPROGRESS",
                LNT_PRI_ARGS(ni, ts)));
    CI_SET_ERROR(*fail_rc, EINPROGRESS);
    return CI_CONNECT_UL_FAIL;
  }

  return CI_CONNECT_UL_OK;
}
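/* The EINPROGRESS path above is what a non-blocking caller sees; the
 * portable way to complete such a connect is to wait for writability and
 * then read SO_ERROR.  A minimal POSIX sketch, not Onload-specific, with
 * an illustrative address:
 *
 *   #include <arpa/inet.h>
 *   #include <errno.h>
 *   #include <fcntl.h>
 *   #include <netinet/in.h>
 *   #include <poll.h>
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include <sys/socket.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *     int fd = socket(AF_INET, SOCK_STREAM, 0);
 *     struct sockaddr_in addr;
 *     memset(&addr, 0, sizeof(addr));
 *     addr.sin_family = AF_INET;
 *     addr.sin_port = htons(80);
 *     inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);
 *
 *     fcntl(fd, F_SETFL, O_NONBLOCK);
 *     if( connect(fd, (struct sockaddr*) &addr, sizeof(addr)) < 0 &&
 *         errno == EINPROGRESS ) {
 *       // SYN sent; wait for writability, then check whether the
 *       // handshake succeeded.
 *       struct pollfd pfd = { .fd = fd, .events = POLLOUT };
 *       int err = 0;
 *       socklen_t len = sizeof(err);
 *       poll(&pfd, 1, -1);
 *       getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &len);
 *       printf("connect completed with SO_ERROR=%d\n", err);
 *     }
 *     close(fd);
 *     return 0;
 *   }
 */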
int ci_tcp_listen(citp_socket* ep, ci_fd_t fd, int backlog)
{
  /*
  ** ?? error handling on possible fails not handled robustly...
  ** ?? Need to check port number is valid TODO
  */

  /*! \todo If not bound then we have to be listening on all interfaces.
   * It's likely that we won't be coming through here as we have to
   * listen on the OS socket too! */
  ci_tcp_state* ts;
  ci_tcp_socket_listen* tls;
  ci_netif* netif = ep->netif;
  ci_sock_cmn* s = ep->s;
  unsigned ul_backlog = backlog;
  int rc;
  oo_p sp;

  LOG_TC(log("%s "SK_FMT" listen backlog=%d", __FUNCTION__,
             SK_PRI_ARGS(ep), backlog));
  CHECK_TEP(ep);

  if( NI_OPTS(netif).tcp_listen_handover )
    return CI_SOCKET_HANDOVER;

  if( !NI_OPTS(netif).tcp_server_loopback ) {
    /* We should hand over if the socket is bound to an alien address. */
    if( s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN )
      return CI_SOCKET_HANDOVER;
  }

  if( ul_backlog < 0 )
    ul_backlog = NI_OPTS(netif).max_ep_bufs;
  else if( ul_backlog < NI_OPTS(netif).acceptq_min_backlog )
    ul_backlog = NI_OPTS(netif).acceptq_min_backlog;

  if( s->b.state == CI_TCP_LISTEN ) {
    tls = SOCK_TO_TCP_LISTEN(s);
    tls->acceptq_max = ul_backlog;
    ci_tcp_helper_listen_os_sock(fd, ul_backlog);
    return 0;
  }

  if( s->b.state != CI_TCP_CLOSED ) {
    CI_SET_ERROR(rc, EINVAL);
    return rc;
  }

  ts = SOCK_TO_TCP(s);

  /* Bug 3376: if the socket was used for a previous, failed, connect then
   * the error numbers will not be as expected.  Only seen when not using
   * listening netifs (as moving the EP to the new netif resets them). */
  ts->s.tx_errno = EPIPE;
  ts->s.rx_errno = ENOTCONN;

  /* fill in address/ports and all TCP state */
  if( !(ts->s.s_flags & CI_SOCK_FLAG_BOUND) ) {
    ci_uint16 source_be16;

    /* They haven't previously done a bind, so we need to choose
     * a port.  As we haven't been given a hint we let the OS choose. */
    source_be16 = 0;
    rc = __ci_bind(ep->netif, ep->s, ts->s.pkt.ip.ip_saddr_be32,
                   &source_be16);
    if( CI_LIKELY( rc == 0 ) ) {
      TS_TCP(ts)->tcp_source_be16 = source_be16;
      ts->s.cp.lport_be16 = source_be16;
      LOG_TC(log(LNT_FMT "listen: our bind returned %s:%u",
                 LNT_PRI_ARGS(ep->netif, ts),
                 ip_addr_str(ts->s.pkt.ip.ip_saddr_be32),
                 (unsigned) CI_BSWAP_BE16(TS_TCP(ts)->tcp_source_be16)));
    }
    else {
      LOG_U(ci_log("__ci_bind returned %d at %s:%d", CI_GET_ERROR(rc),
                   __FILE__, __LINE__));
      return rc;
    }
  }

  ci_sock_lock(netif, &ts->s.b);
  ci_tcp_set_slow_state(netif, ts, CI_TCP_LISTEN);
  tls = SOCK_TO_TCP_LISTEN(&ts->s);

  tcp_raddr_be32(tls) = 0u;
  tcp_rport_be16(tls) = 0u;

  ci_assert_equal(tls->s.tx_errno, EPIPE);
  ci_assert_equal(tls->s.rx_errno, ENOTCONN);

  /* Set up the listen timer - do it before the first return statement,
   * because __ci_tcp_listen_to_normal() will be called on the error
   * path. */
  if( ~tls->s.s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
    sp = TS_OFF(netif, tls);
    OO_P_ADD(sp, CI_MEMBER_OFFSET(ci_tcp_socket_listen, listenq_tid));
    ci_ip_timer_init(netif, &tls->listenq_tid, sp, "lstq");
    tls->listenq_tid.param1 = S_SP(tls);
    tls->listenq_tid.fn = CI_IP_TIMER_TCP_LISTEN;
  }

  rc = ci_tcp_listen_init(netif, tls);
  ci_sock_unlock(netif, &ts->s.b);
  if( rc != 0 ) {
    CI_SET_ERROR(rc, -rc);
    goto listen_fail;
  }
  tls->acceptq_max = ul_backlog;

  CITP_STATS_TCP_LISTEN(CI_ZERO(&tls->stats));

  /* Install all the filters needed for this connection
   *    - tcp_laddr_be32(ts) = 0 for IPADDR_ANY
   *
   * TODO: handle BINDTODEVICE by setting the phys_port parameter to the
   *       correct physical L5 port index
   * TODO: handle REUSEADDR by setting the last parameter to TRUE
   */
  if( ~s->s_flags & CI_SOCK_FLAG_BOUND_ALIEN ) {
#ifdef ONLOAD_OFE
    if( netif->ofe != NULL ) {
      tls->s.ofe_code_start = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_LISTEN,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
      tls->ofe_promote = ofe_socktbl_find(
                        netif->ofe, OFE_SOCKTYPE_TCP_PASSIVE,
                        tcp_laddr_be32(tls), INADDR_ANY,
                        tcp_lport_be16(ts), 0);
    }
#endif
    rc = ci_tcp_ep_set_filters(netif, S_SP(tls), tls->s.cp.so_bindtodevice,
                               OO_SP_NULL);
    if( rc == -EFILTERSSOME ) {
      if( CITP_OPTS.no_fail )
        rc = 0;
      else {
        ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
        rc = -ENOBUFS;
      }
    }
    ci_assert_nequal(rc, -EFILTERSSOME);
    VERB(ci_log("%s: set_filters returned %d", __FUNCTION__, rc));
    if( rc < 0 ) {
      CI_SET_ERROR(rc, -rc);
      goto post_listen_fail;
    }
  }

  /*
   * A call to the system listen() is required for listen-any, a local host
   * communications server and a multi-homed server (to accept connections
   * to L5 assigned address(es), but incoming from other interfaces).
   */
#ifdef __ci_driver__
  {
    rc = efab_tcp_helper_listen_os_sock( netif2tcp_helper_resource(netif),
                                         S_SP(tls), backlog);
  }
#else
  rc = ci_tcp_helper_listen_os_sock(fd, backlog);
#endif
  if( rc < 0 ) {
    /* clear the filter we've just set */
    ci_tcp_ep_clear_filters(netif, S_SP(tls), 0);
    goto post_listen_fail;
  }
  return 0;

 post_listen_fail:
  ci_tcp_listenq_drop_all(netif, tls);
 listen_fail:
  /* revert TCP state to a non-listening socket format */
  __ci_tcp_listen_to_normal(netif, tls);
  /* The function above sets the orphan flag, but we are attached to an
   * FD. */
  ci_bit_clear(&tls->s.b.sb_aflags, CI_SB_AFLAG_ORPHAN_BIT);
#ifdef __ci_driver__
  return rc;
#else
  return CI_SOCKET_ERROR;
#endif
}
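/* The backlog clamping above means the value passed to listen() is only a
 * hint: a negative backlog selects the maximum (max_ep_bufs), small values
 * are raised to acceptq_min_backlog, and a second listen() on an already
 * listening socket just updates acceptq_max (the early-return path).  A
 * plain POSIX illustration; the port number is arbitrary:
 *
 *   #include <netinet/in.h>
 *   #include <stdio.h>
 *   #include <string.h>
 *   #include <sys/socket.h>
 *   #include <unistd.h>
 *
 *   int main(void)
 *   {
 *     int fd = socket(AF_INET, SOCK_STREAM, 0);
 *     struct sockaddr_in addr;
 *     memset(&addr, 0, sizeof(addr));
 *     addr.sin_family = AF_INET;
 *     addr.sin_addr.s_addr = htonl(INADDR_ANY);
 *     addr.sin_port = htons(8080);
 *
 *     if( bind(fd, (struct sockaddr*) &addr, sizeof(addr)) < 0 )
 *       perror("bind");
 *
 *     // First listen: under Onload this backlog is clamped below by
 *     // acceptq_min_backlog.
 *     if( listen(fd, 1) < 0 )
 *       perror("listen");
 *
 *     // Listening again just updates the accept-queue limit - the
 *     // early-return path in ci_tcp_listen() above.
 *     if( listen(fd, 128) < 0 )
 *       perror("listen(update backlog)");
 *
 *     close(fd);
 *     return 0;
 *   }
 */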