/* Shut down a UDP socket in direction [how].
 *
 * The OS backing socket (when one can be obtained for [fd]) is shut down
 * first; only if that succeeds is the user-level UDP state shut down via
 * __ci_udp_shutdown().
 *
 * Returns 0 on success, CI_SOCKET_ERROR if the OS shutdown() fails, or the
 * value left in the status variable by CI_SET_ERROR() when
 * __ci_udp_shutdown() reports a negative code.
 */
int ci_udp_shutdown(citp_socket* ep, ci_fd_t fd, int how)
{
  ci_fd_t backing_fd;
  int status;

  CHECK_UEP(ep);
  LOG_UV(log(LPF "shutdown("SF_FMT", %d)", SF_PRI_ARGS(ep,fd), how));

  backing_fd = ci_get_os_sock_fd(fd);
  if( CI_IS_VALID_SOCKET( backing_fd ) ) {
    int os_status = ci_sys_shutdown(backing_fd, how);

    /* The reference on the backing socket is dropped whatever the result. */
    ci_rel_os_sock_fd( backing_fd );
    if( os_status < 0 )
      return CI_SOCKET_ERROR;
  }

  status = __ci_udp_shutdown(ep->netif, SOCK_TO_UDP(ep->s), how);
  if( status >= 0 )
    return 0;

  /* Negative code from the user-level shutdown: hand the negated value to
   * CI_SET_ERROR (presumably it becomes errno - confirm against the macro).
   */
  CI_SET_ERROR(status, -status);
  return status;
}
/* Apply the "forced SO_REUSEPORT" port list to a UDP socket.
 *
 * When CITP_OPTS.udp_reuseports is set and [sa] carries a non-zero port,
 * the configured list is walked; for every entry whose port equals the
 * port in [sa], SO_REUSEPORT is enabled on the OS backing socket of [fd]
 * and CI_SOCK_FLAG_REUSEPORT is set on the socket.
 *
 * fd      descriptor whose OS backing socket receives the option
 * ep      endpoint whose s_flags are updated
 * sa      address supplied by the caller; only the port field is read
 * sa_len  length of [sa] in bytes
 *
 * A NULL or too-short [sa] is ignored: the port cannot be read safely from
 * it, and validating the address is left to whoever performs the bind.
 */
void ci_udp_handle_force_reuseport(ci_fd_t fd, citp_socket* ep,
                                   const struct sockaddr* sa,
                                   socklen_t sa_len)
{
  const struct sockaddr_in* sin = (const struct sockaddr_in*) sa;
  struct ci_port_list *force_reuseport;

  if( CITP_OPTS.udp_reuseports == 0 )
    return;

  /* Never read sin_port beyond what the caller actually supplied. */
  if( sa == NULL || sa_len < sizeof(struct sockaddr_in) )
    return;

  if( sin->sin_port == 0 )
    return;

  CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                      (ci_dllist*)(ci_uintptr_t)CITP_OPTS.udp_reuseports) {
    if( force_reuseport->port == sin->sin_port ) {
      int one = 1;
      int rc;
      int setsockopt_errno;
      ci_fd_t os_sock = ci_get_os_sock_fd(fd);

      ci_assert(CI_IS_VALID_SOCKET(os_sock));
      rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                             sizeof(one));
      /* Snapshot errno before releasing the backing socket so that the
       * value reported below is the one produced by setsockopt().
       */
      setsockopt_errno = errno;
      ci_rel_os_sock_fd(os_sock);

      /* Fixme: shouldn't we handle errors?  For now a failure is only
       * logged and the flag below is still applied, as before.
       */
      if( rc != 0 ) {
        log("%s: failed to set SO_REUSEPORT on OS socket: "
            "rc=%d errno=%d", __func__, rc, setsockopt_errno);
      }

      ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
      LOG_UC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                 __FUNCTION__, SF_PRI_ARGS(ep, fd), force_reuseport->port));
    }
  }
}
/* Bind a UDP socket.
 *
 * All of the decisions are delegated to the underlying OS socket.  If the
 * bind leaves things such that the source address is not one of ours then
 * the socket is handed over to the OS (by returning CI_SOCKET_HANDOVER) -
 * in which case the OS socket will be bound as expected.
 *
 * Returns CI_SOCKET_ERROR when the OS bind fails, otherwise whatever
 * ci_udp_bind_conclude() returns.
 */
int ci_udp_bind(citp_socket* ep, ci_fd_t fd, const struct sockaddr* addr,
                socklen_t addrlen)
{
  ci_uint16 bound_port;
  int os_rc;

  CHECK_UEP(ep);
  LOG_UC(log("%s("SF_FMT", addrlen=%d)", __FUNCTION__,
             SF_PRI_ARGS(ep,fd), addrlen));

  /* Make sure we have no filters.
   *
   * ?? TODO: Under what circumstances could we possibly have filters here?
   * _WIN32 only perhaps?
   */
  ci_udp_clr_filters(ep);

  /* The helper binds the OS socket and reports the resulting local port. */
  os_rc = ci_tcp_helper_bind_os_sock(fd, addr, addrlen, &bound_port);
  if( os_rc != CI_SOCKET_ERROR )
    return ci_udp_bind_conclude(ep, addr, bound_port );

  return os_rc;
}
/* Create a point-to-point association with a server.
 *
 * The OS does all of the work, so that we don't have to emulate some of
 * the more unpleasant "tricks" of Linux.
 *
 * When we're either handing over OS-dest connects or when we're "no
 * failing" connects we may return -2 (unhandled).  In that case the OS
 * socket _has_ been connected, so we are handing over to a socket in the
 * right state.
 *
 * NOTE: on WINDOWS the WSPConnect() API is quite a lot more complex than
 * the BSD one.  To stop the core code being polluted with masses of
 * Windows frippery, the backing socket connection is successfully
 * established _before_ this function is called.  This function then uses
 * the state of the backing socket to configure the Efab socket - so the
 * end result is the same (right down to the race between the OS socket
 * connection being established and our filters being inserted).
 *
 * Returns -1 if there is no backing socket or the OS connect() fails,
 * otherwise the result of ci_udp_connect_conclude().
 */
int ci_udp_connect(citp_socket* ep, ci_fd_t fd,
                   const struct sockaddr* serv_addr, socklen_t addrlen )
{
  ci_fd_t backing_fd;
  int result;

  CHECK_UEP(ep);
  LOG_UC(log("%s("SF_FMT", addrlen=%d)", __FUNCTION__,
             SF_PRI_ARGS(ep,fd), addrlen));

  backing_fd = ci_get_os_sock_fd(fd);
  if( !CI_IS_VALID_SOCKET( backing_fd ) ) {
    LOG_U(ci_log("%s: no backing socket", __FUNCTION__));
    return -1;
  }

  /* Because we have not handed over the fd to the OS all calls to bind()
   * and connect() will have been seen by us - therefore our copies of
   * the local/remote address & port will be accurate.
   */

  /* Let the OS do the connection - it also validates the data for free.
   * On failure the OS changes nothing, so any filters that were in place
   * must be left alone.  Because the OS socket and our socket are
   * socket-options-synchronized, this call also checks the supplied
   * address against the SO_BROADCAST socket option settings.
   */
  if( ci_sys_connect(backing_fd, serv_addr, addrlen) == 0 ) {
    result = ci_udp_connect_conclude( ep, fd, serv_addr, addrlen,
                                      backing_fd);
  }
  else {
    LOG_U(log("%s: sys_connect failed errno:%d", __FUNCTION__, errno));
    result = -1;
  }

  /* Single release point for the backing socket reference. */
  ci_rel_os_sock_fd(backing_fd);
  return result;
}
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be considered an internal error or failing of onload.
 *
 * ep         endpoint being connected
 * fd         descriptor of the socket (used here only for logging)
 * serv_addr  peer address that was passed to the OS connect()
 * addrlen    length of [serv_addr]; not examined in this function
 * os_sock    OS backing socket, already connected by the caller
 *
 * Returns:
 *   0                    - connect completed (filters installed, or cleared
 *                          when traffic is to go via the OS socket)
 *   CI_SOCKET_HANDOVER   - the socket should be handed over to the OS; all
 *                          filters have been cleared first
 *   other (rc)           - result of ci_udp_disconnect(), or the non-zero
 *                          code from ci_udp_sys_getsockname() /
 *                          ci_udp_set_filters(); a negative code becomes a
 *                          handover instead when CITP_OPTS.no_fail is set
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
  const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
  ci_uint32 dst_be32;
  ci_udp_state* us = SOCK_TO_UDP(ep->s);
  int onloadable;
  int rc = 0;

  CHECK_UEP(ep);

  /* Start from a clean slate: drop the EF_SEND flag and clear any pending
   * rx/tx error codes before working out the new connection state.
   */
  UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
  us->s.rx_errno = 0;
  us->s.tx_errno = 0;

  /* NOTE(review): IS_DISCONNECTING presumably tests for an AF_UNSPEC
   * address (the usual way to dissolve a UDP association) - confirm
   * against the macro definition.
   */
  if( IS_DISCONNECTING(serv_sin) ) {
    rc = ci_udp_disconnect(ep, us, os_sock);
    goto out;
  }

#if CI_CFG_FAKE_IPV6
  /* An IPv6 socket connecting to something that is not an IPv4 address in
   * IPv6 clothing is not handled here: hand over.
   */
  if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
    LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
    goto handover;
  }
#endif

  dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);

  /* Pick up the local address from the (already connected) OS socket. */
  if( (rc = ci_udp_sys_getsockname(os_sock, ep)) != 0 ) {
    LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
              FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
              CI_BSWAP_BE16(serv_sin->sin_port), errno));
    goto out;
  }

  /* Record the peer, then ask the control plane about it; the outcome is
   * read back from us->s.pkt.status below.
   */
  us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
  ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
  cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

  switch( us->s.pkt.status ) {
  case retrrc_success:
  case retrrc_nomac:
    onloadable = 1;
    break;
  default:
    /* Any other status: not onloadable.  Hand over straight away if the
     * udp_connect_handover option asks for it, otherwise carry on and
     * leave the traffic to the OS socket.
     */
    onloadable = 0;
    if( NI_OPTS(ep->netif).udp_connect_handover ) {
      LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
                 ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
      goto handover;
    }
    break;
  }

  /* A wildcard address or zero port gets no filters: report success and
   * let the OS socket carry the traffic.
   */
  if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
    LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
               FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
               CI_BSWAP_BE16(serv_sin->sin_port)));
    ci_udp_clr_filters(ep);
    return 0;
  }

  if( CI_IP_IS_LOOPBACK(dst_be32) ) {
    /* After connecting via loopback it is not possible to connect anywhere
     * else.
     */
    LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
               ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
    goto handover;
  }

  if( onloadable ) {
#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL )
      us->s.ofe_code_start = ofe_socktbl_find(
                        ep->netif->ofe, OFE_SOCKTYPE_UDP,
                        udp_laddr_be32(us), udp_raddr_be32(us),
                        udp_lport_be16(us), udp_rport_be16(us));
#endif

    if( (rc = ci_udp_set_filters(ep, us)) != 0 ) {
      /* Failed to set filters.  Most likely we've run out of h/w filters.
       * Handover to O/S to avoid breaking the app.
       *
       * Note that the handover is not unconditional: control goes to the
       * "out" label, which hands over only when rc is negative and
       * CITP_OPTS.no_fail is set; otherwise rc is returned to the caller.
       *
       * TODO: Actually we probably won't break the app if we don't
       * handover, as packets will still get delivered via the kernel
       * stack.  Might be worth having a runtime option to choose whether
       * or not to handover in such cases.
       */
      LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                CI_BSWAP_BE16(serv_sin->sin_port), rc));
      CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
      goto out;
    }
  }
  else {
    /* Not onloadable and no handover requested: make sure no filters
     * remain.
     */
    ci_udp_clr_filters(ep);
  }

  LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
             SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
             ip_addr_str(udp_laddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
             ip_addr_str(udp_raddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
  return 0;

  /* Common exit for the disconnect path and for internal failures: with
   * the no_fail option a negative rc is converted into a handover.
   */
 out:
  if( rc < 0 && CITP_OPTS.no_fail )
    goto handover;
  return rc;

  /* Handover always clears our filters first. */
 handover:
  ci_udp_clr_filters(ep);
  return CI_SOCKET_HANDOVER;
}
/* In this bind handler we just check that the address to which we are
 * binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 *
 * ep       TCP endpoint being bound
 * my_addr  requested local address; NULL is rejected with EINVAL
 * addrlen  length of [my_addr] in bytes
 * fd       descriptor used to reach the OS backing socket when the port is
 *          on the forced-SO_REUSEPORT list (CITP_OPTS.tcp_reuseports)
 *
 * Returns 0 on success.  In CI_CFG_FAKE_IPV6 builds an IPv6 address that is
 * not an IPv4 one in disguise yields CI_SOCKET_HANDOVER.  Validation
 * failures leave through RET_WITH_ERRNO(), and a failing __ci_bind() leaves
 * through CI_LOGLEVEL_TRY_RET().
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  const struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);

  my_addr_in = (const struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   *       What is better? */

  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );

  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );

  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );

  /* sa_family must lie inside the caller's buffer before it is read. */
  if (addrlen < sizeof(my_addr->sa_family))
    RET_WITH_ERRNO( EINVAL );

  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas.
   */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );

  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif

  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        int setsockopt_errno;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);

        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        /* Capture errno now: the ENOPROTOOPT test below must see the value
         * left by setsockopt(), not one modified while releasing the
         * backing socket.
         */
        setsockopt_errno = errno;
        ci_rel_os_sock_fd(os_sock);

        /* The OS does not know SO_REUSEPORT: fall back to legacy mode. */
        if( rc != 0 && setsockopt_errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* In legacy reuseport mode the __ci_bind() step is skipped. */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));

  /* Record the binding; __ci_bind() may have updated new_port. */
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;

  /* A multicast bind address is not stored as the control-plane local
   * address; zero is stored instead.
   */
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u",
             ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port),
             CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}