/* Record the local IP/port on a UDP socket and mirror them into the
 * control-plane fields used for route lookups.  A multicast local address
 * is not a usable source address, so the control-plane copy is zeroed. */
void ci_udp_set_laddr(citp_socket* ep, unsigned laddr_be32, int lport_be16)
{
  ci_udp_state* udp = SOCK_TO_UDP(ep->s);

  udp_laddr_be32(udp) = laddr_be32;
  udp_lport_be16(udp) = (ci_uint16) lport_be16;
  udp->s.cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(laddr_be32) ? 0 : laddr_be32;
  udp->s.cp.lport_be16 = lport_be16;
}
/* Record the local address/port on a UDP socket and mirror the IPv4 part
 * into the control-plane fields used for route lookups.  A multicast local
 * address is not a usable source address, so the mirror is zeroed. */
void ci_udp_set_laddr(citp_socket* ep, ci_addr_t addr, int lport_be16)
{
  ci_udp_state* udp = SOCK_TO_UDP(ep->s);

  ci_sock_set_laddr(&udp->s, addr, lport_be16);
#if CI_CFG_IPV6
  /* The IPv4 control-plane mirror below does not apply to IPv6 sockets. */
  if( ipcache_is_ipv6(udp->s.pkt) )
    return;
#endif
  udp->s.cp.ip_laddr_be32 = CI_IP_IS_MULTICAST(addr.ip4) ? 0 : addr.ip4;
  udp->s.cp.lport_be16 = lport_be16;
}
/* Decide whether a UDP socket binding to [addr]:[lport] must be handed over
 * to the kernel stack rather than accelerated.
 *
 * Returns 1 to hand over, 0 to keep the socket in this stack.
 */
int ci_udp_should_handover(citp_socket* ep, const struct sockaddr* addr,
                           ci_uint16 lport)
{
  ci_uint32 addr_be32;
  int port = CI_BSWAP_BE16(lport);  /* host-order port, evaluated once */

#if CI_CFG_FAKE_IPV6
  /* Genuine IPv6 addresses are not accelerated. */
  if( ep->s->domain == AF_INET6 && ! ci_tcp_ipv6_is_ipv4(addr) )
    goto handover;
#endif

  /* Ports inside any of the three configured handover ranges belong to the
   * kernel stack. */
  if( (port >= NI_OPTS(ep->netif).udp_port_handover_min &&
       port <= NI_OPTS(ep->netif).udp_port_handover_max) ||
      (port >= NI_OPTS(ep->netif).udp_port_handover2_min &&
       port <= NI_OPTS(ep->netif).udp_port_handover2_max) ||
      (port >= NI_OPTS(ep->netif).udp_port_handover3_min &&
       port <= NI_OPTS(ep->netif).udp_port_handover3_max) ) {
    LOG_UC(log(FNS_FMT "HANDOVER (%d <= %d <= %d)",
               FNS_PRI_ARGS(ep->netif, ep->s),
               NI_OPTS(ep->netif).udp_port_handover_min, port,
               NI_OPTS(ep->netif).udp_port_handover_max));
    goto handover;
  }

  addr_be32 = ci_get_ip4_addr(ep->s->domain, addr);
  if( addr_be32 != CI_BSWAPC_BE32(INADDR_ANY) &&
      ! cicp_user_addr_is_local_efab(CICP_HANDLE(ep->netif), &addr_be32) &&
      ! CI_IP_IS_MULTICAST(addr_be32) ) {
    /* Either the bind/getsockname indicated that we need to let the OS
     * take this or the local address is not one of ours - so we can safely
     * hand-over as bind to a non-ANY addr cannot be revoked.
     * The filters (if any) have already been removed, so we just get out.
     */
    goto handover;
  }

  return 0;

 handover:
  return 1;
}
/* Clear sfc interface affinity entries matching the given protocol and
 * local/remote address pair.
 *
 * Three cases on the local address:
 *  - INADDR_ANY: clear on every sfc interface, substituting each
 *    interface's own IP as the local address;
 *  - multicast: clear on every sfc interface, keeping the multicast
 *    address itself as the local address;
 *  - otherwise: clear only on the interface owning that IP.
 *
 * Returns 1 on success, 0 when the owning interface for a unicast local
 * address cannot be found.
 *
 * NOTE(review): the INADDR_ANY branch tests interface_driver_is() while the
 * multicast branch tests interface_is() — confirm this asymmetry is
 * intentional and not a typo for the same predicate.
 */
static int do_clear_affinity(const char* protos, struct sockaddr_in la,
                             struct sockaddr_in ra)
{
  int proto = str_to_proto(protos);
  int i, ifindex;

  /* Ensure the cached interface/IP table is up to date before walking it. */
  refresh_ip_list();

  if( la.sin_addr.s_addr == 0 ) {
    /* Local address is INADDR_ANY: apply to all sfc interfaces. */
    for( i = 0; i < ip_list_n; ++i )
      if( interface_driver_is(ip_list_name(i), "sfc") )
        clear_affinity(interface_to_ifindex(ip_list_name(i)), proto,
                       ip_list_ip(i), la.sin_port,
                       ra.sin_addr.s_addr, ra.sin_port);
    return 1;
  }
  else if( CI_IP_IS_MULTICAST(la.sin_addr.s_addr) ) {
    /* Multicast local address: no single owning interface, so apply to all
     * sfc interfaces using the multicast address as the local address. */
    for( i = 0; i < ip_list_n; ++i )
      if( interface_is(ip_list_name(i), "sfc") )
        clear_affinity(interface_to_ifindex(ip_list_name(i)), proto,
                       la.sin_addr.s_addr, la.sin_port,
                       ra.sin_addr.s_addr, ra.sin_port);
    return 1;
  }
  else {
    /* Unicast local address: resolve its owning interface. */
    ifindex = ip_to_ifindex(la.sin_addr.s_addr);
    if( ifindex < 0 ) {
      err("%s: ERROR: Can't find interface for IP %s\n", me,
          inet_ntoa(la.sin_addr));
      return 0;
    }
    clear_affinity(ifindex, proto, la.sin_addr.s_addr, la.sin_port,
                   ra.sin_addr.s_addr, ra.sin_port);
    return 1;
  }
}
/* Run the application-installed zero-copy receive filter callback
 * (us->recv_q_filter) over packets that have been added to the UDP recv
 * queue but not yet filtered.  Each packet is marked FILTER_PASSED or
 * FILTER_DROPPED and the queue's filter accounting is updated.
 *
 * Returns non-zero if at least one filter-passed packet is available for
 * delivery, 0 otherwise.  Caller must hold the socket lock.
 */
int ci_udp_filter_recved_pkts(ci_netif* ni, ci_udp_state* us)
{
  enum onload_zc_callback_rc rc;
  struct onload_zc_msg zc_msg;
  struct onload_zc_iovec zc_iovec[CI_UDP_ZC_IOVEC_MAX];
  ci_ip_pkt_fmt* pkt;
  unsigned cb_flags;
  int dropped_bytes;

  ci_assert(ci_sock_is_locked(ni, &us->s.b));

  zc_msg.iov = zc_iovec;
  zc_msg.msghdr.msg_controllen = 0;
  zc_msg.msghdr.msg_flags = 0;

  /* Loop until every packet added to the queue has been either passed or
   * dropped by the filter. */
  while( us->recv_q.pkts_added !=
         us->recv_q.pkts_filter_passed + us->recv_q.pkts_filter_dropped ) {
    /* Read barrier before inspecting a packet the producer published. */
    ci_rmb();
    pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
    if( pkt->pf.udp.rx_flags &
        (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
         CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED) ) {
      /* Head packet is already filtered: step to the next one.
       * We know this can't go past tail because of the while loop
       * condition */
      us->recv_q.filter = pkt->next;
      pkt = PKT_CHK_NNL(ni, us->recv_q.filter);
      ci_assert( !(pkt->pf.udp.rx_flags &
                   (CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED |
                    CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED)) );
    }

    ci_udp_pkt_to_zc_msg(ni, pkt, &zc_msg);

    /* Multicast-destined packets may be shared with other sockets, so the
     * callback is told it must not modify them. */
    cb_flags = CI_IP_IS_MULTICAST(oo_ip_hdr(pkt)->ip_daddr_be32) ?
      ONLOAD_ZC_MSG_SHARED : 0;
    rc = (*(onload_zc_recv_filter_callback)((ci_uintptr_t)us->recv_q_filter))
      (&zc_msg, (void *)((ci_uintptr_t)us->recv_q_filter_arg), cb_flags);

    /* ONLOAD_ZC_KEEP is not supported on this filter path. */
    ci_assert(!(rc & ONLOAD_ZC_KEEP));

    if( rc & ONLOAD_ZC_TERMINATE ) {
      /* Filter rejected the packet: account it as dropped. */
      us->recv_q.bytes_filter_dropped += pkt->pf.udp.pay_len;
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_DROPPED;
      ++us->recv_q.pkts_filter_dropped;
    }
    else {
      pkt->pf.udp.rx_flags |= CI_IP_PKT_FMT_PREFIX_UDP_RX_FILTER_PASSED;
      ++us->recv_q.pkts_filter_passed;
      if( rc & ONLOAD_ZC_MODIFIED ) {
        /* Callback shrank the payload in place; it must not have done so
         * on a shared (multicast) packet. */
        ci_assert(!(cb_flags & ONLOAD_ZC_MSG_SHARED));
        dropped_bytes = ci_zc_msg_to_udp_pkt(ni, &zc_msg, pkt);
        ci_assert_gt(dropped_bytes, 0);
        ci_assert_lt(dropped_bytes, pkt->pf.udp.pay_len);
        pkt->pf.udp.pay_len -= dropped_bytes;
        us->recv_q.bytes_filter_dropped += dropped_bytes;
      }
      us->recv_q.bytes_filter_passed += pkt->pf.udp.pay_len;
      /* A packet passed the filter: the caller now has data to deliver. */
      return 1;
    }
  }

  /* Nothing newly passed: report whether previously-passed packets remain
   * undelivered. */
  return us->recv_q.pkts_filter_passed != us->recv_q.pkts_delivered;
}
/* Resolve the route to ipcache->ip.ip_daddr_be32 via the control plane and
 * populate the IP cache: status, hwport/intf, encapsulation, source
 * address, MTU, next hop, Ethernet header MACs and TTL.  Unusable routes
 * leave status set to an retrrc_* error code and intf_i == -1.
 *
 * This function must be called when "the route is unusable".  I.e. when
 * the route is invalid or if there is no ARP.  In the second case, we
 * can expedite ARP resolution by explicit request just now. */
void cicp_user_retrieve(ci_netif* ni, ci_ip_cached_hdrs* ipcache,
                        const struct oo_sock_cplane* sock_cp)
{
  struct cp_fwd_key key;
  struct cp_fwd_data data;
  int rc;
  uint32_t daddr_be32 = ipcache->ip.ip_daddr_be32;

  if( oo_cp_verinfo_is_valid(ni->cplane, &ipcache->mac_integrity) ) {
    /* Fwd entry is still current, so the only possible problem is a
     * missing MAC: kick off ARP resolution. */
    ci_assert_equal(ipcache->status, retrrc_nomac);
    oo_cp_arp_resolve(ni->cplane, &ipcache->mac_integrity);
    /* Re-check the version of the fwd entry after ARP resolution.
     * Return if nothing changed; otherwise handle the case when ARP has
     * already been resolved. */
    if( oo_cp_verinfo_is_valid(ni->cplane, &ipcache->mac_integrity) )
      return;
  }

  /* Build the forward-lookup key from the destination and per-socket
   * control-plane state. */
  key.dst = daddr_be32;
  key.tos = sock_cp->ip_tos;
  key.flag = 0;
  if( ipcache->ip.ip_protocol == IPPROTO_UDP )
    key.flag |= CP_FWD_KEY_UDP;
  key.ifindex = sock_cp->so_bindtodevice;

  if( CI_IP_IS_MULTICAST(daddr_be32) ) {
    if( sock_cp->sock_cp_flags & OO_SCP_NO_MULTICAST ) {
      /* Socket has opted out of accelerated multicast. */
      ipcache->status = retrrc_alienroute;
      ipcache->hwport = CI_HWPORT_ID_BAD;
      ipcache->intf_i = -1;
      return;
    }
    /* In linux, SO_BINDTODEVICE has the priority over IP_MULTICAST_IF */
    if( key.ifindex == 0 )
      key.ifindex = sock_cp->ip_multicast_if;
    key.src = sock_cp->ip_multicast_if_laddr_be32;
    if( key.src == 0 && sock_cp->ip_laddr_be32 != 0 )
      key.src = sock_cp->ip_laddr_be32;
  }
  else {
    key.src = sock_cp->ip_laddr_be32;
    if( sock_cp->sock_cp_flags & OO_SCP_TPROXY )
      key.flag |= CP_FWD_KEY_TRANSPARENT;
  }

  if( key.src == 0 && sock_cp->sock_cp_flags & OO_SCP_UDP_WILD )
    key.flag |= CP_FWD_KEY_SOURCELESS;

#ifdef __KERNEL__
  /* In-kernel callers in driverlink context must not block waiting for
   * the control plane to answer. */
  if( ! (ni->flags & CI_NETIF_FLAG_IN_DL_CONTEXT) )
#endif
    key.flag |= CP_FWD_KEY_REQ_WAIT;

  rc = cicp_user_resolve(ni, &ipcache->mac_integrity, &key, &data);
  if( rc == 0 && key.src == 0 &&
      ! (sock_cp->sock_cp_flags & OO_SCP_UDP_WILD) ) {
    /* First lookup chose a source address for us: re-resolve with that
     * source so the cached fwd entry matches future keyed lookups. */
    key.src = data.src;
    rc = cicp_user_resolve(ni, &ipcache->mac_integrity, &key, &data);
  }

  switch( data.ifindex ) {
  case CI_IFID_LOOP:
    /* Loopback route: handled internally, no hardware port. */
    ipcache->status = retrrc_localroute;
    ipcache->encap.type = CICP_LLAP_TYPE_NONE;
    ipcache->ether_offset = 4;
    ipcache->intf_i = OO_INTF_I_LOOPBACK;
    return;
  case CI_IFID_BAD:
    ipcache->status = retrrc_alienroute;
    ipcache->intf_i = -1;
    return;
  default:
  {
    cicp_hwport_mask_t hwports = 0;
    /* Can we accelerate interface in this stack ? */
    if( (data.encap.type & CICP_LLAP_TYPE_BOND) == 0 &&
        (data.hwports & ~(ci_netif_get_hwport_mask(ni))) == 0 )
      break;

    /* Check bond */
    rc = oo_cp_find_llap(ni->cplane, data.ifindex, NULL/*mtu*/,
                         NULL /*tx_hwports*/, &hwports /*rx_hwports*/,
                         NULL/*mac*/, NULL /*encap*/);
    if( rc != 0 || (hwports & ~(ci_netif_get_hwport_mask(ni))) ) {
      /* Route goes via a port this stack cannot accelerate. */
      ipcache->status = retrrc_alienroute;
      ipcache->intf_i = -1;
    }
    break;
  }
  }

  ipcache->encap = data.encap;
#if CI_CFG_TEAMING
  if( ipcache->encap.type & CICP_LLAP_TYPE_USES_HASH ) {
    /* Bond/team using hashed slave selection: choose hwport by hash. */
    if( cicp_user_bond_hash_get_hwport(ni, ipcache, data.hwports,
                                       sock_cp->lport_be16,
                                       daddr_be32) != 0 ) {
      ipcache->status = retrrc_alienroute;
      ipcache->intf_i = -1;
      return;
    }
  }
  else
#endif
    ipcache->hwport = cp_hwport_mask_first(data.hwports);

  ipcache->mtu = data.mtu;
  ipcache->ip_saddr.ip4 = key.src == INADDR_ANY ? data.src : key.src;
  ipcache->ifindex = data.ifindex;
  ipcache->nexthop.ip4 = data.next_hop;
  if( ! ci_ip_cache_is_onloadable(ni, ipcache)) {
    ipcache->status = retrrc_alienroute;
    ipcache->intf_i = -1;
    return;
  }

  /* Layout the Ethernet header, and set the source mac.
   * Route resolution already issues ARP request, so there is no need to
   * call oo_cp_arp_resolve() explicitly in case of retrrc_nomac. */
  ipcache->status = data.arp_valid ? retrrc_success : retrrc_nomac;
  cicp_ipcache_vlan_set(ipcache);
  memcpy(ci_ip_cache_ether_shost(ipcache), &data.src_mac, ETH_ALEN);
  if( data.arp_valid )
    memcpy(ci_ip_cache_ether_dhost(ipcache), &data.dst_mac, ETH_ALEN);

  /* Multicast and unicast TTLs are configured separately on the socket. */
  if( CI_IP_IS_MULTICAST(daddr_be32) )
    ipcache->ip.ip_ttl = sock_cp->ip_mcast_ttl;
  else
    ipcache->ip.ip_ttl = sock_cp->ip_ttl;
}
/* In this bind handler we just check that the address to which
 * are binding is either "any" or one of ours.
 * In the Linux kernel version [fd] is unused.
 *
 * Returns 0 on success, CI_SOCKET_HANDOVER to push the socket to the
 * kernel stack, or sets errno via RET_WITH_ERRNO on invalid arguments or
 * socket state.
 */
int ci_tcp_bind(citp_socket* ep, const struct sockaddr* my_addr,
                socklen_t addrlen, ci_fd_t fd )
{
  struct sockaddr_in* my_addr_in;
  ci_uint16 new_port;
  ci_uint32 addr_be32;
  ci_sock_cmn* s = ep->s;
  ci_tcp_state* c = &SOCK_TO_WAITABLE_OBJ(s)->tcp;
  int rc;

  CHECK_TEP(ep);
  my_addr_in = (struct sockaddr_in*) my_addr;

  /* Check if state of the socket is OK for bind operation. */
  /* \todo Earlier (TS_TCP( epi->tcpep.state )->tcp_source_be16) is used.
   * What is better? */
  if (my_addr == NULL)
    RET_WITH_ERRNO( EINVAL );
  if (s->b.state != CI_TCP_CLOSED)
    RET_WITH_ERRNO( EINVAL );
  /* A previously-established socket cannot be re-bound. */
  if (c->tcpflags & CI_TCPT_FLAG_WAS_ESTAB)
    RET_WITH_ERRNO( EINVAL );
  if( my_addr->sa_family != s->domain )
    RET_WITH_ERRNO( s->domain == PF_INET ? EAFNOSUPPORT : EINVAL );

  /* Bug 4884: Windows regularly uses addrlen > sizeof(struct sockaddr_in)
   * Linux is also relaxed about overlength data areas. */
  if (s->domain == PF_INET && addrlen < sizeof(struct sockaddr_in))
    RET_WITH_ERRNO( EINVAL );

#if CI_CFG_FAKE_IPV6
  if (s->domain == PF_INET6 && addrlen < SIN6_LEN_RFC2133)
    RET_WITH_ERRNO( EINVAL );
  /* Genuine (non-v4-mapped) IPv6 binds are handed to the kernel stack. */
  if( s->domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(my_addr) )
    return CI_SOCKET_HANDOVER;
#endif

  addr_be32 = ci_get_ip4_addr(s->domain, my_addr);

  /* Using the port number provided, see if we can do this bind */
  new_port = my_addr_in->sin_port;

  /* If this port is on the configured force-reuseport list, apply
   * SO_REUSEPORT to the backing OS socket; fall back to a legacy flag if
   * the kernel lacks the option (ENOPROTOOPT). */
  if( CITP_OPTS.tcp_reuseports != 0 && new_port != 0 ) {
    struct ci_port_list *force_reuseport;
    CI_DLLIST_FOR_EACH2(struct ci_port_list, force_reuseport, link,
                        (ci_dllist*)(ci_uintptr_t)CITP_OPTS.tcp_reuseports) {
      if( force_reuseport->port == new_port ) {
        int one = 1;
        ci_fd_t os_sock = ci_get_os_sock_fd(ep, fd);
        ci_assert(CI_IS_VALID_SOCKET(os_sock));
        rc = ci_sys_setsockopt(os_sock, SOL_SOCKET, SO_REUSEPORT, &one,
                               sizeof(one));
        ci_rel_os_sock_fd(os_sock);
        if( rc != 0 && errno == ENOPROTOOPT )
          ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT_LEGACY;
        ep->s->s_flags |= CI_SOCK_FLAG_REUSEPORT;
        LOG_TC(log("%s "SF_FMT", applied legacy SO_REUSEPORT flag for port %u",
                   __FUNCTION__, SF_PRI_ARGS(ep, fd), new_port));
      }
    }
  }

  /* In the legacy-reuseport case __ci_bind() is skipped here; otherwise
   * bind in this stack (may pick an ephemeral port into new_port). */
  if( !(ep->s->s_flags & CI_SOCK_FLAG_REUSEPORT_LEGACY) )
    CI_LOGLEVEL_TRY_RET(LOG_TV,
                        __ci_bind(ep->netif, ep->s, addr_be32, &new_port));
  ep->s->s_flags |= CI_SOCK_FLAG_BOUND;
  sock_lport_be16(s) = new_port;
  sock_laddr_be32(s) = addr_be32;
  /* Mirror into control-plane fields; a multicast local address is not a
   * usable source address, so the mirror is zeroed. */
  if( CI_IP_IS_MULTICAST(addr_be32) )
    s->cp.ip_laddr_be32 = 0;
  else
    s->cp.ip_laddr_be32 = addr_be32;
  s->cp.lport_be16 = new_port;
  /* Clear remote addressing; chained assignment stores 0 into both the
   * 16-bit rport and 32-bit raddr fields. */
  sock_rport_be16(s) = sock_raddr_be32(s) = 0;

  LOG_TC(log(LPF "bind to %s:%u n_p:%u lp:%u",
             ip_addr_str(addr_be32),
             (unsigned) CI_BSWAP_BE16(my_addr_in->sin_port),
             CI_BSWAP_BE16(new_port), CI_BSWAP_BE16(sock_lport_be16(s))));

  return 0;
}