/* Do a control plane lookup for an already-formatted outgoing packet.
 *
 * The local address and destination address are taken from the packet's
 * IP header; for UDP and TCP the ports are taken from the packet as well,
 * otherwise both ports are zeroed.  [ipcache] is (re)initialised here and
 * receives the result of cicp_user_retrieve().
 *
 * [sock_cp_opt] may be NULL, in which case a freshly initialised
 * oo_sock_cplane is used as the starting point; otherwise a private copy
 * of it is modified, so the caller's structure is never written.
 */
void ci_ip_send_pkt_lookup(ci_netif* ni,
                           const struct oo_sock_cplane* sock_cp_opt,
                           ci_ip_pkt_fmt* pkt,
                           ci_ip_cached_hdrs* ipcache)
{
  struct oo_sock_cplane lookup_cp;
  ci_ip4_hdr* pkt_ip = oo_tx_ip_hdr(pkt);

  ci_assert(pkt_ip->ip_saddr_be32 != 0);
  ci_assert(pkt_ip->ip_daddr_be32 != 0);

  /* Start from the caller's settings when supplied, else from defaults. */
  if( sock_cp_opt == NULL )
    oo_sock_cplane_init(&lookup_cp);
  else
    lookup_cp = *sock_cp_opt;
  ci_ip_cache_init(ipcache);

  /* Addresses come straight from the IP header. */
  lookup_cp.ip_laddr_be32 = pkt_ip->ip_saddr_be32;
  ipcache->ip.ip_daddr_be32 = pkt_ip->ip_daddr_be32;

  /* Only UDP and TCP contribute port numbers to the lookup. */
  if( pkt_ip->ip_protocol == IPPROTO_UDP ||
      pkt_ip->ip_protocol == IPPROTO_TCP ) {
    lookup_cp.lport_be16 = TX_PKT_SPORT_BE16(pkt);
    ipcache->dport_be16 = TX_PKT_DPORT_BE16(pkt);
  }
  else {
    lookup_cp.lport_be16 = 0;
    ipcache->dport_be16 = 0;
  }

  cicp_user_retrieve(ni, ipcache, &lookup_cp);
}
/* Check that we can handle this destination.
 *
 * Stores the destination address and port in the socket's ipcache and
 * performs a control plane lookup.  Returns 0 when the connection can be
 * handled here (normal route or an accepted loopback route), and
 * CI_SOCKET_HANDOVER otherwise.
 */
static int ci_tcp_connect_check_dest(citp_socket* ep, ci_ip_addr_t dst_be32,
                                     int dport_be16)
{
  ci_ip_cached_hdrs* ipcache = &ep->s->pkt;
  ci_tcp_state* ts;

  ipcache->ip.ip_daddr_be32 = dst_be32;
  ipcache->dport_be16 = dport_be16;
  cicp_user_retrieve(ep->netif, ipcache, &ep->s->cp);

  if(CI_LIKELY( ipcache->status == retrrc_success ||
                ipcache->status == retrrc_nomac   ||
                ipcache->status < 0 )) {
    /* Onloadable. */
    if( ipcache->encap.type & CICP_LLAP_TYPE_XMIT_HASH_LAYER4 ) {
      /* We don't yet have a local port number, so the result of that
       * lookup may be wrong.
       */
      ci_ip_cache_invalidate(ipcache);
    }
    if( ipcache->ip.ip_saddr_be32 == 0 ) {
      /* Control plane has selected a source address for us -- remember it. */
      ipcache->ip.ip_saddr_be32 = ipcache->ip_saddr_be32;
      ep->s->cp.ip_laddr_be32 = ipcache->ip_saddr_be32;
    }
    return 0;
  }

  /* Anything other than a local route is handed over. */
  if( ipcache->status != retrrc_localroute )
    return CI_SOCKET_HANDOVER;

  ts = SOCK_TO_TCP(ep->s);

  /* Client-side loopback acceleration disabled. */
  if( NI_OPTS(ep->netif).tcp_client_loopback == CITP_TCP_LOOPBACK_OFF)
    return CI_SOCKET_HANDOVER;

  ep->s->s_flags |= CI_SOCK_FLAG_BOUND_ALIEN;

  /* Only look for a peer when server-side loopback is enabled. */
  if( NI_OPTS(ep->netif).tcp_server_loopback == CITP_TCP_LOOPBACK_OFF )
    ts->local_peer = OO_SP_NULL;
  else
    ts->local_peer = ci_tcp_connect_find_local_peer(ep->netif, dst_be32,
                                                    dport_be16);

  /* No peer was found and the client loopback option is SAMESTACK:
   * hand over.
   */
  if( ! OO_SP_NOT_NULL(ts->local_peer) &&
      NI_OPTS(ep->netif).tcp_client_loopback ==
      CITP_TCP_LOOPBACK_SAMESTACK )
    return CI_SOCKET_HANDOVER;

  ipcache->flags |= CI_IP_CACHE_IS_LOCALROUTE;
  if( ipcache->ip.ip_saddr_be32 == 0 ) {
    /* No source address chosen yet: use the destination address. */
    ipcache->ip.ip_saddr_be32 = dst_be32;
    ep->s->cp.ip_laddr_be32 = dst_be32;
  }
  ipcache->ether_offset = 4; /* lo is non-VLAN */
  ipcache->ip_saddr_be32 = dst_be32;
  ipcache->dport_be16 = dport_be16;
  return 0;
}
/* Complete a UDP U/L connect.  The sys connect() call must have been made
 * (and succeeded) before calling this function.  So if anything goes wrong
 * in here, then it can be considered an internal error or failing of onload.
 *
 * Returns 0 on success, CI_SOCKET_HANDOVER when the socket is to be handed
 * over, or the non-zero rc of the step that failed.  A negative rc is
 * turned into a handover when CITP_OPTS.no_fail is set.
 */
int ci_udp_connect_conclude(citp_socket* ep, ci_fd_t fd,
                            const struct sockaddr* serv_addr,
                            socklen_t addrlen, ci_fd_t os_sock)
{
  const struct sockaddr_in* serv_sin = (const struct sockaddr_in*) serv_addr;
  ci_udp_state* us = SOCK_TO_UDP(ep->s);
  ci_uint32 dst_be32;
  int can_onload;
  int rc = 0;

  CHECK_UEP(ep);

  /* Reset per-connection send state and any pending socket errors. */
  UDP_CLR_FLAG(us, CI_UDPF_EF_SEND);
  us->s.rx_errno = 0;
  us->s.tx_errno = 0;

  if( IS_DISCONNECTING(serv_sin) ) {
    rc = ci_udp_disconnect(ep, us, os_sock);
    goto out;
  }

#if CI_CFG_FAKE_IPV6
  if( us->s.domain == PF_INET6 && !ci_tcp_ipv6_is_ipv4(serv_addr) ) {
    LOG_UC(log(FNT_FMT "HANDOVER not IPv4", FNT_PRI_ARGS(ep->netif, us)));
    goto handover;
  }
#endif

  dst_be32 = ci_get_ip4_addr(serv_sin->sin_family, serv_addr);

  rc = ci_udp_sys_getsockname(os_sock, ep);
  if( rc != 0 ) {
    LOG_E(log(FNT_FMT "ERROR: (%s:%d) sys_getsockname failed (%d)",
              FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
              CI_BSWAP_BE16(serv_sin->sin_port), errno));
    goto out;
  }

  /* Record the peer, then ask the control plane how to reach it. */
  us->s.cp.sock_cp_flags |= OO_SCP_CONNECTED;
  ci_udp_set_raddr(us, dst_be32, serv_sin->sin_port);
  cicp_user_retrieve(ep->netif, &us->s.pkt, &us->s.cp);

  can_onload = ( us->s.pkt.status == retrrc_success ||
                 us->s.pkt.status == retrrc_nomac );
  if( ! can_onload && NI_OPTS(ep->netif).udp_connect_handover ) {
    LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
               ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
    goto handover;
  }

  if( dst_be32 == INADDR_ANY_BE32 || serv_sin->sin_port == 0 ) {
    LOG_UC(log(FNT_FMT "%s:%d - route via OS socket",
               FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
               CI_BSWAP_BE16(serv_sin->sin_port)));
    ci_udp_clr_filters(ep);
    return 0;
  }

  if( CI_IP_IS_LOOPBACK(dst_be32) ) {
    /* After connecting via loopback it is not possible to connect anywhere
     * else.
     */
    LOG_UC(log(FNT_FMT "HANDOVER %s:%d", FNT_PRI_ARGS(ep->netif, us),
               ip_addr_str(dst_be32), CI_BSWAP_BE16(serv_sin->sin_port)));
    goto handover;
  }

  if( ! can_onload ) {
    ci_udp_clr_filters(ep);
  }
  else {
#ifdef ONLOAD_OFE
    if( ep->netif->ofe != NULL )
      us->s.ofe_code_start = ofe_socktbl_find(
                        ep->netif->ofe, OFE_SOCKTYPE_UDP,
                        udp_laddr_be32(us), udp_raddr_be32(us),
                        udp_lport_be16(us), udp_rport_be16(us));
#endif

    rc = ci_udp_set_filters(ep, us);
    if( rc != 0 ) {
      /* Failed to set filters.  Most likely we've run out of h/w filters.
       * Handover to O/S to avoid breaking the app.
       *
       * TODO: Actually we probably won't break the app if we don't
       * handover, as packets will still get delivered via the kernel
       * stack.  Might be worth having a runtime option to choose whether
       * or not to handover in such cases.
       */
      LOG_U(log(FNT_FMT "ERROR: (%s:%d) ci_udp_set_filters failed (%d)",
                FNT_PRI_ARGS(ep->netif, us), ip_addr_str(dst_be32),
                CI_BSWAP_BE16(serv_sin->sin_port), rc));
      CITP_STATS_NETIF(++ep->netif->state->stats.udp_connect_no_filter);
      goto out;
    }
  }

  LOG_UC(log(LPF "connect: "SF_FMT" %sCONNECTED L:%s:%u R:%s:%u (err:%d)",
             SF_PRI_ARGS(ep,fd), udp_raddr_be32(us) ? "" : "DIS",
             ip_addr_str(udp_laddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_lport_be16(us)),
             ip_addr_str(udp_raddr_be32(us)),
             (unsigned) CI_BSWAP_BE16(udp_rport_be16(us)), errno));
  return 0;

 out:
  /* With no_fail set, a failure becomes a handover instead of an error. */
  if( rc < 0 && CITP_OPTS.no_fail )
    goto handover;
  return rc;

 handover:
  ci_udp_clr_filters(ep);
  return CI_SOCKET_HANDOVER;
}
/* Slow-path TCP send, used when the socket's ipcache is not valid.
 *
 * Refreshes the ipcache through the control plane.  On success the packet
 * is sent; on an accepted local route it is delivered via
 * ci_ip_local_send().  In every non-success case the ipcache is invalidated
 * afterwards and the lookup status is then handled: a missing MAC defers
 * the send, and routing errors may drop a connection in SYN-SENT.
 */
void ci_ip_send_tcp_slow(ci_netif* ni, ci_tcp_state* ts, ci_ip_pkt_fmt* pkt)
{
  /* We're here because the ipcache is not valid. */
  ci_ip_cached_hdrs* ipcache = &ts->s.pkt;
  int prev_mtu = ipcache->mtu;
  int rc;

  cicp_user_retrieve(ni, ipcache, &ts->s.cp);

  if( ipcache->status == retrrc_success ) {
    /* MTU changed under us: kick the PMTU timer before sending. */
    if( ipcache->mtu != prev_mtu )
      CI_PMTU_TIMER_NOW(ni, &ts->pmtus);
    ci_ip_set_mac_and_port(ni, ipcache, pkt);
    ci_netif_send(ni, pkt);
    return;
  }

  if( ipcache->status == retrrc_localroute &&
      (ipcache->flags & CI_IP_CACHE_IS_LOCALROUTE) )
    ci_ip_local_send(ni, pkt, &ts->s, OO_SP_NULL);

  /* For TCP, we want the ipcache to only be valid when onloadable. */
  ci_ip_cache_invalidate(ipcache);

  switch( ipcache->status ) {
  case retrrc_nomac:
    /* If we resend SYN, and there is no MAC - it means ARP failed.
     * Connect() should return with EHOSTUNREACH.
     * We verify twice - on the first and the second retransmit.
     * Very hackish.
     */
    if( ts->s.b.state == CI_TCP_SYN_SENT ) {
      if( ts->retransmits == 1 ) {
        ts->tcpflags |= CI_TCPT_FLAG_NO_ARP;
      }
      else if( (ts->tcpflags & CI_TCPT_FLAG_NO_ARP) &&
               ts->retransmits == 2 ) {
        ci_tcp_drop(ni, ts, EHOSTUNREACH);
        return;
      }
    }
    rc = 0;
    cicp_user_defer_send(ni, retrrc_nomac, &rc, OO_PKT_P(pkt),
                         ipcache->ifindex);
    ++ts->stats.tx_nomac_defer;
    return;

  case retrrc_noroute:
    rc = -EHOSTUNREACH;
    break;

  case retrrc_alienroute:
  case retrrc_localroute:
    /* ?? TODO: inc some stat */
    return;

  default:
    ci_assert_lt(ts->s.pkt.status, 0);
    /* Belt and braces: a non-negative status here yields rc of 0. */
    rc = ( ipcache->status < 0 ) ? ipcache->status : 0;
    break;
  }

  ci_assert_le(rc, 0);

  /* In most cases, we should ignore return code; the packet will be resent
   * later, because of RTO.  However, in SYN-SENT we should pass errors to
   * user.  At the same time, we should not pass ENOBUFS to user - it is
   * pretty internal problem of cplane, so we should try again.  Possibly,
   * there may be other internal problems, such as ENOMEM.
   *
   * Also, do not break connection when the first SYN fails:
   * - Linux does not do it;
   * - cplane has some latency, so we have false positives here;
   * - ci_tcp_connect() does not expect it.
   */
  if( ts->s.b.state == CI_TCP_SYN_SENT && rc < 0 && ts->retransmits > 0 &&
      (rc == -EHOSTUNREACH || rc == -ENETUNREACH || rc == -ENETDOWN) )
    ci_tcp_drop(ni, ts, -rc);
}