/* * Similar to rss_m2cpuid, but designed to be used by the IP NETISR * on incoming frames. * * If an existing RSS hash exists and it matches what the configured * hashing is, then use it. * * If there's an existing RSS hash but the desired hash is different, * or if there's no useful RSS hash, then calculate it via * the software path. * * XXX TODO: definitely want statistics here! */ struct mbuf * rss_soft_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid) { uint32_t hash_val, hash_type; int ret; M_ASSERTPKTHDR(m); ret = rss_mbuf_software_hash_v4(m, RSS_HASH_PKT_INGRESS, &hash_val, &hash_type); if (ret > 0) { /* mbuf has a valid hash already; don't need to modify it */ *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); } else if (ret == 0) { /* hash was done; update */ m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); m->m_flags |= M_FLOWID; *cpuid = rss_hash2cpuid(m->m_pkthdr.flowid, M_HASHTYPE_GET(m)); } else { /* ret < 0 */ /* no hash was done */ *cpuid = NETISR_CPUID_NONE; } return (m); }
static void sfxge_rx_deliver(struct sfxge_softc *sc, struct sfxge_rx_sw_desc *rx_desc) { struct mbuf *m = rx_desc->mbuf; int flags = rx_desc->flags; int csum_flags; /* Convert checksum flags */ csum_flags = (flags & EFX_CKSUM_IPV4) ? (CSUM_IP_CHECKED | CSUM_IP_VALID) : 0; if (flags & EFX_CKSUM_TCPUDP) csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; if (flags & (EFX_PKT_IPV4 | EFX_PKT_IPV6)) { m->m_pkthdr.flowid = efx_psuedo_hdr_hash_get(sc->enp, EFX_RX_HASHALG_TOEPLITZ, mtod(m, uint8_t *)); /* The hash covers a 4-tuple for TCP only */ M_HASHTYPE_SET(m, (flags & EFX_PKT_IPV4) ? ((flags & EFX_PKT_TCP) ? M_HASHTYPE_RSS_TCP_IPV4 : M_HASHTYPE_RSS_IPV4) : ((flags & EFX_PKT_TCP) ? M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_IPV6)); }
/* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * In the IP forwarding case, the packet will arrive with options already * inserted, so must have a NULL opt pointer. */ int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct rm_priotracker in_ifa_tracker; struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); int mtu; int error = 0; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; int isbroadcast; uint16_t ip_len, ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; int have_ia_ref; #ifdef IPSEC int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if ((flags & IP_NODEFAULTFLOWID) == 0) { m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } } if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET, m, ro); #endif if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip_fillid(ip); IPSTAT_INC(ips_localout); } else {
/* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. * If route ro is present and has ro_rt initialized, route lookup would be * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, * then result of route lookup is stored in ro->ro_rt. * * In the IP forwarding case, the packet will arrive with options already * inserted, so must have a NULL opt pointer. */ int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, struct ip_moptions *imo, struct inpcb *inp) { struct ip *ip; struct ifnet *ifp = NULL; /* keep compiler happy */ struct mbuf *m0; int hlen = sizeof (struct ip); int mtu; int n; /* scratchpad */ int error = 0; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; int isbroadcast; uint16_t ip_len, ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ struct in_addr odst; struct m_tag *fwd_tag = NULL; int have_ia_ref; #ifdef IPSEC int no_route_but_check_spd = 0; #endif M_ASSERTPKTHDR(m); if (inp != NULL) { INP_LOCK_ASSERT(inp); M_SETFIB(m, inp->inp_inc.inc_fibnum); if (inp->inp_flowtype != M_HASHTYPE_NONE) { m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); } } if (ro == NULL) { ro = &iproute; bzero(ro, sizeof (*ro)); } #ifdef FLOWTABLE if (ro->ro_rt == NULL) (void )flowtable_lookup(AF_INET, m, ro); #endif if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); if (len != 0) hlen = len; /* ip->ip_hl is updated above */ } ip = mtod(m, struct ip *); ip_len = ntohs(ip->ip_len); ip_off = ntohs(ip->ip_off); /* * Fill in IP header. If we are not allowing fragmentation, * then the ip_id field is meaningless, but we don't set it * to zero. Doing so causes various problems when devices along * the path (routers, load balancers, firewalls, etc.) illegally * disable DF on our packet. Note that a 16-bit counter * will wrap around in less than 10 seconds at 100 Mbit/s on a * medium with MTU 1500. See Steven M. Bellovin, "A Technique * for Counting NATted Hosts", Proc. IMW'02, available at * <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>. */ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_hl = hlen >> 2; ip->ip_id = ip_newid(); IPSTAT_INC(ips_localout); } else {
/* * Name: qla_rx_intr * Function: Handles normal ethernet frames received */ static void qla_rx_intr(qla_host_t *ha, qla_sgl_rcv_t *sgc, uint32_t sds_idx) { qla_rx_buf_t *rxb; struct mbuf *mp = NULL, *mpf = NULL, *mpl = NULL; struct ifnet *ifp = ha->ifp; qla_sds_t *sdsp; struct ether_vlan_header *eh; uint32_t i, rem_len = 0; uint32_t r_idx = 0; qla_rx_ring_t *rx_ring; if (ha->hw.num_rds_rings > 1) r_idx = sds_idx; ha->hw.rds[r_idx].count++; sdsp = &ha->hw.sds[sds_idx]; rx_ring = &ha->rx_ring[r_idx]; for (i = 0; i < sgc->num_handles; i++) { rxb = &rx_ring->rx_buf[sgc->handle[i] & 0x7FFF]; QL_ASSERT(ha, (rxb != NULL), ("%s: [sds_idx]=[%d] rxb != NULL\n", __func__,\ sds_idx)); if ((rxb == NULL) || QL_ERR_INJECT(ha, INJCT_RX_RXB_INVAL)) { /* log the error */ device_printf(ha->pci_dev, "%s invalid rxb[%d, %d, 0x%04x]\n", __func__, sds_idx, i, sgc->handle[i]); qla_rcv_error(ha); return; } mp = rxb->m_head; if (i == 0) mpf = mp; QL_ASSERT(ha, (mp != NULL), ("%s: [sds_idx]=[%d] mp != NULL\n", __func__,\ sds_idx)); bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_POSTREAD); rxb->m_head = NULL; rxb->next = sdsp->rxb_free; sdsp->rxb_free = rxb; sdsp->rx_free++; if ((mp == NULL) || QL_ERR_INJECT(ha, INJCT_RX_MP_NULL)) { /* log the error */ device_printf(ha->pci_dev, "%s mp == NULL [%d, %d, 0x%04x]\n", __func__, sds_idx, i, sgc->handle[i]); qla_rcv_error(ha); return; } if (i == 0) { mpl = mpf = mp; mp->m_flags |= M_PKTHDR; mp->m_pkthdr.len = sgc->pkt_length; mp->m_pkthdr.rcvif = ifp; rem_len = mp->m_pkthdr.len; } else { mp->m_flags &= ~M_PKTHDR; mpl->m_next = mp; mpl = mp; rem_len = rem_len - mp->m_len; } } mpl->m_len = rem_len; eh = mtod(mpf, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { uint32_t *data = (uint32_t *)eh; mpf->m_pkthdr.ether_vtag = ntohs(eh->evl_tag); mpf->m_flags |= M_VLANTAG; *(data + 3) = *(data + 2); *(data + 2) = *(data + 1); *(data + 1) = *data; m_adj(mpf, ETHER_VLAN_ENCAP_LEN); } if (sgc->chksum_status == Q8_STAT_DESC_STATUS_CHKSUM_OK) { mpf->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mpf->m_pkthdr.csum_data = 0xFFFF; } else { mpf->m_pkthdr.csum_flags = 0; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); mpf->m_pkthdr.flowid = sgc->rss_hash; M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE); (*ifp->if_input)(ifp, mpf); if (sdsp->rx_free > ha->std_replenish) qla_replenish_normal_rx(ha, sdsp, r_idx); return; }
/* * Name: qla_lro_intr * Function: Handles normal ethernet frames received */ static int qla_lro_intr(qla_host_t *ha, qla_sgl_lro_t *sgc, uint32_t sds_idx) { qla_rx_buf_t *rxb; struct mbuf *mp = NULL, *mpf = NULL, *mpl = NULL; struct ifnet *ifp = ha->ifp; qla_sds_t *sdsp; struct ether_vlan_header *eh; uint32_t i, rem_len = 0, pkt_length, iplen; struct tcphdr *th; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; uint16_t etype; uint32_t r_idx = 0; qla_rx_ring_t *rx_ring; if (ha->hw.num_rds_rings > 1) r_idx = sds_idx; ha->hw.rds[r_idx].count++; rx_ring = &ha->rx_ring[r_idx]; ha->lro_pkt_count++; sdsp = &ha->hw.sds[sds_idx]; pkt_length = sgc->payload_length + sgc->l4_offset; if (sgc->flags & Q8_LRO_COMP_TS) { pkt_length += QLA_TCP_HDR_SIZE + QLA_TCP_TS_OPTION_SIZE; } else { pkt_length += QLA_TCP_HDR_SIZE; } ha->lro_bytes += pkt_length; for (i = 0; i < sgc->num_handles; i++) { rxb = &rx_ring->rx_buf[sgc->handle[i] & 0x7FFF]; QL_ASSERT(ha, (rxb != NULL), ("%s: [sds_idx]=[%d] rxb != NULL\n", __func__,\ sds_idx)); if ((rxb == NULL) || QL_ERR_INJECT(ha, INJCT_LRO_RXB_INVAL)) { /* log the error */ device_printf(ha->pci_dev, "%s invalid rxb[%d, %d, 0x%04x]\n", __func__, sds_idx, i, sgc->handle[i]); qla_rcv_error(ha); return (0); } mp = rxb->m_head; if (i == 0) mpf = mp; QL_ASSERT(ha, (mp != NULL), ("%s: [sds_idx]=[%d] mp != NULL\n", __func__,\ sds_idx)); bus_dmamap_sync(ha->rx_tag, rxb->map, BUS_DMASYNC_POSTREAD); rxb->m_head = NULL; rxb->next = sdsp->rxb_free; sdsp->rxb_free = rxb; sdsp->rx_free++; if ((mp == NULL) || QL_ERR_INJECT(ha, INJCT_LRO_MP_NULL)) { /* log the error */ device_printf(ha->pci_dev, "%s mp == NULL [%d, %d, 0x%04x]\n", __func__, sds_idx, i, sgc->handle[i]); qla_rcv_error(ha); return (0); } if (i == 0) { mpl = mpf = mp; mp->m_flags |= M_PKTHDR; mp->m_pkthdr.len = pkt_length; mp->m_pkthdr.rcvif = ifp; rem_len = mp->m_pkthdr.len; } else { mp->m_flags &= ~M_PKTHDR; mpl->m_next = mp; mpl = mp; rem_len = rem_len - mp->m_len; } } mpl->m_len = rem_len; th = (struct tcphdr *)(mpf->m_data + sgc->l4_offset); if (sgc->flags & Q8_LRO_COMP_PUSH_BIT) th->th_flags |= TH_PUSH; m_adj(mpf, sgc->l2_offset); eh = mtod(mpf, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { uint32_t *data = (uint32_t *)eh; mpf->m_pkthdr.ether_vtag = ntohs(eh->evl_tag); mpf->m_flags |= M_VLANTAG; *(data + 3) = *(data + 2); *(data + 2) = *(data + 1); *(data + 1) = *data; m_adj(mpf, ETHER_VLAN_ENCAP_LEN); etype = ntohs(eh->evl_proto); } else { etype = ntohs(eh->evl_encap_proto); } if (etype == ETHERTYPE_IP) { ip = (struct ip *)(mpf->m_data + ETHER_HDR_LEN); iplen = (ip->ip_hl << 2) + (th->th_off << 2) + sgc->payload_length; ip->ip_len = htons(iplen); ha->ipv4_lro++; } else if (etype == ETHERTYPE_IPV6) { ip6 = (struct ip6_hdr *)(mpf->m_data + ETHER_HDR_LEN); iplen = (th->th_off << 2) + sgc->payload_length; ip6->ip6_plen = htons(iplen); ha->ipv6_lro++; } else { m_freem(mpf); if (sdsp->rx_free > ha->std_replenish) qla_replenish_normal_rx(ha, sdsp, r_idx); return 0; } mpf->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR; mpf->m_pkthdr.csum_data = 0xFFFF; mpf->m_pkthdr.flowid = sgc->rss_hash; M_HASHTYPE_SET(mpf, M_HASHTYPE_OPAQUE); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); (*ifp->if_input)(ifp, mpf); if (sdsp->rx_free > ha->std_replenish) qla_replenish_normal_rx(ha, sdsp, r_idx); return (0); }
static int udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6, struct mbuf *control, struct thread *td) { u_int32_t ulen = m->m_pkthdr.len; u_int32_t plen = sizeof(struct udphdr) + ulen; struct ip6_hdr *ip6; struct udphdr *udp6; struct in6_addr *laddr, *faddr, in6a; struct sockaddr_in6 *sin6 = NULL; int cscov_partial = 0; int scope_ambiguous = 0; u_short fport; int error = 0; uint8_t nxt; uint16_t cscov = 0; struct ip6_pktopts *optp, opt; int af = AF_INET6, hlen = sizeof(struct ip6_hdr); int flags; struct sockaddr_in6 tmp; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); if (addr6) { /* addr6 has been validated in udp6_send(). */ sin6 = (struct sockaddr_in6 *)addr6; /* protect *sin6 from overwrites */ tmp = *sin6; sin6 = &tmp; /* * Application should provide a proper zone ID or the use of * default zone IDs should be enabled. Unfortunately, some * applications do not behave as it should, so we need a * workaround. Even if an appropriate ID is not determined, * we'll see if we can determine the outgoing interface. If we * can, determine the zone ID based on the interface below. */ if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) scope_ambiguous = 1; if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) return (error); } nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ? IPPROTO_UDP : IPPROTO_UDPLITE; if (control) { if ((error = ip6_setpktopts(control, &opt, inp->in6p_outputopts, td->td_ucred, nxt)) != 0) goto release; optp = &opt; } else optp = inp->in6p_outputopts; if (sin6) { faddr = &sin6->sin6_addr; /* * Since we saw no essential reason for calling in_pcbconnect, * we get rid of such kind of logic, and call in6_selectsrc * and in6_pcbsetport in order to fill in the local address * and the local port. */ if (sin6->sin6_port == 0) { error = EADDRNOTAVAIL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { /* how about ::ffff:0.0.0.0 case? */ error = EISCONN; goto release; } fport = sin6->sin6_port; /* allow 0 port */ if (IN6_IS_ADDR_V4MAPPED(faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * I believe we should explicitly discard the * packet when mapped addresses are disabled, * rather than send the packet as an IPv6 one. * If we chose the latter approach, the packet * might be sent out on the wire based on the * default route, the situation which we'd * probably want to avoid. * (20010421 [email protected]) */ error = EINVAL; goto release; } if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) && !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) { /* * when remote addr is an IPv4-mapped address, * local addr should not be an IPv6 address, * since you cannot determine how to map IPv6 * source address to IPv4. */ error = EINVAL; goto release; } af = AF_INET; } if (!IN6_IS_ADDR_V4MAPPED(faddr)) { error = in6_selectsrc_socket(sin6, optp, inp, td->td_ucred, scope_ambiguous, &in6a, NULL); if (error) goto release; laddr = &in6a; } else laddr = &inp->in6p_laddr; /* XXX */ if (laddr == NULL) { if (error == 0) error = EADDRNOTAVAIL; goto release; } if (inp->inp_lport == 0 && (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) { /* Undo an address bind that may have occurred. */ inp->in6p_laddr = in6addr_any; goto release; } } else { if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { error = ENOTCONN; goto release; } if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) { if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) { /* * XXX: this case would happen when the * application sets the V6ONLY flag after * connecting the foreign address. * Such applications should be fixed, * so we bark here. */ log(LOG_INFO, "udp6_output: IPV6_V6ONLY " "option was set for a connected socket\n"); error = EINVAL; goto release; } else af = AF_INET; } laddr = &inp->in6p_laddr; faddr = &inp->in6p_faddr; fport = inp->inp_fport; } if (af == AF_INET) hlen = sizeof(struct ip); /* * Calculate data length and get a mbuf * for UDP and IP6 headers. */ M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto release; } /* * Stuff checksum and output datagram. */ udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen); udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */ udp6->uh_dport = fport; if (nxt == IPPROTO_UDPLITE) { struct udpcb *up; up = intoudpcb(inp); cscov = up->u_txcslen; if (cscov >= plen) cscov = 0; udp6->uh_ulen = htons(cscov); /* * For UDP-Lite, checksum coverage length of zero means * the entire UDPLite packet is covered by the checksum. */ cscov_partial = (cscov == 0) ? 0 : 1; } else if (plen <= 0xffff) udp6->uh_ulen = htons((u_short)plen); else udp6->uh_ulen = 0; udp6->uh_sum = 0; switch (af) { case AF_INET6: ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = inp->inp_flow & IPV6_FLOWINFO_MASK; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_plen = htons((u_short)plen); ip6->ip6_nxt = nxt; ip6->ip6_hlim = in6_selecthlim(inp, NULL); ip6->ip6_src = *laddr; ip6->ip6_dst = *faddr; if (cscov_partial) { if ((udp6->uh_sum = in6_cksum_partial(m, nxt, sizeof(struct ip6_hdr), plen, cscov)) == 0) udp6->uh_sum = 0xffff; } else { udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0); m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); } #ifdef RSS { uint32_t hash_val, hash_type; uint8_t pr; pr = inp->inp_socket->so_proto->pr_protocol; /* * Calculate an appropriate RSS hash for UDP and * UDP Lite. * * The called function will take care of figuring out * whether a 2-tuple or 4-tuple hash is required based * on the currently configured scheme. * * Later later on connected socket values should be * cached in the inpcb and reused, rather than constantly * re-calculating it. * * UDP Lite is a different protocol number and will * likely end up being hashed as a 2-tuple until * RSS / NICs grow UDP Lite protocol awareness. */ if (rss_proto_software_hash_v6(faddr, laddr, fport, inp->inp_lport, pr, &hash_val, &hash_type) == 0) { m->m_pkthdr.flowid = hash_val; M_HASHTYPE_SET(m, hash_type); } } #endif flags = 0; #ifdef RSS /* * Don't override with the inp cached flowid. * * Until the whole UDP path is vetted, it may actually * be incorrect. */ flags |= IP_NODEFAULTFLOWID; #endif UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); error = ip6_output(m, optp, &inp->inp_route6, flags, inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT; goto release; } goto releaseopt; release: m_freem(m); releaseopt: if (control) { ip6_clearpktopts(&opt, -1); m_freem(control); } return (error); }