/** * Called to deliver a frame to either the host, the wire or both. */ int vboxNetFltPortOsXmit(PVBOXNETFLTINS pThis, void *pvIfData, PINTNETSG pSG, uint32_t fDst) { NOREF(pvIfData); void (*input_f)(struct ifnet *, struct mbuf *); struct ifnet *ifp; struct mbuf *m; struct m_tag *mtag; bool fActive; int error; ifp = ASMAtomicUoReadPtrT(&pThis->u.s.ifp, struct ifnet *); VBOXCURVNET_SET(ifp->if_vnet); if (fDst & INTNETTRUNKDIR_WIRE) { m = vboxNetFltFreeBSDSGMBufFromSG(pThis, pSG); if (m == NULL) return VERR_NO_MEMORY; m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return VERR_NO_MEMORY; m->m_flags |= M_PKTHDR; ether_output_frame(ifp, m); } if (fDst & INTNETTRUNKDIR_HOST) { m = vboxNetFltFreeBSDSGMBufFromSG(pThis, pSG); if (m == NULL) return VERR_NO_MEMORY; m = m_pullup(m, ETHER_HDR_LEN); if (m == NULL) return VERR_NO_MEMORY; /* * Delivering packets to the host will be captured by the * input hook. Tag the packet with a mbuf tag so that we * can skip re-delivery of the packet to the guest during * input hook processing. */ mtag = m_tag_alloc(MTAG_VBOX, PACKET_TAG_VBOX, 0, M_NOWAIT); if (mtag == NULL) { m_freem(m); return VERR_NO_MEMORY; } m_tag_init(m); m_tag_prepend(m, mtag); m->m_flags |= M_PKTHDR; m->m_pkthdr.rcvif = ifp; ifp->if_input(ifp, m); } VBOXCURVNET_RESTORE(); return VINF_SUCCESS; }
/** * Output processing task, handles outgoing frames */ static void vboxNetFltFreeBSDoutput(void *arg, int pending) { PVBOXNETFLTINS pThis = (PVBOXNETFLTINS)arg; struct mbuf *m, *m0; struct ifnet *ifp = pThis->u.s.ifp; unsigned int cSegs = 0; bool fDropIt = false, fActive; PINTNETSG pSG; VBOXCURVNET_SET(ifp->if_vnet); vboxNetFltRetain(pThis, true /* fBusy */); for (;;) { mtx_lock_spin(&pThis->u.s.outq.ifq_mtx); _IF_DEQUEUE(&pThis->u.s.outq, m); mtx_unlock_spin(&pThis->u.s.outq.ifq_mtx); if (m == NULL) break; for (m0 = m; m0 != NULL; m0 = m0->m_next) if (m0->m_len > 0) cSegs++; #ifdef PADD_RUNT_FRAMES_FROM_HOST if (m_length(m, NULL) < 60) cSegs++; #endif /* Create a copy and deliver to the virtual switch */ pSG = RTMemTmpAlloc(RT_OFFSETOF(INTNETSG, aSegs[cSegs])); vboxNetFltFreeBSDMBufToSG(pThis, m, pSG, cSegs, 0); fDropIt = pThis->pSwitchPort->pfnRecv(pThis->pSwitchPort, NULL /* pvIf */, pSG, INTNETTRUNKDIR_HOST); RTMemTmpFree(pSG); if (fDropIt) m_freem(m); else ether_output_frame(ifp, m); } vboxNetFltRelease(pThis, true /* fBusy */); VBOXCURVNET_RESTORE(); }
/* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { short type; int error = 0, hdrcmplt = 0; u_char esrc[ETHER_ADDR_LEN], edst[ETHER_ADDR_LEN]; struct llentry *lle = NULL; struct rtentry *rt0 = NULL; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ if (ro != NULL) { if (!(m->m_flags & (M_BCAST | M_MCAST))) lle = ro->ro_lle; rt0 = ro->ro_rt; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); hlen = ETHER_HDR_LEN; switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = 0; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN); else bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (lle->la_flags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle); if (error) return error; type = htons(ETHERTYPE_IPV6); break; #endif #ifdef IPX case AF_IPX: if (ef_outputp) { error = ef_outputp(ifp, &m, dst, &type, &hlen); if (error) goto bad; } else type = htons(ETHERTYPE_IPX); bcopy(&((const struct sockaddr_ipx *)dst)->sipx_addr.x_host, edst, sizeof (edst)); break; #endif #ifdef NETATALK case AF_APPLETALK: { struct at_ifaddr *aa; if ((aa = at_ifawithnet((const struct sockaddr_at *)dst)) == NULL) senderr(EHOSTUNREACH); /* XXX */ if (!aarpresolve(ifp, m, (const struct sockaddr_at *)dst, edst)) { ifa_free(&aa->aa_ifa); return (0); } /* * In the phase 2 case, need to prepend an mbuf for the llc header. */ if ( aa->aa_flags & AFA_PHASE2 ) { struct llc llc; ifa_free(&aa->aa_ifa); M_PREPEND(m, LLC_SNAPFRAMELEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP; llc.llc_control = LLC_UI; bcopy(at_org_code, llc.llc_snap_org_code, sizeof(at_org_code)); llc.llc_snap_ether_type = htons( ETHERTYPE_AT ); bcopy(&llc, mtod(m, caddr_t), LLC_SNAPFRAMELEN); type = htons(m->m_pkthdr.len); hlen = LLC_SNAPFRAMELEN + ETHER_HDR_LEN; } else { ifa_free(&aa->aa_ifa); type = htons(ETHERTYPE_AT); } break; } #endif /* NETATALK */ case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; eh = (const struct ether_header *)dst->sa_data; (void)memcpy(esrc, eh->ether_shost, sizeof (esrc)); /* FALLTHROUGH */ case AF_UNSPEC: loop_copy = 0; /* if this is for us, don't do it */ eh = (const struct ether_header *)dst->sa_data; (void)memcpy(edst, eh->ether_dhost, sizeof (edst)); type = eh->ether_type; break; } #ifdef MPLS case AF_MPLS: if (lle != NULL && (lle->la_flags & LLE_VALID)) bcopy(&lle->ll_addr.mac16 , edst, sizeof(edst)); else error = mpls_arpresolve(ifp, rt0, m, dst, edst, &lle); if (error) return (error == EWOULDBLOCK ? 0 : error); if (m->m_flags & (M_BCAST | M_MCAST)) type = htons(ETHERTYPE_MPLS_MCAST); else type = htons(ETHERTYPE_MPLS); break; #endif /* MPLS */ default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } if (lle != NULL && (lle->la_flags & LLE_IFADDR)) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); eh = mtod(m, struct ether_header *); (void)memcpy(&eh->ether_type, &type, sizeof(eh->ether_type)); (void)memcpy(eh->ether_dhost, edst, sizeof (edst)); if (hdrcmplt) (void)memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost)); else (void)memcpy(eh->ether_shost, IF_LLADDR(ifp), sizeof(eh->ether_shost)); /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { if (m->m_flags & M_BCAST) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else ifp->if_iqdrops++; } else if (bcmp(eh->ether_dhost, eh->ether_shost, ETHER_ADDR_LEN) == 0) { update_mbuf_csumflags(m, m); (void) if_simloop(ifp, m, dst->sa_family, hlen); return (0); /* XXX */ } } /* * Bridges require special output handling. */ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (IFP2AC(ifp)->ac_netgraph != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); }
/* * Ethernet output routine. * Encapsulate a packet of type family for the local net. * Use trailer local net encapsulation if enough data in first * packet leaves a multiple of 512 bytes of data in remainder. */ int ether_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { short type; int error = 0, hdrcmplt = 0; u_char edst[ETHER_ADDR_LEN]; struct llentry *lle = NULL; struct rtentry *rt0 = NULL; struct ether_header *eh; struct pf_mtag *t; int loop_copy = 1; int hlen; /* link layer header length */ int is_gw = 0; uint32_t pflags = 0; if (ro != NULL) { if (!(m->m_flags & (M_BCAST | M_MCAST))) { lle = ro->ro_lle; if (lle != NULL) pflags = lle->la_flags; } rt0 = ro->ro_rt; if (rt0 != NULL && (rt0->rt_flags & RTF_GATEWAY) != 0) is_gw = 1; } #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) senderr(error); #endif M_PROFILE(m); if (ifp->if_flags & IFF_MONITOR) senderr(ENETDOWN); if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) senderr(ENETDOWN); hlen = ETHER_HDR_LEN; switch (dst->sa_family) { #ifdef INET case AF_INET: if (lle != NULL && (pflags & LLE_VALID) != 0) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = arpresolve(ifp, is_gw, m, dst, edst, &pflags); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IP); break; case AF_ARP: { struct arphdr *ah; ah = mtod(m, struct arphdr *); ah->ar_hrd = htons(ARPHRD_ETHER); loop_copy = 0; /* if this is for us, don't do it */ switch(ntohs(ah->ar_op)) { case ARPOP_REVREQUEST: case ARPOP_REVREPLY: type = htons(ETHERTYPE_REVARP); break; case ARPOP_REQUEST: case ARPOP_REPLY: default: type = htons(ETHERTYPE_ARP); break; } if (m->m_flags & M_BCAST) bcopy(ifp->if_broadcastaddr, edst, ETHER_ADDR_LEN); else bcopy(ar_tha(ah), edst, ETHER_ADDR_LEN); } break; #endif #ifdef INET6 case AF_INET6: if (lle != NULL && (pflags & LLE_VALID)) memcpy(edst, &lle->ll_addr.mac16, sizeof(edst)); else error = nd6_resolve(ifp, is_gw, m, dst, (u_char *)edst, &pflags); if (error) return (error == EWOULDBLOCK ? 0 : error); type = htons(ETHERTYPE_IPV6); break; #endif case pseudo_AF_HDRCMPLT: { const struct ether_header *eh; hdrcmplt = 1; /* FALLTHROUGH */ case AF_UNSPEC: loop_copy = 0; /* if this is for us, don't do it */ eh = (const struct ether_header *)dst->sa_data; (void)memcpy(edst, eh->ether_dhost, sizeof (edst)); type = eh->ether_type; break; } default: if_printf(ifp, "can't handle af%d\n", dst->sa_family); senderr(EAFNOSUPPORT); } if ((pflags & LLE_IFADDR) != 0) { update_mbuf_csumflags(m, m); return (if_simloop(ifp, m, dst->sa_family, 0)); } /* * Add local net header. If no space in first mbuf, * allocate another. */ M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT); if (m == NULL) senderr(ENOBUFS); eh = mtod(m, struct ether_header *); if (hdrcmplt == 0) { memcpy(&eh->ether_type, &type, sizeof(eh->ether_type)); memcpy(eh->ether_dhost, edst, sizeof (edst)); memcpy(eh->ether_shost, IF_LLADDR(ifp),sizeof(eh->ether_shost)); } /* * If a simplex interface, and the packet is being sent to our * Ethernet address or a broadcast address, loopback a copy. * XXX To make a simplex device behave exactly like a duplex * device, we should copy in the case of sending to our own * ethernet address (thus letting the original actually appear * on the wire). However, we don't do that here for security * reasons and compatibility with the original behavior. */ if ((ifp->if_flags & IFF_SIMPLEX) && loop_copy && ((t = pf_find_mtag(m)) == NULL || !t->routed)) { if (m->m_flags & M_BCAST) { struct mbuf *n; /* * Because if_simloop() modifies the packet, we need a * writable copy through m_dup() instead of a readonly * one as m_copy[m] would give us. The alternative would * be to modify if_simloop() to handle the readonly mbuf, * but performancewise it is mostly equivalent (trading * extra data copying vs. extra locking). * * XXX This is a local workaround. A number of less * often used kernel parts suffer from the same bug. * See PR kern/105943 for a proposed general solution. */ if ((n = m_dup(m, M_NOWAIT)) != NULL) { update_mbuf_csumflags(m, n); (void)if_simloop(ifp, n, dst->sa_family, hlen); } else if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); } else if (bcmp(eh->ether_dhost, eh->ether_shost, ETHER_ADDR_LEN) == 0) { update_mbuf_csumflags(m, m); (void) if_simloop(ifp, m, dst->sa_family, hlen); return (0); /* XXX */ } } /* * Bridges require special output handling. */ if (ifp->if_bridge) { BRIDGE_OUTPUT(ifp, m, error); return (error); } #if defined(INET) || defined(INET6) if (ifp->if_carp && (error = (*carp_output_p)(ifp, m, dst))) goto bad; #endif /* Handle ng_ether(4) processing, if any */ if (ifp->if_l2com != NULL) { KASSERT(ng_ether_output_p != NULL, ("ng_ether_output_p is NULL")); if ((error = (*ng_ether_output_p)(ifp, &m)) != 0) { bad: if (m != NULL) m_freem(m); return (error); } if (m == NULL) return (0); } /* Continue with link-layer output */ return ether_output_frame(ifp, m); }
/** * Handle data on netgraph hooks. * Frames processing is deferred to a taskqueue because this might * be called with non-sleepable locks held and code paths inside * the virtual switch might sleep. */ static int ng_vboxnetflt_rcvdata(hook_p hook, item_p item) { const node_p node = NG_HOOK_NODE(hook); PVBOXNETFLTINS pThis = NG_NODE_PRIVATE(node); struct ifnet *ifp = pThis->u.s.ifp; struct mbuf *m; struct m_tag *mtag; bool fActive; VBOXCURVNET_SET(ifp->if_vnet); fActive = vboxNetFltTryRetainBusyActive(pThis); NGI_GET_M(item, m); NG_FREE_ITEM(item); /* Locate tag to see if processing should be skipped for this frame */ mtag = m_tag_locate(m, MTAG_VBOX, PACKET_TAG_VBOX, NULL); if (mtag != NULL) { m_tag_unlink(m, mtag); m_tag_free(mtag); } /* * Handle incoming hook. This is connected to the * input path of the interface, thus handling incoming frames. */ if (pThis->u.s.input == hook) { if (mtag != NULL || !fActive) { ether_demux(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); VBOXCURVNET_RESTORE(); return (0); } mtx_lock_spin(&pThis->u.s.inq.ifq_mtx); _IF_ENQUEUE(&pThis->u.s.inq, m); mtx_unlock_spin(&pThis->u.s.inq.ifq_mtx); taskqueue_enqueue_fast(taskqueue_fast, &pThis->u.s.tskin); } /* * Handle mbufs on the outgoing hook, frames going to the interface */ else if (pThis->u.s.output == hook) { if (mtag != NULL || !fActive) { int rc = ether_output_frame(ifp, m); if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); VBOXCURVNET_RESTORE(); return rc; } mtx_lock_spin(&pThis->u.s.outq.ifq_mtx); _IF_ENQUEUE(&pThis->u.s.outq, m); mtx_unlock_spin(&pThis->u.s.outq.ifq_mtx); taskqueue_enqueue_fast(taskqueue_fast, &pThis->u.s.tskout); } else { m_freem(m); } if (fActive) vboxNetFltRelease(pThis, true /*fBusy*/); VBOXCURVNET_RESTORE(); return (0); }
/* * forward a chain of packets to the proper destination. * This runs outside the dummynet lock. */ static void dummynet_send(struct mbuf *m) { struct mbuf *n; for (; m != NULL; m = n) { struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */ struct m_tag *tag; int dst; n = m->m_nextpkt; m->m_nextpkt = NULL; tag = m_tag_first(m); if (tag == NULL) { /* should not happen */ dst = DIR_DROP; } else { struct dn_pkt_tag *pkt = dn_tag_get(m); /* extract the dummynet info, rename the tag * to carry reinject info. */ dst = pkt->dn_dir; ifp = pkt->ifp; tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; } switch (dst) { case DIR_OUT: ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); break ; case DIR_IN : netisr_dispatch(NETISR_IP, m); break; #ifdef INET6 case DIR_IN | PROTO_IPV6: netisr_dispatch(NETISR_IPV6, m); break; case DIR_OUT | PROTO_IPV6: ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; #endif case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */ if (bridge_dn_p != NULL) ((*bridge_dn_p)(m, ifp)); else printf("dummynet: if_bridge not loaded\n"); break; case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */ /* * The Ethernet code assumes the Ethernet header is * contiguous in the first mbuf header. * Insure this is true. */ if (m->m_len < ETHER_HDR_LEN && (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { printf("dummynet/ether: pullup failed, " "dropping packet\n"); break; } ether_demux(m->m_pkthdr.rcvif, m); break; case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */ ether_output_frame(ifp, m); break; case DIR_DROP: /* drop the packet after some time */ FREE_PKT(m); break; default: printf("dummynet: bad switch %d!\n", dst); FREE_PKT(m); break; } } }