/*
 * Locate the dummynet per-packet state attached to an mbuf.
 *
 * The state is stored immediately after the header of the first tag on
 * the mbuf's tag list; that tag is asserted to be the dummynet tag
 * (cookie MTAG_ABI_COMPAT, id PACKET_TAG_DUMMYNET).  The returned pointer
 * addresses the memory directly following that tag header.
 */
static struct dn_pkt_tag *
dn_tag_get(struct mbuf *m)
{
	struct m_tag *first = m_tag_first(m);

	KASSERT(first != NULL &&
	    first->m_tag_cookie == MTAG_ABI_COMPAT &&
	    first->m_tag_id == PACKET_TAG_DUMMYNET,
	    ("packet on dummynet queue w/o dummynet tag!"));

	/* The payload lives right behind the tag header. */
	return (struct dn_pkt_tag *)(first + 1);
}
/*
 * IPsec policy hook for outbound IPv4 packets.
 *
 * Called from ip_output().
 *
 * Parameters:
 *   m      in/out pointer to the packet being sent
 *   inp    protocol control block passed on to the policy lookup
 *   flags  ip_output() flags, passed on to the policy lookup and to
 *          ipsec4_process_packet()
 *   error  out: error code for the caller; it is rewritten to 0 for the
 *          "silent" cases (-EINVAL, ENOENT, EJUSTRETURN)
 *
 * Returns:
 *   1  = drop packet,
 *   0  = continue processing packet,
 *  -1  = packet was reinjected and stop processing packet
 *
 * When IPSEC is not compiled in, this is a no-op that returns 0.
 */
int
ip_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error)
{
#ifdef IPSEC
	struct secpolicy *sp = NULL;
	struct ip *ip = mtod(*m, struct ip *);
	struct tdb_ident *tdbi;
	struct m_tag *mtag;
	int s;

	/*
	 * Check the security policy (SP) for the packet and, if
	 * required, do IPsec-related processing.  There are two
	 * cases here; the first time a packet is sent through
	 * it will be untagged and handled by ipsec4_checkpolicy.
	 * If the packet is resubmitted to ip_output (e.g. after
	 * AH, ESP, etc. processing), there will be a tag to bypass
	 * the lookup and related policy checking.
	 */
	mtag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
	s = splnet();
	if (mtag != NULL) {
		tdbi = (struct tdb_ident *)(mtag + 1);
		sp = ipsec_getpolicy(tdbi, IPSEC_DIR_OUTBOUND);
		if (sp == NULL)
			*error = -EINVAL;	/* force silent drop */
		m_tag_delete(*m, mtag);
	} else {
		sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
		    error, inp);
	}

	/*
	 * There are four return cases:
	 *    sp != NULL			apply IPsec policy
	 *    sp == NULL, error == 0		no IPsec handling needed
	 *    sp == NULL, error == -EINVAL	discard packet w/o error
	 *    sp == NULL, error != 0		discard packet, report error
	 */
	if (sp != NULL) {
		/* Loop detection, check if ipsec processing already done */
		KASSERT(sp->req != NULL, ("ip_output: no ipsec request"));
		for (mtag = m_tag_first(*m); mtag != NULL;
		     mtag = m_tag_next(*m, mtag)) {
			if (mtag->m_tag_cookie != MTAG_ABI_COMPAT)
				continue;
			if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE &&
			    mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)
				continue;
			/*
			 * Check if policy has an SA associated with it.
			 * This can happen when an SP has yet to acquire
			 * an SA; e.g. on first reference.  If it occurs,
			 * then we let ipsec4_process_packet do its thing.
			 */
			if (sp->req->sav == NULL)
				break;
			tdbi = (struct tdb_ident *)(mtag + 1);
			if (tdbi->spi == sp->req->sav->spi &&
			    tdbi->proto == sp->req->sav->sah->saidx.proto &&
			    bcmp(&tdbi->dst, &sp->req->sav->sah->saidx.dst,
				 sizeof (union sockaddr_union)) == 0) {
				/*
				 * No IPsec processing is needed, free
				 * reference to SP.
				 *
				 * NB: null pointer to avoid free at
				 *     done: below.
				 */
				KEY_FREESP(&sp);
				sp = NULL;
				splx(s);
				goto done;
			}
		}

		/*
		 * Do delayed checksums now because we send before
		 * this is done in the normal processing path.
		 */
		if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
			in_delayed_cksum(*m);
			(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
		}
#ifdef SCTP
		if ((*m)->m_pkthdr.csum_flags & CSUM_SCTP) {
			sctp_delayed_cksum(*m, (uint32_t)(ip->ip_hl << 2));
			(*m)->m_pkthdr.csum_flags &= ~CSUM_SCTP;
		}
#endif
		/* Length and offset are converted with htons() for the callee. */
		ip->ip_len = htons(ip->ip_len);
		ip->ip_off = htons(ip->ip_off);

		/* NB: callee frees mbuf */
		*error = ipsec4_process_packet(*m, sp->req, *flags, 0);
		if (*error == EJUSTRETURN) {
			/*
			 * We had a SP with a level of 'use' and no SA. We
			 * will just continue to process the packet without
			 * IPsec processing and return without error.
			 */
			*error = 0;
			/* Undo the htons() conversion done above. */
			ip->ip_len = ntohs(ip->ip_len);
			ip->ip_off = ntohs(ip->ip_off);
			/*
			 * Restore the level saved by splnet() above; every
			 * other exit path does so and this one used to
			 * leave without the matching splx().
			 */
			splx(s);
			goto done;
		}
		/*
		 * Preserve KAME behaviour: ENOENT can be returned
		 * when an SA acquire is in progress.  Don't propagate
		 * this to user-level; it confuses applications.
		 *
		 * XXX this will go away when the SADB is redone.
		 */
		if (*error == ENOENT)
			*error = 0;
		splx(s);
		goto reinjected;
	} else {	/* sp == NULL */
		splx(s);

		if (*error != 0) {
			/*
			 * Hack: -EINVAL is used to signal that a packet
			 * should be silently discarded.  This is typically
			 * because we asked key management for an SA and
			 * it was delayed (e.g. kicked up to IKE).
			 */
			if (*error == -EINVAL)
				*error = 0;
			goto bad;
		} else {
			/* No IPsec processing for this packet. */
		}
	}
done:
	if (sp != NULL)
		KEY_FREESP(&sp);
	return 0;
reinjected:
	if (sp != NULL)
		KEY_FREESP(&sp);
	return -1;
bad:
	if (sp != NULL)
		KEY_FREESP(&sp);
	return 1;
#endif /* IPSEC */
	return 0;
}
void ip6_forward(struct mbuf *m, int srcrt) { struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct sockaddr_in6 *dst; struct rtentry *rt; int error = 0, type = 0, code = 0; struct mbuf *mcopy = NULL; struct ifnet *origifp; /* maybe unnecessary */ #ifdef IPSEC u_int8_t sproto = 0; struct m_tag *mtag; union sockaddr_union sdst; struct tdb_ident *tdbi; u_int32_t sspi; struct tdb *tdb; int s; #if NPF > 0 struct ifnet *encif; #endif #endif /* IPSEC */ u_int rtableid = 0; /* * Do not forward packets to multicast destination (should be handled * by ip6_mforward(). * Do not forward packets with unspecified source. It was discussed * in July 2000, on ipngwg mailing list. */ if ((m->m_flags & (M_BCAST|M_MCAST)) != 0 || IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) || IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) { ip6stat.ip6s_cantforward++; /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ if (ip6_log_time + ip6_log_interval < time_second) { ip6_log_time = time_second; log(LOG_DEBUG, "cannot forward " "from %s to %s nxt %d received on %s\n", ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), ip6->ip6_nxt, m->m_pkthdr.rcvif->if_xname); } m_freem(m); return; } if (ip6->ip6_hlim <= IPV6_HLIMDEC) { /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard) */ icmp6_error(m, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT, 0); return; } ip6->ip6_hlim -= IPV6_HLIMDEC; #if NPF > 0 reroute: #endif #ifdef IPSEC if (!ipsec_in_use) goto done_spd; s = splnet(); /* * Check if there was an outgoing SA bound to the flow * from a transport protocol. */ /* Do we have any pending SAs to apply ? 
*/ mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL); if (mtag != NULL) { #ifdef DIAGNOSTIC if (mtag->m_tag_len != sizeof (struct tdb_ident)) panic("ip6_forward: tag of length %d (should be %d", mtag->m_tag_len, sizeof (struct tdb_ident)); #endif tdbi = (struct tdb_ident *)(mtag + 1); tdb = gettdb(tdbi->rdomain, tdbi->spi, &tdbi->dst, tdbi->proto); if (tdb == NULL) error = -EINVAL; m_tag_delete(m, mtag); } else tdb = ipsp_spd_lookup(m, AF_INET6, sizeof(struct ip6_hdr), &error, IPSP_DIRECTION_OUT, NULL, NULL); if (tdb == NULL) { splx(s); if (error == 0) { /* * No IPsec processing required, we'll just send the * packet out. */ sproto = 0; /* Fall through to routing/multicast handling */ } else { /* * -EINVAL is used to indicate that the packet should * be silently dropped, typically because we've asked * key management for an SA. */ if (error == -EINVAL) /* Should silently drop packet */ error = 0; goto freecopy; } } else { /* Loop detection */ for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) { if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE && mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED) continue; tdbi = (struct tdb_ident *)(mtag + 1); if (tdbi->spi == tdb->tdb_spi && tdbi->proto == tdb->tdb_sproto && tdbi->rdomain == tdb->tdb_rdomain && !bcmp(&tdbi->dst, &tdb->tdb_dst, sizeof(union sockaddr_union))) { splx(s); sproto = 0; /* mark as no-IPsec-needed */ goto done_spd; } } /* We need to do IPsec */ bcopy(&tdb->tdb_dst, &sdst, sizeof(sdst)); sspi = tdb->tdb_spi; sproto = tdb->tdb_sproto; splx(s); } /* Fall through to the routing/multicast handling code */ done_spd: #endif /* IPSEC */ #if NPF > 0 rtableid = m->m_pkthdr.rdomain; #endif /* * Save at most ICMPV6_PLD_MAXLEN (= the min IPv6 MTU - * size of IPv6 + ICMPv6 headers) bytes of the packet in case * we need to generate an ICMP6 message to the src. * Thanks to M_EXT, in most cases copy will not occur. 
* * It is important to save it before IPsec processing as IPsec * processing may modify the mbuf. */ mcopy = m_copy(m, 0, imin(m->m_pkthdr.len, ICMPV6_PLD_MAXLEN)); dst = &ip6_forward_rt.ro_dst; if (!srcrt) { /* * ip6_forward_rt.ro_dst.sin6_addr is equal to ip6->ip6_dst */ if (ip6_forward_rt.ro_rt == 0 || (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) == 0 || ip6_forward_rt.ro_tableid != rtableid) { if (ip6_forward_rt.ro_rt) { RTFREE(ip6_forward_rt.ro_rt); ip6_forward_rt.ro_rt = 0; } /* this probably fails but give it a try again */ ip6_forward_rt.ro_tableid = rtableid; rtalloc_mpath((struct route *)&ip6_forward_rt, &ip6->ip6_src.s6_addr32[0]); } if (ip6_forward_rt.ro_rt == 0) { ip6stat.ip6s_noroute++; /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_noroute) */ if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } m_freem(m); return; } } else if (ip6_forward_rt.ro_rt == 0 || (ip6_forward_rt.ro_rt->rt_flags & RTF_UP) == 0 || !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &dst->sin6_addr) || ip6_forward_rt.ro_tableid != rtableid) { if (ip6_forward_rt.ro_rt) { RTFREE(ip6_forward_rt.ro_rt); ip6_forward_rt.ro_rt = 0; } bzero(dst, sizeof(*dst)); dst->sin6_len = sizeof(struct sockaddr_in6); dst->sin6_family = AF_INET6; dst->sin6_addr = ip6->ip6_dst; ip6_forward_rt.ro_tableid = rtableid; rtalloc_mpath((struct route *)&ip6_forward_rt, &ip6->ip6_src.s6_addr32[0]); if (ip6_forward_rt.ro_rt == 0) { ip6stat.ip6s_noroute++; /* XXX in6_ifstat_inc(rt->rt_ifp, ifs6_in_noroute) */ if (mcopy) { icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE, 0); } m_freem(m); return; } } rt = ip6_forward_rt.ro_rt; /* * Scope check: if a packet can't be delivered to its destination * for the reason that the destination is beyond the scope of the * source address, discard the packet and return an icmp6 destination * unreachable error with Code 2 (beyond scope of source address). 
* [draft-ietf-ipngwg-icmp-v3-00.txt, Section 3.1] */ if (in6_addr2scopeid(m->m_pkthdr.rcvif, &ip6->ip6_src) != in6_addr2scopeid(rt->rt_ifp, &ip6->ip6_src)) { ip6stat.ip6s_cantforward++; ip6stat.ip6s_badscope++; in6_ifstat_inc(rt->rt_ifp, ifs6_in_discard); if (ip6_log_time + ip6_log_interval < time_second) { ip6_log_time = time_second; log(LOG_DEBUG, "cannot forward " "src %s, dst %s, nxt %d, rcvif %s, outif %s\n", ip6_sprintf(&ip6->ip6_src), ip6_sprintf(&ip6->ip6_dst), ip6->ip6_nxt, m->m_pkthdr.rcvif->if_xname, rt->rt_ifp->if_xname); } if (mcopy) icmp6_error(mcopy, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_BEYONDSCOPE, 0); m_freem(m); goto freert; } #ifdef IPSEC /* * Check if the packet needs encapsulation. * ipsp_process_packet will never come back to here. * XXX ipsp_process_packet() calls ip6_output(), and there'll be no * PMTU notification. is it okay? */ if (sproto != 0) { s = splnet(); tdb = gettdb(rtable_l2(m->m_pkthdr.rdomain), sspi, &sdst, sproto); if (tdb == NULL) { splx(s); error = EHOSTUNREACH; m_freem(m); goto senderr; /*XXX*/ } #if NPF > 0 if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL || pf_test6(PF_FWD, encif, &m, NULL) != PF_PASS) { splx(s); error = EHOSTUNREACH; m_freem(m); goto senderr; } if (m == NULL) { splx(s); goto senderr; } ip6 = mtod(m, struct ip6_hdr *); /* * PF_TAG_REROUTE handling or not... * Packet is entering IPsec so the routing is * already overruled by the IPsec policy. * Until now the change was not reconsidered. * What's the behaviour? */ #endif m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */ /* Callee frees mbuf */ error = ipsp_process_packet(m, tdb, AF_INET6, 0); splx(s); m_freem(mcopy); goto freert; }
/*
 * IPsec policy hook for outbound IPv6 packets.
 *
 * Looks up the security policy that applies to *m and hands it back to
 * the caller through *sp.
 *
 * Parameters:
 *   m      in/out pointer to the packet being sent
 *   inp    protocol control block passed on to the policy lookup
 *   flags  output flags, passed on to the policy lookup
 *   error  out: error code; -EINVAL and ENOENT are rewritten to 0
 *   ifp    not used by this function
 *   sp     out: policy to apply, or NULL when none applies
 *
 * Returns:
 *   1  = drop packet (also returned when sp itself is NULL),
 *   0  = continue processing packet without IPsec,
 *  -1  = caller must perform IPsec processing using *sp
 *
 * When IPSEC is not compiled in, this is a no-op that returns 0.
 *
 * XXX unlike the IPv4 variant, no splnet()/splx() bracket is taken here.
 */
int
ip6_ipsec_output(struct mbuf **m, struct inpcb *inp, int *flags, int *error,
    struct ifnet **ifp, struct secpolicy **sp)
{
#ifdef IPSEC
	struct tdb_ident *ident;
	struct m_tag *tag;

	/* Without somewhere to store the policy there is nothing to do. */
	if (sp == NULL)
		return 1;

	/*
	 * A pending-TDB tag short-circuits the policy lookup; an untagged
	 * packet goes through the regular policy check.
	 */
	tag = m_tag_find(*m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
	if (tag == NULL) {
		*sp = ipsec4_checkpolicy(*m, IPSEC_DIR_OUTBOUND, *flags,
		    error, inp);
	} else {
		ident = (struct tdb_ident *)(tag + 1);
		*sp = ipsec_getpolicy(ident, IPSEC_DIR_OUTBOUND);
		if (*sp == NULL)
			*error = -EINVAL;	/* force silent drop */
		m_tag_delete(*m, tag);
	}

	/*
	 * Outcomes of the lookup:
	 *    *sp != NULL			apply IPsec policy
	 *    *sp == NULL, error == 0		no IPsec handling needed
	 *    *sp == NULL, error == -EINVAL	discard packet w/o error
	 *    *sp == NULL, error != 0		discard packet, report error
	 */
	if (*sp == NULL) {
		if (*error == 0)
			return 0;	/* no IPsec processing for this packet */
		/*
		 * Hack: -EINVAL marks a packet that must be discarded
		 * silently, typically because key management was asked
		 * for an SA and the answer is delayed (e.g. kicked up
		 * to IKE).
		 */
		if (*error == -EINVAL)
			*error = 0;
		return 1;
	}

	/* Loop detection: has IPsec processing already been done? */
	KASSERT((*sp)->req != NULL, ("ip_output: no ipsec request"));
	tag = m_tag_first(*m);
	while (tag != NULL) {
		if (tag->m_tag_cookie == MTAG_ABI_COMPAT &&
		    (tag->m_tag_id == PACKET_TAG_IPSEC_OUT_DONE ||
		     tag->m_tag_id == PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED)) {
			/*
			 * The policy may not have an SA yet (e.g. on first
			 * reference); in that case stop looking and let
			 * the IPsec processing acquire one.
			 */
			if ((*sp)->req->sav == NULL)
				break;

			ident = (struct tdb_ident *)(tag + 1);
			if (ident->spi == (*sp)->req->sav->spi &&
			    ident->proto == (*sp)->req->sav->sah->saidx.proto &&
			    bcmp(&ident->dst, &(*sp)->req->sav->sah->saidx.dst,
				 sizeof (union sockaddr_union)) == 0) {
				/*
				 * Already processed with this SA: drop our
				 * policy reference and clear the pointer so
				 * the caller does not use it.
				 */
				KEY_FREESP(sp);
				*sp = NULL;
				return 0;
			}
		}
		tag = m_tag_next(*m, tag);
	}

	/*
	 * Do delayed checksums now because we send before
	 * this is done in the normal processing path.
	 */
	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		ipseclog((LOG_DEBUG,
		    "%s: we do not support IPv4 over IPv6", __func__));
#ifdef INET
		in_delayed_cksum(*m);
#endif
		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
	}

	/*
	 * Preserve KAME behaviour: ENOENT can be returned
	 * when an SA acquire is in progress.  Don't propagate
	 * this to user-level; it confuses applications.
	 *
	 * XXX this will go away when the SADB is redone.
	 */
	if (*error == ENOENT)
		*error = 0;
	return -1;
#endif /* IPSEC */
	return 0;
}
/* * dummynet hook for packets. * We use the argument to locate the flowset fs and the sched_set sch * associated to it. The we apply flow_mask and sched_mask to * determine the queue and scheduler instances. * * dir where shall we send the packet after dummynet. * *m0 the mbuf with the packet * ifp the 'ifp' parameter from the caller. * NULL in ip_input, destination interface in ip_output, */ int dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) { struct mbuf *m = *m0; struct dn_fsk *fs = NULL; struct dn_sch_inst *si; struct dn_queue *q = NULL; /* default */ int fs_id = (fwa->rule.info & IPFW_INFO_MASK) + ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0); DN_BH_WLOCK(); io_pkt++; /* we could actually tag outside the lock, but who cares... */ if (tag_mbuf(m, dir, fwa)) goto dropit; if (dn_cfg.busy) { /* if the upper half is busy doing something expensive, * lets queue the packet and move forward */ mq_append(&dn_cfg.pending, m); m = *m0 = NULL; /* consumed */ goto done; /* already active, nothing to do */ } /* XXX locate_flowset could be optimised with a direct ref. */ fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL); if (fs == NULL) goto dropit; /* This queue/pipe does not exist! */ if (fs->sched == NULL) /* should not happen */ goto dropit; /* find scheduler instance, possibly applying sched_mask */ si = ipdn_si_find(fs->sched, &(fwa->f_id)); if (si == NULL) goto dropit; /* * If the scheduler supports multiple queues, find the right one * (otherwise it will be ignored by enqueue). 
*/ if (fs->sched->fp->flags & DN_MULTIQUEUE) { q = ipdn_q_find(fs, si, &(fwa->f_id)); if (q == NULL) goto dropit; } if (fs->sched->fp->enqueue(si, q, m)) { /* packet was dropped by enqueue() */ m = *m0 = NULL; goto dropit; } if (si->kflags & DN_ACTIVE) { m = *m0 = NULL; /* consumed */ goto done; /* already active, nothing to do */ } /* compute the initial allowance */ if (si->idle_time < dn_cfg.curr_time) { /* Do this only on the first packet on an idle pipe */ struct dn_link *p = &fs->sched->link; si->sched_time = dn_cfg.curr_time; si->credit = dn_cfg.io_fast ? p->bandwidth : 0; if (p->burst) { uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth; if (burst > p->burst) burst = p->burst; si->credit += burst; } } /* pass through scheduler and delay line */ m = serve_sched(NULL, si, dn_cfg.curr_time); /* optimization -- pass it back to ipfw for immediate send */ /* XXX Don't call dummynet_send() if scheduler return the packet * just enqueued. This avoid a lock order reversal. * */ if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) { /* fast io, rename the tag * to carry reinject info. */ struct m_tag *tag = m_tag_first(m); tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; io_pkt_fast++; if (m->m_nextpkt != NULL) { printf("dummynet: fast io: pkt chain detected!\n"); m->m_nextpkt = NULL; } m = NULL; } else { *m0 = NULL; } done: DN_BH_WUNLOCK(); if (m) dummynet_send(m); return 0; dropit: io_pkt_drop++; DN_BH_WUNLOCK(); if (m) FREE_PKT(m); *m0 = NULL; return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS; }
/* * forward a chain of packets to the proper destination. * This runs outside the dummynet lock. */ static void dummynet_send(struct mbuf *m) { struct mbuf *n; for (; m != NULL; m = n) { struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */ struct m_tag *tag; int dst; n = m->m_nextpkt; m->m_nextpkt = NULL; tag = m_tag_first(m); if (tag == NULL) { /* should not happen */ dst = DIR_DROP; } else { struct dn_pkt_tag *pkt = dn_tag_get(m); /* extract the dummynet info, rename the tag * to carry reinject info. */ dst = pkt->dn_dir; ifp = pkt->ifp; tag->m_tag_cookie = MTAG_IPFW_RULE; tag->m_tag_id = 0; } switch (dst) { case DIR_OUT: ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); break ; case DIR_IN : netisr_dispatch(NETISR_IP, m); break; #ifdef INET6 case DIR_IN | PROTO_IPV6: netisr_dispatch(NETISR_IPV6, m); break; case DIR_OUT | PROTO_IPV6: ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); break; #endif case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */ if (bridge_dn_p != NULL) ((*bridge_dn_p)(m, ifp)); else printf("dummynet: if_bridge not loaded\n"); break; case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */ /* * The Ethernet code assumes the Ethernet header is * contiguous in the first mbuf header. * Insure this is true. */ if (m->m_len < ETHER_HDR_LEN && (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { printf("dummynet/ether: pullup failed, " "dropping packet\n"); break; } ether_demux(m->m_pkthdr.rcvif, m); break; case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */ ether_output_frame(ifp, m); break; case DIR_DROP: /* drop the packet after some time */ FREE_PKT(m); break; default: printf("dummynet: bad switch %d!\n", dst); FREE_PKT(m); break; } } }