Example #1
0
void
an_start(struct ifnet *ifp)
{
	struct an_softc *sc = (struct an_softc *)ifp->if_softc;
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211_node *ni;
	struct ieee80211_frame *wh;
	struct an_txframe frmhdr;
	struct mbuf *m;
	u_int16_t len;
	int cur, fid;

	if (!sc->sc_enabled || sc->sc_invalid) {
		DPRINTF(("an_start: noop: enabled %d invalid %d\n",
		    sc->sc_enabled, sc->sc_invalid));
		return;
	}

	memset(&frmhdr, 0, sizeof(frmhdr));
	cur = sc->sc_txnext;
	for (;;) {
		if (ic->ic_state != IEEE80211_S_RUN) {
			DPRINTF(("an_start: not running %d\n", ic->ic_state));
			break;
		}
		m = ifq_deq_begin(&ifp->if_snd);
		if (m == NULL) {
			DPRINTF2(("an_start: no pending mbuf\n"));
			break;
		}
		if (sc->sc_txd[cur].d_inuse) {
			ifq_deq_rollback(&ifp->if_snd, m);
			DPRINTF2(("an_start: %x/%d busy\n",
			    sc->sc_txd[cur].d_fid, cur));
			ifq_set_oactive(&ifp->if_snd);
			break;
		}
		ifq_deq_commit(&ifp->if_snd, m);
		ifp->if_opackets++;
#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif
		if ((m = ieee80211_encap(ifp, m, &ni)) == NULL) {
			ifp->if_oerrors++;
			continue;
		}
		if (ni != NULL)
			ieee80211_release_node(ic, ni);
#if NBPFILTER > 0
		if (ic->ic_rawbpf)
			bpf_mtap(ic->ic_rawbpf, m, BPF_DIRECTION_OUT);
#endif

		wh = mtod(m, struct ieee80211_frame *);
		if (ic->ic_flags & IEEE80211_F_WEPON)
			wh->i_fc[1] |= IEEE80211_FC1_WEP;
		m_copydata(m, 0, sizeof(struct ieee80211_frame),
		    (caddr_t)&frmhdr.an_whdr);
		an_swap16((u_int16_t *)&frmhdr.an_whdr, sizeof(struct ieee80211_frame)/2);

		/* insert payload length in front of llc/snap */
		len = htons(m->m_pkthdr.len - sizeof(struct ieee80211_frame));
		m_adj(m, sizeof(struct ieee80211_frame) - sizeof(len));
		if (mtod(m, u_long) & 0x01)
			memcpy(mtod(m, caddr_t), &len, sizeof(len));
		else
			*mtod(m, u_int16_t *) = len;

		/*
		 * XXX The Aironet firmware apparently converts packets
		 * longer than 1500 bytes into LLC/SNAP.  An Ethernet
		 * payload of exactly 1500 bytes is 1508 bytes once the
		 * LLC/SNAP header is included, so the firmware would
		 * prepend an additional LLC/SNAP header carrying
		 * 1501-1508 in its ethertype!
		 * So we strip the LLC/SNAP header here and force the
		 * firmware to regenerate it.
		 */
		m_adj(m, sizeof(struct llc));

		frmhdr.an_tx_ctl = AN_TXCTL_80211;
		frmhdr.an_tx_payload_len = m->m_pkthdr.len;
		frmhdr.an_gaplen = AN_TXGAP_802_11;

		if (ic->ic_fixed_rate != -1)
			frmhdr.an_tx_rate =
			    ic->ic_sup_rates[IEEE80211_MODE_11B].rs_rates[
			    ic->ic_fixed_rate] & IEEE80211_RATE_VAL;
		else
			frmhdr.an_tx_rate = 0;

		if (sizeof(frmhdr) + AN_TXGAP_802_11 + sizeof(len) +
		    m->m_pkthdr.len > AN_TX_MAX_LEN) {
			ifp->if_oerrors++;
			m_freem(m);
			continue;
		}

#if NBPFILTER > 0
		if (sc->sc_drvbpf) {
			struct mbuf mb;
			struct an_tx_radiotap_header *tap = &sc->sc_txtap;

			tap->at_rate = 
			    ic->ic_bss->ni_rates.rs_rates[ic->ic_bss->ni_txrate];
			tap->at_chan_freq =
			    ic->ic_bss->ni_chan->ic_freq;
			tap->at_chan_flags =
			    ic->ic_bss->ni_chan->ic_flags;

			mb.m_data = (caddr_t)tap;
			mb.m_len = sizeof(sc->sc_txtapu);
			mb.m_next = m;
			mb.m_nextpkt = NULL;
			mb.m_type = 0;
			mb.m_flags = 0;
			/* tap the radiotap header chained in front of the frame */
			bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_OUT);
		}
#endif

		fid = sc->sc_txd[cur].d_fid;
		if (an_write_bap(sc, fid, 0, &frmhdr, sizeof(frmhdr)) != 0) {
			ifp->if_oerrors++;
			m_freem(m);
			continue;
		}
		/* dummy write to avoid seek. */
		an_write_bap(sc, fid, -1, &frmhdr, AN_TXGAP_802_11);
		an_mwrite_bap(sc, fid, -1, m, m->m_pkthdr.len);
		m_freem(m);

		DPRINTF2(("an_start: send %d byte via %x/%d\n",
		    ntohs(len) + sizeof(struct ieee80211_frame),
		    fid, cur));
		sc->sc_txd[cur].d_inuse = 1;
		if (an_cmd(sc, AN_CMD_TX, fid)) {
			printf("%s: xmit failed\n", ifp->if_xname);
			sc->sc_txd[cur].d_inuse = 0;
			continue;
		}
		sc->sc_tx_timer = 5;
		ifp->if_timer = 1;
		AN_INC(cur, AN_TX_RING_CNT);
		sc->sc_txnext = cur;
	}
}
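
The odd-address check before storing len matters on strict-alignment CPUs: after m_adj() the mbuf data pointer can land on an odd byte. A minimal sketch of the same idiom as a standalone helper; the name put_u16() is mine, not the driver's:

/*
 * Sketch: store a 16-bit value at a possibly misaligned address.
 * A direct *(u_int16_t *) store faults on strict-alignment CPUs
 * when the low address bit is set, so fall back to memcpy() there.
 */
static void
put_u16(void *p, u_int16_t v)
{
	if ((u_long)p & 0x01)
		memcpy(p, &v, sizeof(v));	/* byte-wise, alignment-safe */
	else
		*(u_int16_t *)p = v;		/* aligned fast path */
}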
Example #2
0
/*
 * Help break down an mbuf chain by making the first siz bytes
 * contiguous and returning a pointer to them.
 * This is used by the nfsm_dissect macros for the tough
 * cases.  (The macros use the variables dpos and dpos2.)
 */
void *
nfsm_disct(struct mbuf **mdp, caddr_t *dposp, int siz, int left, int how)
{
	struct mbuf *mp, *mp2;
	int siz2, xfer;
	caddr_t ptr, npos = NULL;
	void *ret;

	mp = *mdp;
	while (left == 0) {
		*mdp = mp = mp->m_next;
		if (mp == NULL)
			return (NULL);
		left = mp->m_len;
		*dposp = mtod(mp, caddr_t);
	}
	if (left >= siz) {
		ret = *dposp;
		*dposp += siz;
	} else if (mp->m_next == NULL) {
		return (NULL);
	} else if (siz > MHLEN) {
		panic("nfs S too big");
	} else {
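		/*
		 * The requested bytes straddle mbufs: splice a fresh mbuf
		 * (mp2) in after mp, copy the `left' bytes remaining in mp
		 * into it, then pull the other siz - left bytes forward
		 * from the following mbufs so the caller sees siz
		 * contiguous bytes.
		 */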
		MGET(mp2, how, MT_DATA);
		if (mp2 == NULL)
			return (NULL);
		mp2->m_len = siz;
		mp2->m_next = mp->m_next;
		mp->m_next = mp2;
		mp->m_len -= left;
		mp = mp2;
		ptr = mtod(mp, caddr_t);
		ret = ptr;
		bcopy(*dposp, ptr, left);		/* Copy what was left */
		siz2 = siz-left;
		ptr += left;
		mp2 = mp->m_next;
		npos = mtod(mp2, caddr_t);
		/* Loop around copying up the siz2 bytes */
		while (siz2 > 0) {
			if (mp2 == NULL)
				return (NULL);
			xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
			if (xfer > 0) {
				bcopy(mtod(mp2, caddr_t), ptr, xfer);
				mp2->m_data += xfer;
				mp2->m_len -= xfer;
				ptr += xfer;
				siz2 -= xfer;
			}
			if (siz2 > 0) {
				mp2 = mp2->m_next;
				if (mp2 != NULL)
					npos = mtod(mp2, caddr_t);
			}
		}
		*mdp = mp2;
		*dposp = mtod(mp2, caddr_t);
		if (!nfsm_aligned(*dposp, u_int32_t)) {
			bcopy(*dposp, npos, mp2->m_len);
			mp2->m_data = npos;
			*dposp = npos;
		}
	}
	return (ret);
}
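
A hedged sketch of how a dissect step might call nfsm_disct(), assuming md and dpos already track the parse position as in the comment above; the EBADRPC handling is illustrative:

/* Sketch: pull a 4-byte XDR word that may straddle mbuf boundaries. */
struct mbuf *md;	/* current mbuf in the parse */
caddr_t dpos;		/* current position within md */
u_int32_t *tl, value;
int left;

left = mtod(md, caddr_t) + md->m_len - dpos;
tl = nfsm_disct(&md, &dpos, NFSX_UNSIGNED, left, M_WAIT);
if (tl == NULL)
	return (EBADRPC);	/* chain exhausted or allocation failed */
value = ntohl(*tl);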
Example #3
0
static
void
ip6_input(netmsg_t msg)
{
	struct mbuf *m = msg->packet.nm_packet;
	struct ip6_hdr *ip6;
	int off = sizeof(struct ip6_hdr), nest;
	u_int32_t plen;
	u_int32_t rtalert = ~0;
	int nxt, ours = 0, rh_present = 0;
	struct ifnet *deliverifp = NULL;
	struct in6_addr odst;
	int srcrt = 0;

#ifdef IPSEC
	/*
	 * should the inner packet be considered authentic?
	 * see comment in ah4_input().
	 */
	if (m) {
		m->m_flags &= ~M_AUTHIPHDR;
		m->m_flags &= ~M_AUTHIPDGM;
	}
#endif

	/*
	 * Make sure we don't have stale IPsec "onion peeling"
	 * information attached to the mbuf via m_aux.
	 */

	/*
	 * mbuf statistics
	 */
	if (m->m_flags & M_EXT) {
		if (m->m_next)
			ip6stat.ip6s_mext2m++;
		else
			ip6stat.ip6s_mext1++;
	} else {
#define M2MMAX	NELEM(ip6stat.ip6s_m2m)
		if (m->m_next) {
			if (m->m_flags & M_LOOP) {
				ip6stat.ip6s_m2m[loif[0].if_index]++;	/* XXX */
			} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
				ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
			else
				ip6stat.ip6s_m2m[0]++;
		} else
			ip6stat.ip6s_m1++;
#undef M2MMAX
	}

	in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
	ip6stat.ip6s_total++;

#ifndef PULLDOWN_TEST
	/*
	 * The L2 bridge code and some other code paths can return an
	 * mbuf chain that does not conform to the KAME requirement.
	 * Too bad.
	 * XXX: fails to join if interface MTU > MCLBYTES.  Jumbogram?
	 */
	if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
		struct mbuf *n;

		n = m_getb(m->m_pkthdr.len, MB_DONTWAIT, MT_HEADER, M_PKTHDR);
		if (n == NULL)
			goto bad;
		M_MOVE_PKTHDR(n, m);

		m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
		n->m_len = n->m_pkthdr.len;
		m_freem(m);
		m = n;
	}
	IP6_EXTHDR_CHECK_VOIDRET(m, 0, sizeof(struct ip6_hdr));
#endif

	if (m->m_len < sizeof(struct ip6_hdr)) {
		struct ifnet *inifp;
		inifp = m->m_pkthdr.rcvif;
		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
			ip6stat.ip6s_toosmall++;
			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
			goto bad2;
		}
	}

	ip6 = mtod(m, struct ip6_hdr *);

	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
		ip6stat.ip6s_badvers++;
		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
		goto bad;
	}

	/*
	 * Run through list of hooks for input packets.
	 *
	 * NB: Beware of the destination address changing
	 *     (e.g. by NAT rewriting). When this happens,
	 *     tell ip6_forward to do the right thing.
	 */
	if (pfil_has_hooks(&inet6_pfil_hook)) {
		odst = ip6->ip6_dst;
		if (pfil_run_hooks(&inet6_pfil_hook, &m,
		    m->m_pkthdr.rcvif, PFIL_IN)) {
			goto bad2;
		}
		if (m == NULL)			/* consumed by filter */
			goto bad2;
		ip6 = mtod(m, struct ip6_hdr *);
		srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
	}
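
The m_pullup() step above is the standard contiguity idiom: never cast mtod() to a header type unless the first mbuf is known to hold at least that many bytes. A generic hedged sketch of the pattern:

/*
 * Sketch: guarantee a contiguous header before dereferencing it.
 * m_pullup() may substitute a new first mbuf and frees the whole
 * chain on failure, so its result must replace the old pointer.
 */
if (m->m_len < sizeof(struct ip6_hdr)) {
	m = m_pullup(m, sizeof(struct ip6_hdr));
	if (m == NULL)
		return;			/* chain already freed */
}
ip6 = mtod(m, struct ip6_hdr *);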
Example #4
0
static int
udp6_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr6,
    struct mbuf *control, struct thread *td)
{
	u_int32_t ulen = m->m_pkthdr.len;
	u_int32_t plen = sizeof(struct udphdr) + ulen;
	struct ip6_hdr *ip6;
	struct udphdr *udp6;
	struct in6_addr *laddr, *faddr, in6a;
	struct sockaddr_in6 *sin6 = NULL;
	int cscov_partial = 0;
	int scope_ambiguous = 0;
	u_short fport;
	int error = 0;
	uint8_t nxt;
	uint16_t cscov = 0;
	struct ip6_pktopts *optp, opt;
	int af = AF_INET6, hlen = sizeof(struct ip6_hdr);
	int flags;
	struct sockaddr_in6 tmp;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo);

	if (addr6) {
		/* addr6 has been validated in udp6_send(). */
		sin6 = (struct sockaddr_in6 *)addr6;

		/* protect *sin6 from overwrites */
		tmp = *sin6;
		sin6 = &tmp;

		/*
		 * The application should provide a proper zone ID, or the use
		 * of default zone IDs should be enabled.  Unfortunately, some
		 * applications do not behave as they should, so we need a
		 * workaround.  Even if an appropriate ID is not determined,
		 * we'll see if we can determine the outgoing interface.  If we
		 * can, determine the zone ID based on the interface below.
		 */
		if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone)
			scope_ambiguous = 1;
		if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0)
			return (error);
	}

	nxt = (inp->inp_socket->so_proto->pr_protocol == IPPROTO_UDP) ?
	    IPPROTO_UDP : IPPROTO_UDPLITE;
	if (control) {
		if ((error = ip6_setpktopts(control, &opt,
		    inp->in6p_outputopts, td->td_ucred, nxt)) != 0)
			goto release;
		optp = &opt;
	} else
		optp = inp->in6p_outputopts;

	if (sin6) {
		faddr = &sin6->sin6_addr;

		/*
		 * Since we saw no essential reason for calling in_pcbconnect,
		 * we drop that kind of logic and instead call in6_selectsrc
		 * and in6_pcbsetport in order to fill in the local address
		 * and the local port.
		 */
		if (sin6->sin6_port == 0) {
			error = EADDRNOTAVAIL;
			goto release;
		}

		if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
			/* how about ::ffff:0.0.0.0 case? */
			error = EISCONN;
			goto release;
		}

		fport = sin6->sin6_port; /* allow 0 port */

		if (IN6_IS_ADDR_V4MAPPED(faddr)) {
			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
				/*
				 * I believe we should explicitly discard the
				 * packet when mapped addresses are disabled,
				 * rather than send the packet as an IPv6 one.
				 * If we chose the latter approach, the packet
				 * might be sent out on the wire based on the
				 * default route, the situation which we'd
				 * probably want to avoid.
				 * (20010421 [email protected])
				 */
				error = EINVAL;
				goto release;
			}
			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) &&
			    !IN6_IS_ADDR_V4MAPPED(&inp->in6p_laddr)) {
				/*
				 * when remote addr is an IPv4-mapped address,
				 * local addr should not be an IPv6 address,
				 * since you cannot determine how to map IPv6
				 * source address to IPv4.
				 */
				error = EINVAL;
				goto release;
			}

			af = AF_INET;
		}

		if (!IN6_IS_ADDR_V4MAPPED(faddr)) {
			error = in6_selectsrc_socket(sin6, optp, inp,
			    td->td_ucred, scope_ambiguous, &in6a, NULL);
			if (error)
				goto release;
			laddr = &in6a;
		} else
			laddr = &inp->in6p_laddr;	/* XXX */
		if (laddr == NULL) {
			if (error == 0)
				error = EADDRNOTAVAIL;
			goto release;
		}
		if (inp->inp_lport == 0 &&
		    (error = in6_pcbsetport(laddr, inp, td->td_ucred)) != 0) {
			/* Undo an address bind that may have occurred. */
			inp->in6p_laddr = in6addr_any;
			goto release;
		}
	} else {
		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) {
			error = ENOTCONN;
			goto release;
		}
		if (IN6_IS_ADDR_V4MAPPED(&inp->in6p_faddr)) {
			if ((inp->inp_flags & IN6P_IPV6_V6ONLY)) {
				/*
				 * XXX: this case would happen when the
				 * application sets the V6ONLY flag after
				 * connecting the foreign address.
				 * Such applications should be fixed,
				 * so we bark here.
				 */
				log(LOG_INFO, "udp6_output: IPV6_V6ONLY "
				    "option was set for a connected socket\n");
				error = EINVAL;
				goto release;
			} else
				af = AF_INET;
		}
		laddr = &inp->in6p_laddr;
		faddr = &inp->in6p_faddr;
		fport = inp->inp_fport;
	}

	if (af == AF_INET)
		hlen = sizeof(struct ip);

	/*
	 * Calculate data length and get a mbuf
	 * for UDP and IP6 headers.
	 */
	M_PREPEND(m, hlen + sizeof(struct udphdr), M_NOWAIT);
	if (m == NULL) {
		error = ENOBUFS;
		goto release;
	}

	/*
	 * Stuff checksum and output datagram.
	 */
	udp6 = (struct udphdr *)(mtod(m, caddr_t) + hlen);
	udp6->uh_sport = inp->inp_lport; /* lport is always set in the PCB */
	udp6->uh_dport = fport;
	if (nxt == IPPROTO_UDPLITE) {
		struct udpcb *up;

		up = intoudpcb(inp);
		cscov = up->u_txcslen;
		if (cscov >= plen)
			cscov = 0;
		udp6->uh_ulen = htons(cscov);
		/*
		 * For UDP-Lite, checksum coverage length of zero means
		 * the entire UDPLite packet is covered by the checksum.
		 */
		cscov_partial = (cscov == 0) ? 0 : 1;
	} else if (plen <= 0xffff)
		udp6->uh_ulen = htons((u_short)plen);
	else
		udp6->uh_ulen = 0;
	udp6->uh_sum = 0;

	switch (af) {
	case AF_INET6:
		ip6 = mtod(m, struct ip6_hdr *);
		ip6->ip6_flow	= inp->inp_flow & IPV6_FLOWINFO_MASK;
		ip6->ip6_vfc	&= ~IPV6_VERSION_MASK;
		ip6->ip6_vfc	|= IPV6_VERSION;
		ip6->ip6_plen	= htons((u_short)plen);
		ip6->ip6_nxt	= nxt;
		ip6->ip6_hlim	= in6_selecthlim(inp, NULL);
		ip6->ip6_src	= *laddr;
		ip6->ip6_dst	= *faddr;

		if (cscov_partial) {
			if ((udp6->uh_sum = in6_cksum_partial(m, nxt,
			    sizeof(struct ip6_hdr), plen, cscov)) == 0)
				udp6->uh_sum = 0xffff;
		} else {
			udp6->uh_sum = in6_cksum_pseudo(ip6, plen, nxt, 0);
			m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
			m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
		}

#ifdef	RSS
		{
			uint32_t hash_val, hash_type;
			uint8_t pr;

			pr = inp->inp_socket->so_proto->pr_protocol;
			/*
			 * Calculate an appropriate RSS hash for UDP and
			 * UDP Lite.
			 *
			 * The called function will take care of figuring out
			 * whether a 2-tuple or 4-tuple hash is required based
			 * on the currently configured scheme.
			 *
			 * Later on, connected socket values should be
			 * cached in the inpcb and reused, rather than
			 * constantly re-calculated.
			 *
			 * UDP Lite is a different protocol number and will
			 * likely end up being hashed as a 2-tuple until
			 * RSS / NICs grow UDP Lite protocol awareness.
			 */
			if (rss_proto_software_hash_v6(faddr, laddr, fport,
			    inp->inp_lport, pr, &hash_val, &hash_type) == 0) {
				m->m_pkthdr.flowid = hash_val;
				M_HASHTYPE_SET(m, hash_type);
			}
		}
#endif
		flags = 0;
#ifdef	RSS
		/*
		 * Don't override with the inp cached flowid.
		 *
		 * Until the whole UDP path is vetted, it may actually
		 * be incorrect.
		 */
		flags |= IP_NODEFAULTFLOWID;
#endif

		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
		UDPSTAT_INC(udps_opackets);
		error = ip6_output(m, optp, &inp->inp_route6, flags,
		    inp->in6p_moptions, NULL, inp);
		break;
	case AF_INET:
		error = EAFNOSUPPORT;
		goto release;
	}
	goto releaseopt;

release:
	m_freem(m);

releaseopt:
	if (control) {
		ip6_clearpktopts(&opt, -1);
		m_freem(control);
	}
	return (error);
}
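
The UDP-Lite branch above encodes the checksum coverage in the length field (RFC 3828): 0 means the checksum covers the whole datagram, any other value is the number of leading bytes covered. A hedged worked example with illustrative numbers:

/*
 * Sketch: UDP-Lite coverage encoding for a datagram of plen bytes,
 * where u_txcslen is the coverage requested on the socket.
 *
 *	plen = 1000, u_txcslen = 20   ->  uh_ulen = htons(20)
 *	    (checksum covers the 8-byte header plus 12 payload bytes)
 *	plen = 1000, u_txcslen = 1200 ->  clamped: uh_ulen = htons(0)
 *	    (0 means the whole datagram is covered; cscov_partial = 0)
 */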
Example #5
0
/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
	struct mbuf *m, *mprev;
	struct mbuf *n, *mfirst, *mlast;
	int len, off;

	KASSERT(m0 != NULL, ("m_clone: null mbuf"));

	mprev = NULL;
	for (m = m0; m != NULL; m = mprev->m_next) {
		/*
		 * Regular mbufs are ignored unless there's a cluster
		 * in front of them that we can use to coalesce.  We do
		 * the latter mainly so later clusters can be coalesced
		 * also w/o having to handle them specially (i.e. convert
		 * mbuf+cluster -> cluster).  This optimization is heavily
		 * influenced by the assumption that we're running over
		 * Ethernet where MCLBYTES is large enough that the max
		 * packet size will permit lots of coalescing into a
		 * single cluster.  This in turn permits efficient
		 * crypto operations, especially when using hardware.
		 */
		if ((m->m_flags & M_EXT) == 0) {
			if (mprev && (mprev->m_flags & M_EXT) &&
			    m->m_len <= M_TRAILINGSPACE(mprev)) {
				/* XXX: this ignores mbuf types */
				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
				       mtod(m, caddr_t), m->m_len);
				mprev->m_len += m->m_len;
				mprev->m_next = m->m_next;	/* unlink from chain */
				m_free(m);			/* reclaim mbuf */
				newipsecstat.ips_mbcoalesced++;
			} else {
				mprev = m;
			}
			continue;
		}
		/*
		 * Writable mbufs are left alone (for now).  Note
		 * that for 4.x systems it's not possible to identify
		 * whether or not mbufs with external buffers are
		 * writable unless they use clusters.
		 */
		if (M_EXT_WRITABLE(m)) {
			mprev = m;
			continue;
		}

		/*
		 * Not writable, replace with a copy or coalesce with
		 * the previous mbuf if possible (since we have to copy
		 * it anyway, we try to reduce the number of mbufs and
		 * clusters so that future work is easier).
		 */
		KASSERT(m->m_flags & M_EXT,
			("m_clone: m_flags 0x%x", m->m_flags));
		/* NB: we only coalesce into a cluster or larger */
		if (mprev != NULL && (mprev->m_flags & M_EXT) &&
		    m->m_len <= M_TRAILINGSPACE(mprev)) {
			/* XXX: this ignores mbuf types */
			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
			       mtod(m, caddr_t), m->m_len);
			mprev->m_len += m->m_len;
			mprev->m_next = m->m_next;	/* unlink from chain */
			m_free(m);			/* reclaim mbuf */
			newipsecstat.ips_clcoalesced++;
			continue;
		}

		/*
		 * Allocate new space to hold the copy...
		 */
		/* XXX why can M_PKTHDR be set past the first mbuf? */
		if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
			/*
			 * NB: if a packet header is present we must
			 * allocate the mbuf separately from any cluster
			 * because M_MOVE_PKTHDR will smash the data
			 * pointer and drop the M_EXT marker.
			 */
			MGETHDR(n, MB_DONTWAIT, m->m_type);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
			M_MOVE_PKTHDR(n, m);
			MCLGET(n, MB_DONTWAIT);
			if ((n->m_flags & M_EXT) == 0) {
				m_free(n);
				m_freem(m0);
				return (NULL);
			}
		} else {
			n = m_getcl(MB_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(m0);
				return (NULL);
			}
		}
		/*
		 * ... and copy the data.  We deal with jumbo mbufs
		 * (i.e. m_len > MCLBYTES) by splitting them into
		 * clusters.  We could just malloc a buffer and make
		 * it external but too many device drivers don't know
		 * how to break up the non-contiguous memory when
		 * doing DMA.
		 */
		len = m->m_len;
		off = 0;
		mfirst = n;
		mlast = NULL;
		for (;;) {
			int cc = min(len, MCLBYTES);
			memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
			n->m_len = cc;
			if (mlast != NULL)
				mlast->m_next = n;
			mlast = n;	
			newipsecstat.ips_clcopied++;

			len -= cc;
			if (len <= 0)
				break;
			off += cc;

			n = m_getcl(MB_DONTWAIT, m->m_type, m->m_flags);
			if (n == NULL) {
				m_freem(mfirst);
				m_freem(m0);
				return (NULL);
			}
		}
		n->m_next = m->m_next; 
		if (mprev == NULL)
			m0 = mfirst;		/* new head of chain */
		else
			mprev->m_next = mfirst;	/* replace old mbuf */
		m_free(m);			/* release old mbuf */
		mprev = mfirst;
	}
	return (m0);
}
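
Callers treat m_clone() like other chain-mutating mbuf routines: the old pointer is dead after the call, and NULL means the chain was consumed and freed. A minimal hedged usage sketch; esp_output() in Example #11 below follows this exact pattern:

/* Sketch: obtain a writable chain before encrypting in place. */
m = m_clone(m);
if (m == NULL)
	return (ENOBUFS);	/* original chain freed by m_clone() */
/* ... m is now writable and largely linearized ... */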
Example #6
0
/*
 * Ethernet output routine.
 * Encapsulate a packet of type family for the local net.
 * Assumes that ifp is actually pointer to ethercom structure.
 */
int
ssh_interceptor_ether_output(struct ifnet *ifp, struct mbuf *m0,
                             struct sockaddr *dst, struct rtentry *rt0)
{
        u_int16_t etype = 0;
        int s, error = 0, hdrcmplt = 0;
        u_char esrc[6], edst[6];
        struct mbuf *m = m0;
        struct rtentry *rt;
        struct mbuf *mcopy = (struct mbuf *)0;
        struct ether_header *eh, ehd;
#ifdef INET
        struct arphdr *ah;
#endif /* INET */
#ifdef NETATALK
        struct at_ifaddr *aa;
#endif /* NETATALK */

        if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
                senderr(ENETDOWN);
        ifp->if_lastchange = time;
        if ((rt = rt0) != NULL) {
                if ((rt->rt_flags & RTF_UP) == 0) {
                        if ((rt0 = rt = rtalloc1(dst, 1)) != NULL) {
                                rt->rt_refcnt--;
                                if (rt->rt_ifp != ifp)
                                        return (*rt->rt_ifp->if_output)
                                                        (ifp, m0, dst, rt);
                        } else
                                senderr(EHOSTUNREACH);
                }
                if ((rt->rt_flags & RTF_GATEWAY) && dst->sa_family != AF_NS) {
                        if (rt->rt_gwroute == 0)
                                goto lookup;
                        if (((rt = rt->rt_gwroute)->rt_flags & RTF_UP) == 0) {
                                rtfree(rt); rt = rt0;
                        lookup: rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1);
                                if ((rt = rt->rt_gwroute) == 0)
                                        senderr(EHOSTUNREACH);
                                /* the "G" test below also prevents rt == rt0 */
                                if ((rt->rt_flags & RTF_GATEWAY) ||
                                    (rt->rt_ifp != ifp)) {
                                        rt->rt_refcnt--;
                                        rt0->rt_gwroute = 0;
                                        senderr(EHOSTUNREACH);
                                }
                        }
                }
                if (rt->rt_flags & RTF_REJECT)
                        if (rt->rt_rmx.rmx_expire == 0 ||
                            time.tv_sec < rt->rt_rmx.rmx_expire)
                                senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
        }
        switch (dst->sa_family) {

#ifdef INET
        case AF_INET:
                if (m->m_flags & M_BCAST)
                        bcopy((caddr_t)etherbroadcastaddr, (caddr_t)edst,
                                sizeof(edst));

                else if (m->m_flags & M_MCAST) {
                        ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr,
                            (caddr_t)edst)

                } else if (!arpresolve(ifp, rt, m, dst, edst))
                        return (0);     /* if not yet resolved */
                /* If broadcasting on a simplex interface, loopback a copy */
                if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
                        mcopy = m_copy(m, 0, (int)M_COPYALL);
                etype = htons(ETHERTYPE_IP);
                break;

        case AF_ARP:
                ah = mtod(m, struct arphdr *);
                if (m->m_flags & M_BCAST)
                        bcopy((caddr_t)etherbroadcastaddr, (caddr_t)edst,
                                sizeof(edst));
                else
                        bcopy((caddr_t)ar_tha(ah),
                                (caddr_t)edst, sizeof(edst));

                ah->ar_hrd = htons(ARPHRD_ETHER);

                switch(ntohs(ah->ar_op)) {
                case ARPOP_REVREQUEST:
                case ARPOP_REVREPLY:
                        etype = htons(ETHERTYPE_REVARP);
                        break;

                case ARPOP_REQUEST:
                case ARPOP_REPLY:
                default:
                        etype = htons(ETHERTYPE_ARP);
                }

                break;
#endif
#ifdef INET6
        case AF_INET6:
#ifdef OLDIP6OUTPUT
                if (!nd6_resolve(ifp, rt, m, dst, (u_char *)edst))
                        return(0);      /* if not yet resolved */
#else
                if (!nd6_storelladdr(ifp, rt, m, dst, (u_char *)edst)){
                        /* this must be impossible, so we bark */
                        printf("nd6_storelladdr failed\n");
                        return(0);
                }
#endif /* OLDIP6OUTPUT */
                etype = htons(ETHERTYPE_IPV6);
                break;
#endif
#ifdef NETATALK
        case AF_APPLETALK:
                if (!aarpresolve(ifp, m, (struct sockaddr_at *)dst, edst)) {
#ifdef NETATALKDEBUG
                        printf("aarpresolv failed\n");
#endif /* NETATALKDEBUG */
                        return (0);
                }
                /*
                 * ifaddr is the first thing in at_ifaddr
                 */
                aa = (struct at_ifaddr *) at_ifawithnet(
                    (struct sockaddr_at *)dst, ifp);
                if (aa == NULL)
                    goto bad;

                /*
                 * In the phase 2 case, we need to prepend an mbuf for the
                 * llc header.  Since we must preserve the value of m,
                 * which is passed to us by value, we m_copy() the first
                 * mbuf, and use it for our llc header.
                 */
                if (aa->aa_flags & AFA_PHASE2) {
                        struct llc llc;

                        M_PREPEND(m, sizeof(struct llc), M_DONTWAIT);
                        if (m == NULL)
                                senderr(ENOBUFS);
                        llc.llc_dsap = llc.llc_ssap = LLC_SNAP_LSAP;
                        llc.llc_control = LLC_UI;
                        bcopy(at_org_code, llc.llc_snap_org_code,
                            sizeof(llc.llc_snap_org_code));
                        llc.llc_snap_ether_type = htons(ETHERTYPE_ATALK);
                        bcopy(&llc, mtod(m, caddr_t), sizeof(struct llc));
                } else {
                        etype = htons(ETHERTYPE_ATALK);
                }
                break;
#endif /* NETATALK */
#ifdef NS
        case AF_NS:
                etype = htons(ETHERTYPE_NS);
                bcopy((caddr_t)&(((struct sockaddr_ns *)dst)->sns_addr.x_host),
                    (caddr_t)edst, sizeof (edst));
                if (!bcmp((caddr_t)edst, (caddr_t)&ns_thishost, sizeof(edst)))
                        return (looutput(ifp, m, dst, rt));
                /* If broadcasting on a simplex interface, loopback a copy */
                if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
                        mcopy = m_copy(m, 0, (int)M_COPYALL);
                break;
#endif
#ifdef IPX
        case AF_IPX:
                etype = htons(ETHERTYPE_IPX);
                bcopy((caddr_t)&(((struct sockaddr_ipx *)dst)->sipx_addr.x_host),
                    (caddr_t)edst, sizeof (edst));
                /* If broadcasting on a simplex interface, loopback a copy */
                if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX))
                        mcopy = m_copy(m, 0, (int)M_COPYALL);
                break;
#endif
#ifdef  ISO
        case AF_ISO: {
                int     snpalen;
                struct  llc *l;
                struct sockaddr_dl *sdl;

                if (rt && (sdl = (struct sockaddr_dl *)rt->rt_gateway) &&
                    sdl->sdl_family == AF_LINK && sdl->sdl_alen > 0) {
                        bcopy(LLADDR(sdl), (caddr_t)edst, sizeof(edst));
                } else {
                        error = iso_snparesolve(ifp, (struct sockaddr_iso *)dst,
                                                (char *)edst, &snpalen);
                        if (error)
                                goto bad; /* Not Resolved */
                }
                /* If broadcasting on a simplex interface, loopback a copy */
                if (*edst & 1)
                        m->m_flags |= (M_BCAST|M_MCAST);
                if ((m->m_flags & M_BCAST) && (ifp->if_flags & IFF_SIMPLEX) &&
                    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
                        M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
                        if (mcopy) {
                                eh = mtod(mcopy, struct ether_header *);
                                bcopy((caddr_t)edst,
                                      (caddr_t)eh->ether_dhost, sizeof (edst));
                                bcopy(LLADDR(ifp->if_sadl),
                                      (caddr_t)eh->ether_shost, sizeof (edst));
                        }
                }
                M_PREPEND(m, 3, M_DONTWAIT);
                if (m == NULL)
                        return (0);
                l = mtod(m, struct llc *);
                l->llc_dsap = l->llc_ssap = LLC_ISO_LSAP;
                l->llc_control = LLC_UI;
#ifdef ARGO_DEBUG
                if (argo_debug[D_ETHER]) {
                        int i;
                        printf("unoutput: sending pkt to: ");
                        for (i=0; i<6; i++)
                                printf("%x ", edst[i] & 0xff);
                        printf("\n");
                }
#endif
                } break;
#endif /* ISO */
#ifdef  LLC
/*      case AF_NSAP: */
        case AF_CCITT: {
                struct sockaddr_dl *sdl =
                        (struct sockaddr_dl *)rt->rt_gateway;

                if (sdl && sdl->sdl_family == AF_LINK
                    && sdl->sdl_alen > 0) {
                        bcopy(LLADDR(sdl), (char *)edst,
                                sizeof(edst));
                } else goto bad; /* Not a link interface ? Funny ... */
                if ((ifp->if_flags & IFF_SIMPLEX) && (*edst & 1) &&
                    (mcopy = m_copy(m, 0, (int)M_COPYALL))) {
                        M_PREPEND(mcopy, sizeof (*eh), M_DONTWAIT);
                        if (mcopy) {
                                eh = mtod(mcopy, struct ether_header *);
                                bcopy((caddr_t)edst,
                                      (caddr_t)eh->ether_dhost, sizeof (edst));
                                bcopy(LLADDR(ifp->if_sadl),
                                      (caddr_t)eh->ether_shost, sizeof (edst));
                        }
                }
#ifdef LLC_DEBUG
                {
                        int i;
                        struct llc *l = mtod(m, struct llc *);

                        printf("ether_output: sending LLC2 pkt to: ");
                        for (i=0; i<6; i++)
                                printf("%x ", edst[i] & 0xff);
                        printf(" len 0x%x dsap 0x%x ssap 0x%x control 0x%x\n",
                            m->m_pkthdr.len, l->llc_dsap & 0xff, l->llc_ssap &0xff,
                            l->llc_control & 0xff);

                }
#endif /* LLC_DEBUG */
                } break;
Example #7
0
struct mbuf *
ieee80211_ccmp_encrypt(struct ieee80211com *ic, struct mbuf *m0,
    struct ieee80211_key *k)
{
	struct ieee80211_ccmp_ctx *ctx = k->k_priv;
	const struct ieee80211_frame *wh;
	const u_int8_t *src;
	u_int8_t *ivp, *mic, *dst;
	u_int8_t a[16], b[16], s0[16], s[16];
	struct mbuf *n0, *m, *n;
	int hdrlen, left, moff, noff, len;
	u_int16_t ctr;
	int i, j;

	MGET(n0, M_DONTWAIT, m0->m_type);
	if (n0 == NULL)
		goto nospace;
	if (m_dup_pkthdr(n0, m0, M_DONTWAIT))
		goto nospace;
	n0->m_pkthdr.len += IEEE80211_CCMP_HDRLEN;
	n0->m_len = MHLEN;
	if (n0->m_pkthdr.len >= MINCLSIZE - IEEE80211_CCMP_MICLEN) {
		MCLGET(n0, M_DONTWAIT);
		if (n0->m_flags & M_EXT)
			n0->m_len = n0->m_ext.ext_size;
	}
	if (n0->m_len > n0->m_pkthdr.len)
		n0->m_len = n0->m_pkthdr.len;

	/* copy 802.11 header */
	wh = mtod(m0, struct ieee80211_frame *);
	hdrlen = ieee80211_get_hdrlen(wh);
	memcpy(mtod(n0, caddr_t), wh, hdrlen);

	k->k_tsc++;	/* increment the 48-bit PN */

	/* construct CCMP header */
	ivp = mtod(n0, u_int8_t *) + hdrlen;
	ivp[0] = k->k_tsc;		/* PN0 */
	ivp[1] = k->k_tsc >> 8;		/* PN1 */
	ivp[2] = 0;			/* Rsvd */
	ivp[3] = k->k_id << 6 | IEEE80211_WEP_EXTIV;	/* KeyID | ExtIV */
	ivp[4] = k->k_tsc >> 16;	/* PN2 */
	ivp[5] = k->k_tsc >> 24;	/* PN3 */
	ivp[6] = k->k_tsc >> 32;	/* PN4 */
	ivp[7] = k->k_tsc >> 40;	/* PN5 */

	/* construct initial B, A and S_0 blocks */
	ieee80211_ccmp_phase1(&ctx->rijndael, wh, k->k_tsc,
	    m0->m_pkthdr.len - hdrlen, b, a, s0);

	/* construct S_1 */
	ctr = 1;
	a[14] = ctr >> 8;
	a[15] = ctr & 0xff;
	rijndael_encrypt(&ctx->rijndael, a, s);

	/* encrypt frame body and compute MIC */
	j = 0;
	m = m0;
	n = n0;
	moff = hdrlen;
	noff = hdrlen + IEEE80211_CCMP_HDRLEN;
	left = m0->m_pkthdr.len - moff;
	while (left > 0) {
		if (moff == m->m_len) {
			/* nothing left to copy from m */
			m = m->m_next;
			moff = 0;
		}
		if (noff == n->m_len) {
			/* n is full and there's more data to copy */
			MGET(n->m_next, M_DONTWAIT, n->m_type);
			if (n->m_next == NULL)
				goto nospace;
			n = n->m_next;
			n->m_len = MLEN;
			if (left >= MINCLSIZE - IEEE80211_CCMP_MICLEN) {
				MCLGET(n, M_DONTWAIT);
				if (n->m_flags & M_EXT)
					n->m_len = n->m_ext.ext_size;
			}
			if (n->m_len > left)
				n->m_len = left;
			noff = 0;
		}
		len = min(m->m_len - moff, n->m_len - noff);

		src = mtod(m, u_int8_t *) + moff;
		dst = mtod(n, u_int8_t *) + noff;
		for (i = 0; i < len; i++) {
			/* update MIC with clear text */
			b[j] ^= src[i];
			/* encrypt message */
			dst[i] = src[i] ^ s[j];
			if (++j < 16)
				continue;
			/* we have a full block, encrypt MIC */
			rijndael_encrypt(&ctx->rijndael, b, b);
			/* construct a new S_ctr block */
			ctr++;
			a[14] = ctr >> 8;
			a[15] = ctr & 0xff;
			rijndael_encrypt(&ctx->rijndael, a, s);
			j = 0;
		}

		moff += len;
		noff += len;
		left -= len;
	}
	if (j != 0)	/* partial block, encrypt MIC */
		rijndael_encrypt(&ctx->rijndael, b, b);

	/* reserve trailing space for MIC */
	if (M_TRAILINGSPACE(n) < IEEE80211_CCMP_MICLEN) {
		MGET(n->m_next, M_DONTWAIT, n->m_type);
		if (n->m_next == NULL)
			goto nospace;
		n = n->m_next;
		n->m_len = 0;
	}
	/* finalize MIC, U := T XOR first-M-bytes( S_0 ) */
	mic = mtod(n, u_int8_t *) + n->m_len;
	for (i = 0; i < IEEE80211_CCMP_MICLEN; i++)
		mic[i] = b[i] ^ s0[i];
	n->m_len += IEEE80211_CCMP_MICLEN;
	n0->m_pkthdr.len += IEEE80211_CCMP_MICLEN;

	m_freem(m0);
	return n0;
 nospace:
	ic->ic_stats.is_tx_nombuf++;
	m_freem(m0);
	if (n0 != NULL)
		m_freem(n0);
	return NULL;
}
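
The CCMP header built above serializes the 48-bit packet number little-endian into bytes 0-1 and 4-7, skipping the reserved and key-ID bytes. A hedged sketch of the matching receive-side reassembly; the variable name pn is mine:

/* Sketch: recover the 48-bit PN from the CCMP header layout above. */
u_int64_t pn;

pn = (u_int64_t)ivp[0]       |
     (u_int64_t)ivp[1] <<  8 |
     (u_int64_t)ivp[4] << 16 |
     (u_int64_t)ivp[5] << 24 |
     (u_int64_t)ivp[6] << 32 |
     (u_int64_t)ivp[7] << 40;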
Example #8
0
int
sosend(struct socket *so, 
       struct mbuf *nam,      /* sockaddr, if UDP socket, NULL if TCP */
       char  *data,           /* data to send */
       int   *data_length,    /* IN/OUT  length of (remaining) data */
       int   flags)
{
   struct mbuf *head = (struct mbuf *)NULL;
   struct mbuf *m;
   int   space;
   int   resid;
   int   len;
   int   error = 0;
   int   dontroute;
   int   first = 1;

   resid = *data_length;

   /*
    * In theory resid should be unsigned.
    * However, space must be signed, as it might be less than 0
    * if we over-committed, and we must use a signed comparison
    * of space and resid.  On the other hand, a negative resid
    * causes us to loop sending 0-length segments to the protocol.
    */
   if (resid < 0)
      return (EINVAL);

   INET_TRACE (INETM_IO, ("INET:sosend: so %lx resid %d sb_hiwat %d so_state %x\n",
               so, resid, so->so_snd.sb_hiwat, so->so_state));

   if (sosendallatonce(so) && (resid > (int)so->so_snd.sb_hiwat))
      return (EMSGSIZE);

   dontroute = (flags & MSG_DONTROUTE) &&
               ((so->so_options & SO_DONTROUTE) == 0) &&
               (so->so_proto->pr_flags & PR_ATOMIC);

#define     snderr(errno)     {  error =  errno;   goto  release; }

restart:
   sblock(&so->so_snd);
   do 
   {
      if (so->so_error) 
      {
         error = so->so_error;
         so->so_error = 0;          /* ??? */
         goto release;
      }
      if (so->so_state & SS_CANTSENDMORE)
         snderr(EPIPE);
      if ((so->so_state & SS_ISCONNECTED) == 0) 
      {
         if (so->so_proto->pr_flags & PR_CONNREQUIRED)
            snderr(ENOTCONN);
         if (nam == 0)
            snderr(EDESTADDRREQ);
      }
      if (flags & MSG_OOB)
         space = 1024;
      else 
      {
         space = (int)sbspace(&so->so_snd);
         if ((sosendallatonce(so) && (space < resid)) ||
             ((resid >= CLBYTES) && (space < CLBYTES) &&
              (so->so_snd.sb_cc >= CLBYTES) &&
              ((so->so_state & SS_NBIO) == 0) &&
              ((flags & MSG_DONTWAIT) == 0)))
         {
            if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
            {
               if (first)
                  error = EWOULDBLOCK;
               goto release;
            }
            sbunlock(&so->so_snd);
            sbwait(&so->so_snd);
            goto restart;
         }
      }
      if ( space <= 0 ) 
      {
         /* no space in socket send buffer - see if we can wait */
         if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT))
         {
            if (first)     /* report first error */
               error = EWOULDBLOCK;
            goto release;
         }
         /* If blocking socket, let someone else run */
         sbunlock(&so->so_snd);
         sbwait(&so->so_snd);
         goto restart;
      }

      while (space > 0) 
      {
         len = resid;
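         /*
          * NB: len is never clamped to space or to the mbuf size here,
          * so the entire residual is queued in one pass and the loop
          * exits via the *data_length check below.
          */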
         if ( so->so_type == SOCK_STREAM )
         {
            m = m_getwithdata(MT_TXDATA, len);
            if (!m)   
               snderr(ENOBUFS);
            MEMCPY(m->m_data, data, len);
            so->so_snd.sb_flags |= SB_MBCOMP;   /* allow compression */
         }
         else
         {
            m = m_get (M_WAIT, MT_TXDATA);
            m->m_data = data;
         }
         INET_TRACE (INETM_IO,
          ("sosend:got %d bytes so %lx mlen %d, off %d mtod %x\n",
             len, so, m->m_len, m->m_off, mtod (m, caddr_t)));

         *data_length -= len;
         resid -= len;
         data += len;
         m->m_len = len;
         if (head == (struct mbuf *)NULL)
            head = m;
         if (error)
            goto release;
         if (*data_length <= 0)
            break;
      }

      if (dontroute)
         so->so_options |= SO_DONTROUTE;

      so->so_req = (flags & MSG_OOB) ? PRU_SENDOOB : PRU_SEND;
      error = (*so->so_proto->pr_usrreq)(so, head, nam);

      if (dontroute)
         so->so_options &= ~SO_DONTROUTE;

      head = (struct mbuf *)NULL;
      first = 0;
   } while ((resid != 0) && (error == 0));

release:
   sbunlock(&so->so_snd);  
   if (head)
      m_freem(head);
   return error;
}
Example #9
0
static void
nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
{
	struct nfsrv_descript nd;
	struct nfsrvcache *rp = NULL;
	int cacherep, credflavor;

	memset(&nd, 0, sizeof(nd));
	if (rqst->rq_vers == NFS_VER2) {
		if (rqst->rq_proc > NFSV2PROC_STATFS) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = newnfs_nfsv3_procid[rqst->rq_proc];
		nd.nd_flag = ND_NFSV2;
	} else if (rqst->rq_vers == NFS_VER3) {
		if (rqst->rq_proc >= NFS_V3NPROCS) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = rqst->rq_proc;
		nd.nd_flag = ND_NFSV3;
	} else {
		if (rqst->rq_proc != NFSPROC_NULL &&
		    rqst->rq_proc != NFSV4PROC_COMPOUND) {
			svcerr_noproc(rqst);
			svc_freereq(rqst);
			goto out;
		}
		nd.nd_procnum = rqst->rq_proc;
		nd.nd_flag = ND_NFSV4;
	}

	/*
	 * Note: we want rq_addr, not svc_getrpccaller for nd_nam2 -
	 * NFS_SRVMAXDATA uses a NULL value for nd_nam2 to detect TCP
	 * mounts.
	 */
	nd.nd_mrep = rqst->rq_args;
	rqst->rq_args = NULL;
	newnfs_realign(&nd.nd_mrep);
	nd.nd_md = nd.nd_mrep;
	nd.nd_dpos = mtod(nd.nd_md, caddr_t);
	nd.nd_nam = svc_getrpccaller(rqst);
	nd.nd_nam2 = rqst->rq_addr;
	nd.nd_mreq = NULL;
	nd.nd_cred = NULL;

	if (nfs_privport && (nd.nd_flag & ND_NFSV4) == 0) {
		/* Check if source port is privileged */
		u_short port;
		struct sockaddr *nam = nd.nd_nam;
		struct sockaddr_in *sin;

		sin = (struct sockaddr_in *)nam;
		/*
		 * INET/INET6 - same code:
		 *    sin_port and sin6_port are at same offset
		 */
		port = ntohs(sin->sin_port);
		if (port >= IPPORT_RESERVED &&
		    nd.nd_procnum != NFSPROC_NULL) {
#ifdef INET6
			char b6[INET6_ADDRSTRLEN];
#if defined(KLD_MODULE)
			/* Do not use ip6_sprintf: the nfs module should work without INET6. */
#define	ip6_sprintf(buf, a)						\
			(sprintf((buf), "%x:%x:%x:%x:%x:%x:%x:%x",	\
			    (a)->s6_addr16[0], (a)->s6_addr16[1],	\
			    (a)->s6_addr16[2], (a)->s6_addr16[3],	\
			    (a)->s6_addr16[4], (a)->s6_addr16[5],	\
			    (a)->s6_addr16[6], (a)->s6_addr16[7]),	\
			    (buf))
#endif
#endif
			printf("NFS request from unprivileged port (%s:%d)\n",
#ifdef INET6
			    sin->sin_family == AF_INET6 ?
			    ip6_sprintf(b6, &satosin6(sin)->sin6_addr) :
#if defined(KLD_MODULE)
#undef ip6_sprintf
#endif
#endif
			    inet_ntoa(sin->sin_addr), port);
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}
	}

	if (nd.nd_procnum != NFSPROC_NULL) {
		if (!svc_getcred(rqst, &nd.nd_cred, &credflavor)) {
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}

		/* Set the flag based on credflavor */
		if (credflavor == RPCSEC_GSS_KRB5) {
			nd.nd_flag |= ND_GSS;
		} else if (credflavor == RPCSEC_GSS_KRB5I) {
			nd.nd_flag |= (ND_GSS | ND_GSSINTEGRITY);
		} else if (credflavor == RPCSEC_GSS_KRB5P) {
			nd.nd_flag |= (ND_GSS | ND_GSSPRIVACY);
		} else if (credflavor != AUTH_SYS) {
			svcerr_weakauth(rqst);
			svc_freereq(rqst);
			m_freem(nd.nd_mrep);
			goto out;
		}

#ifdef MAC
		mac_cred_associate_nfsd(nd.nd_cred);
#endif
		if ((nd.nd_flag & ND_NFSV4) != 0) {
			nd.nd_repstat = nfsvno_v4rootexport(&nd);
			if (nd.nd_repstat != 0) {
				svcerr_weakauth(rqst);
				svc_freereq(rqst);
				m_freem(nd.nd_mrep);
				goto out;
			}
		}

		cacherep = nfs_proc(&nd, rqst->rq_xid, xprt->xp_socket,
		    xprt->xp_sockref, &rp);
	} else {
		NFSMGET(nd.nd_mreq);
		nd.nd_mreq->m_len = 0;
		cacherep = RC_REPLY;
	}
	if (nd.nd_mrep != NULL)
		m_freem(nd.nd_mrep);

	if (nd.nd_cred != NULL)
		crfree(nd.nd_cred);

	if (cacherep == RC_DROPIT) {
		if (nd.nd_mreq != NULL)
			m_freem(nd.nd_mreq);
		svc_freereq(rqst);
		goto out;
	}

	if (nd.nd_mreq == NULL) {
		svcerr_decode(rqst);
		svc_freereq(rqst);
		goto out;
	}

	if (nd.nd_repstat & NFSERR_AUTHERR) {
		svcerr_auth(rqst, nd.nd_repstat & ~NFSERR_AUTHERR);
		if (nd.nd_mreq != NULL)
			m_freem(nd.nd_mreq);
	} else if (!svc_sendreply_mbuf(rqst, nd.nd_mreq)) {
		svcerr_systemerr(rqst);
	}
	if (rp != NULL)
		nfsrvd_sentcache(rp, xprt->xp_socket, 0);
	svc_freereq(rqst);

out:
	NFSEXITCODE(0);
}
Example #10
0
/*
 * Set up nameidata for a lookup() call and do it.
 *
 * If pubflag is set, this call is done for a lookup operation on the
 * public filehandle. In that case we allow crossing mountpoints and
 * absolute pathnames. However, the caller is expected to check that
 * the lookup result is within the public fs, and deny access if
 * it is not.
 *
 * nfs_namei() clears out fields that namei() might leave filled with garbage.
 * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no
 * error occurs but the parent was not requested.
 *
 * dirp may be set whether an error is returned or not, and must be
 * released by the caller.
 */
int
nfs_namei(struct nameidata *ndp, struct nfsrv_descript *nfsd,
    fhandle_t *fhp, int len, struct nfssvc_sock *slp,
    struct sockaddr *nam, struct mbuf **mdp,
    caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp,
    int *retdirattr_retp, int pubflag)
{
	int i, rem;
	struct mbuf *md;
	char *fromcp, *tocp, *cp;
	struct iovec aiov;
	struct uio auio;
	struct vnode *dp;
	int error, rdonly, linklen;
	struct componentname *cnp = &ndp->ni_cnd;
	int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0;
	int dvfslocked;
	int vfslocked;

	vfslocked = 0;
	dvfslocked = 0;
	*retdirp = NULL;
	cnp->cn_flags |= NOMACCHECK;
	cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK);

	/*
	 * Copy the name from the mbuf list to cnp->cn_pnbuf
	 * and set the various ndp fields appropriately.
	 */
	fromcp = *dposp;
	tocp = cnp->cn_pnbuf;
	md = *mdp;
	rem = mtod(md, caddr_t) + md->m_len - fromcp;
	for (i = 0; i < len; i++) {
		while (rem == 0) {
			md = md->m_next;
			if (md == NULL) {
				error = EBADRPC;
				goto out;
			}
			fromcp = mtod(md, caddr_t);
			rem = md->m_len;
		}
		if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) {
			error = EACCES;
			goto out;
		}
		*tocp++ = *fromcp++;
		rem--;
	}
	*tocp = '\0';
	*mdp = md;
	*dposp = fromcp;
	len = nfsm_rndup(len)-len;
	if (len > 0) {
		if (rem >= len)
			*dposp += len;
		else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0)
			goto out;
	}

	/*
	 * Extract and set starting directory.
	 */
	error = nfsrv_fhtovp(fhp, FALSE, &dp, &dvfslocked,
	    nfsd, slp, nam, &rdonly, pubflag);
	if (error)
		goto out;
	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
	if (dp->v_type != VDIR) {
		vrele(dp);
		error = ENOTDIR;
		goto out;
	}

	if (rdonly)
		cnp->cn_flags |= RDONLY;

	/*
	 * Set return directory.  Reference to dp is implicitly transferred
	 * to the returned pointer.
	 */
	*retdirp = dp;
	if (v3) {
		vn_lock(dp, LK_EXCLUSIVE | LK_RETRY);
		*retdirattr_retp = VOP_GETATTR(dp, retdirattrp,
			ndp->ni_cnd.cn_cred);
		VOP_UNLOCK(dp, 0);
	}

	if (pubflag) {
		/*
		 * Oh joy. For WebNFS, handle those pesky '%' escapes,
		 * and the 'native path' indicator.
		 */
		cp = uma_zalloc(namei_zone, M_WAITOK);
		fromcp = cnp->cn_pnbuf;
		tocp = cp;
		if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) {
			switch ((unsigned char)*fromcp) {
			case WEBNFS_NATIVE_CHAR:
				/*
				 * 'Native' path for us is the same
				 * as a path according to the NFS spec,
				 * just skip the escape char.
				 */
				fromcp++;
				break;
			/*
			 * More may be added in the future, range 0x80-0xff
			 */
			default:
				error = EIO;
				uma_zfree(namei_zone, cp);
				goto out;
			}
		}
		/*
		 * Translate the '%' escapes, URL-style.
		 */
		while (*fromcp != '\0') {
			if (*fromcp == WEBNFS_ESC_CHAR) {
				if (fromcp[1] != '\0' && fromcp[2] != '\0') {
					fromcp++;
					*tocp++ = HEXSTRTOI(fromcp);
					fromcp += 2;
					continue;
				} else {
					error = ENOENT;
					uma_zfree(namei_zone, cp);
					goto out;
				}
			} else
				*tocp++ = *fromcp++;
		}
		*tocp = '\0';
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		cnp->cn_pnbuf = cp;
	}

	ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1;
	ndp->ni_segflg = UIO_SYSSPACE;

	if (pubflag) {
		ndp->ni_rootdir = rootvnode;
		ndp->ni_loopcnt = 0;
		if (cnp->cn_pnbuf[0] == '/') {
			int tvfslocked;

			tvfslocked = VFS_LOCK_GIANT(rootvnode->v_mount);
			VFS_UNLOCK_GIANT(vfslocked);
			dp = rootvnode;
			vfslocked = tvfslocked;
		}
	} else {
		cnp->cn_flags |= NOCROSSMOUNT;
	}

	/*
	 * Initialize for scan, set ni_startdir and bump ref on dp again
	 * because lookup() will dereference ni_startdir.
	 */

	cnp->cn_thread = curthread;
	VREF(dp);
	ndp->ni_startdir = dp;

	if (!lockleaf)
		cnp->cn_flags |= LOCKLEAF;
	for (;;) {
		cnp->cn_nameptr = cnp->cn_pnbuf;
		/*
		 * Call lookup() to do the real work.  If an error occurs,
		 * ndp->ni_vp and ni_dvp are left uninitialized or NULL and
		 * we do not have to dereference anything before returning.
		 * In either case ni_startdir will be dereferenced and NULLed
		 * out.
		 */
		if (vfslocked)
			ndp->ni_cnd.cn_flags |= GIANTHELD;
		error = lookup(ndp);
		vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0;
		ndp->ni_cnd.cn_flags &= ~GIANTHELD;
		if (error)
			break;

		/*
		 * Check for encountering a symbolic link.  Trivial
		 * termination occurs if no symlink encountered.
		 * Note: zfree is safe because error is 0, so we will
		 * not zfree it again when we break.
		 */
		if ((cnp->cn_flags & ISSYMLINK) == 0) {
			if (cnp->cn_flags & (SAVENAME | SAVESTART))
				cnp->cn_flags |= HASBUF;
			else
				uma_zfree(namei_zone, cnp->cn_pnbuf);
			if (ndp->ni_vp && !lockleaf)
				VOP_UNLOCK(ndp->ni_vp, 0);
			break;
		}

		/*
		 * Validate symlink
		 */
		if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
			VOP_UNLOCK(ndp->ni_dvp, 0);
		if (!pubflag) {
			error = EINVAL;
			goto badlink2;
		}

		if (ndp->ni_loopcnt++ >= MAXSYMLINKS) {
			error = ELOOP;
			goto badlink2;
		}
		if (ndp->ni_pathlen > 1)
			cp = uma_zalloc(namei_zone, M_WAITOK);
		else
			cp = cnp->cn_pnbuf;
		aiov.iov_base = cp;
		aiov.iov_len = MAXPATHLEN;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_td = NULL;
		auio.uio_resid = MAXPATHLEN;
		error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
		if (error) {
		badlink1:
			if (ndp->ni_pathlen > 1)
				uma_zfree(namei_zone, cp);
		badlink2:
			vput(ndp->ni_vp);
			vrele(ndp->ni_dvp);
			break;
		}
		linklen = MAXPATHLEN - auio.uio_resid;
		if (linklen == 0) {
			error = ENOENT;
			goto badlink1;
		}
		if (linklen + ndp->ni_pathlen >= MAXPATHLEN) {
			error = ENAMETOOLONG;
			goto badlink1;
		}

		/*
		 * Adjust or replace path
		 */
		if (ndp->ni_pathlen > 1) {
			bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen);
			uma_zfree(namei_zone, cnp->cn_pnbuf);
			cnp->cn_pnbuf = cp;
		} else
			cnp->cn_pnbuf[linklen] = '\0';
		ndp->ni_pathlen += linklen;

		/*
		 * Cleanup refs for next loop and check if root directory
		 * should replace current directory.  Normally ni_dvp
		 * becomes the new base directory and is cleaned up when
		 * we loop.  Explicitly null pointers after invalidation
		 * to clarify operation.
		 */
		vput(ndp->ni_vp);
		ndp->ni_vp = NULL;

		if (cnp->cn_pnbuf[0] == '/') {
			vrele(ndp->ni_dvp);
			ndp->ni_dvp = ndp->ni_rootdir;
			VREF(ndp->ni_dvp);
		}
		ndp->ni_startdir = ndp->ni_dvp;
		ndp->ni_dvp = NULL;
	}
	if (!lockleaf)
		cnp->cn_flags &= ~LOCKLEAF;
	if (cnp->cn_flags & GIANTHELD) {
		mtx_unlock(&Giant);
		cnp->cn_flags &= ~GIANTHELD;
	}

	/*
	 * nfs_namei() guarantees that fields will not contain garbage
	 * whether an error occurs or not.  This allows the caller to track
	 * cleanup state trivially.
	 */
out:
	if (error) {
		uma_zfree(namei_zone, cnp->cn_pnbuf);
		ndp->ni_vp = NULL;
		ndp->ni_dvp = NULL;
		ndp->ni_startdir = NULL;
		cnp->cn_flags &= ~HASBUF;
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
	} else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) {
		ndp->ni_dvp = NULL;
	}
	/*
	 * This differs from normal namei() in that even on failure we may
	 * return with Giant held due to the dirp return.  Make sure we have
	 * not recursed, however.  The calling code only expects to drop
	 * one acquire.
	 */
	if (vfslocked || dvfslocked)
		ndp->ni_cnd.cn_flags |= GIANTHELD;
	if (vfslocked && dvfslocked)
		VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
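
The WebNFS loop above decodes URL-style '%xx' escapes via the HEXSTRTOI macro. A hedged standalone sketch of the same decoding step; hex_digit() is a hypothetical helper, not part of the NFS code:

/* Sketch: decode one URL-style %xx escape, as in the WebNFS loop. */
static int
hex_digit(char c)
{
	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F')
		return (c - 'A' + 10);
	return (-1);
}

/* *fromcp points at '%': consume "%xy" and emit one decoded byte
 * (non-hex-digit error handling omitted in this sketch). */
*tocp++ = (hex_digit(fromcp[1]) << 4) | hex_digit(fromcp[2]);
fromcp += 3;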
Example #11
0
/*
 * ESP output routine, called by ipsec[46]_process_packet().
 */
static int
esp_output(
	struct mbuf *m,
	struct ipsecrequest *isr,
	struct mbuf **mp,
	int skip,
	int protoff
)
{
	struct enc_xform *espx;
	struct auth_hash *esph;
	int hlen, rlen, plen, padding, blks, alen, i, roff;
	struct mbuf *mo = (struct mbuf *) NULL;
	struct tdb_crypto *tc;
	struct secasvar *sav;
	struct secasindex *saidx;
	unsigned char *pad;
	u_int8_t prot;
	int error, maxpacketsize;

	struct cryptodesc *crde = NULL, *crda = NULL;
	struct cryptop *crp;

	SPLASSERT(net, "esp_output");

	sav = isr->sav;
	KASSERT(sav != NULL, ("esp_output: null SA"));
	esph = sav->tdb_authalgxform;
	espx = sav->tdb_encalgxform;
	KASSERT(espx != NULL, ("esp_output: null encoding xform"));

	if (sav->flags & SADB_X_EXT_OLD)
		hlen = sizeof (struct esp) + sav->ivlen;
	else
		hlen = sizeof (struct newesp) + sav->ivlen;

	rlen = m->m_pkthdr.len - skip;	/* Raw payload length. */
	/*
	 * NB: The null encoding transform has a blocksize of 4
	 *     so that headers are properly aligned.
	 */
	blks = espx->blocksize;		/* IV blocksize */

	/* XXX clamp padding length a la KAME??? */
	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;
	plen = rlen + padding;		/* Padded payload length. */

	if (esph)
		alen = AH_HMAC_HASHLEN;
	else
		alen = 0;

	espstat.esps_output++;

	saidx = &sav->sah->saidx;
	/* Check for maximum packet size violations. */
	switch (saidx->dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
		maxpacketsize = IP_MAXPACKET;
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		maxpacketsize = IPV6_MAXPACKET;
		break;
#endif /* INET6 */
	default:
		DPRINTF(("esp_output: unknown/unsupported protocol "
		    "family %d, SA %s/%08lx\n",
		    saidx->dst.sa.sa_family, ipsec_address(&saidx->dst),
		    (u_long) ntohl(sav->spi)));
		espstat.esps_nopf++;
		error = EPFNOSUPPORT;
		goto bad;
	}
	if (skip + hlen + rlen + padding + alen > maxpacketsize) {
		DPRINTF(("esp_output: packet in SA %s/%08lx got too big "
		    "(len %u, max len %u)\n",
		    ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi),
		    skip + hlen + rlen + padding + alen, maxpacketsize));
		espstat.esps_toobig++;
		error = EMSGSIZE;
		goto bad;
	}

	/* Update the counters. */
	espstat.esps_obytes += m->m_pkthdr.len - skip;

	m = m_clone(m);
	if (m == NULL) {
		DPRINTF(("esp_output: cannot clone mbuf chain, SA %s/%08lx\n",
		    ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));
		espstat.esps_hdrops++;
		error = ENOBUFS;
		goto bad;
	}

	/* Inject ESP header. */
	mo = m_makespace(m, skip, hlen, &roff);
	if (mo == NULL) {
		DPRINTF(("esp_output: failed to inject %u byte ESP hdr for SA "
		    "%s/%08lx\n",
		    hlen, ipsec_address(&saidx->dst),
		    (u_long) ntohl(sav->spi)));
		espstat.esps_hdrops++;		/* XXX diffs from openbsd */
		error = ENOBUFS;
		goto bad;
	}

	/* Initialize ESP header. */
	bcopy((caddr_t) &sav->spi, mtod(mo, caddr_t) + roff, sizeof(u_int32_t));
	if (sav->replay) {
		u_int32_t replay = htonl(++(sav->replay->count));
		bcopy((caddr_t) &replay,
		    mtod(mo, caddr_t) + roff + sizeof(u_int32_t),
		    sizeof(u_int32_t));
	}

	/*
	 * Add padding -- better to do it ourselves than use the crypto engine,
	 * although if/when we support compression, we'd have to do that.
	 */
	pad = (u_char *) m_pad(m, padding + alen);
	if (pad == NULL) {
		DPRINTF(("esp_output: m_pad failed for SA %s/%08lx\n",
		    ipsec_address(&saidx->dst), (u_long) ntohl(sav->spi)));
		m = NULL;		/* NB: free'd by m_pad */
		error = ENOBUFS;
		goto bad;
	}

	/*
	 * Add padding: random, zero, or self-describing.
	 * XXX catch unexpected setting
	 */
	switch (sav->flags & SADB_X_EXT_PMASK) {
	case SADB_X_EXT_PRAND:
		(void) read_random(pad, padding - 2);
		break;
	case SADB_X_EXT_PZERO:
		bzero(pad, padding - 2);
		break;
	case SADB_X_EXT_PSEQ:
		for (i = 0; i < padding - 2; i++)
			pad[i] = i+1;
		break;
	}

	/* Fix padding length and Next Protocol in padding itself. */
	pad[padding - 2] = padding - 2;
	m_copydata(m, protoff, sizeof(u_int8_t), pad + padding - 1);

	/* Fix Next Protocol in IPv4/IPv6 header. */
	prot = IPPROTO_ESP;
	m_copyback(m, protoff, sizeof(u_int8_t), (u_char *) &prot);

	/* Get crypto descriptors. */
	crp = crypto_getreq(esph && espx ? 2 : 1);
	if (crp == NULL) {
		DPRINTF(("esp_output: failed to acquire crypto descriptors\n"));
		espstat.esps_crypto++;
		error = ENOBUFS;
		goto bad;
	}

	if (espx) {
		crde = crp->crp_desc;
		crda = crde->crd_next;

		/* Encryption descriptor. */
		crde->crd_skip = skip + hlen;
		crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen);
		crde->crd_flags = CRD_F_ENCRYPT;
		crde->crd_inject = skip + hlen - sav->ivlen;

		/* Encryption operation. */
		crde->crd_alg = espx->type;
		crde->crd_key = _KEYBUF(sav->key_enc);
		crde->crd_klen = _KEYBITS(sav->key_enc);
		/* XXX Rounds ? */
	} else
		crda = crp->crp_desc;

	/* IPsec-specific opaque crypto info. */
	tc = (struct tdb_crypto *) malloc(sizeof(struct tdb_crypto),
		M_XDATA, M_NOWAIT|M_ZERO);
	if (tc == NULL) {
		crypto_freereq(crp);
		DPRINTF(("esp_output: failed to allocate tdb_crypto\n"));
		espstat.esps_crypto++;
		error = ENOBUFS;
		goto bad;
	}

	/* Callback parameters */
	tc->tc_isr = isr;
	tc->tc_spi = sav->spi;
	tc->tc_dst = saidx->dst;
	tc->tc_proto = saidx->proto;

	/* Crypto operation descriptor. */
	crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
	crp->crp_flags = CRYPTO_F_IMBUF;
	crp->crp_buf = (caddr_t) m;
	crp->crp_callback = esp_output_cb;
	crp->crp_opaque = (caddr_t) tc;
	crp->crp_sid = sav->tdb_cryptoid;

	if (esph) {
		/* Authentication descriptor. */
		crda->crd_skip = skip;
		crda->crd_len = m->m_pkthdr.len - (skip + alen);
		crda->crd_inject = m->m_pkthdr.len - alen;

		/* Authentication operation. */
		crda->crd_alg = esph->type;
		crda->crd_key = _KEYBUF(sav->key_auth);
		crda->crd_klen = _KEYBITS(sav->key_auth);
	}

	return crypto_dispatch(crp);
bad:
	if (m)
		m_freem(m);
	return (error);
}
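/*
 * A minimal sketch (not part of the original sources) of how the pad
 * length consumed above is derived.  esp_pad_length() is a
 * hypothetical helper; the kernel computes the same value inline (see
 * the `padding' computation in the OpenBSD esp_output() further
 * below).
 */
#include <assert.h>

/*
 * Bytes of padding to append, including the two ESP trailer bytes
 * (Pad Length, Next Header), so that payload + padding is a multiple
 * of the cipher block size.
 */
static unsigned int
esp_pad_length(unsigned int rlen, unsigned int blks)
{
	return ((blks - ((rlen + 2) % blks)) % blks) + 2;
}

int
main(void)
{
	assert(esp_pad_length(13, 8) == 3);	/* 13 + 3 = 16, a multiple of 8 */
	assert(esp_pad_length(14, 8) == 2);	/* already aligned: trailer only */
	return 0;
}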
Example #12
int
bpf_movein(struct uio *uio, u_int linktype, struct mbuf **mp,
    struct sockaddr *sockp, struct bpf_insn *filter)
{
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen;
	u_int len;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_FDDI:
		sockp->sa_family = AF_UNSPEC;
		/* XXX 4(FORMAC)+6(dst)+6(src)+3(LLC)+5(SNAP) */
		hlen = 24;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_ATM_RFC1483:
		/*
		 * The en(4) ATM driver requires a 4-byte ATM pseudo
		 * header.  Though it isn't standard, the VPI:VCI needs
		 * to be specified anyway.
		 */
		sockp->sa_family = AF_UNSPEC;
		hlen = 12; 	/* XXX 4(ATM_PH) + 3(LLC) + 5(SNAP) */
		break;

	default:
		return (EIO);
	}

	if (uio->uio_resid > MCLBYTES)
		return (EIO);
	len = uio->uio_resid;

	MGETHDR(m, M_WAIT, MT_DATA);
	m->m_pkthdr.rcvif = 0;
	m->m_pkthdr.len = len - hlen;

	if (len > MHLEN) {
		MCLGET(m, M_WAIT);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}
	m->m_len = len;
	*mp = m;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	slen = bpf_filter(filter, mtod(m, u_char *), len, len);
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	if (m->m_len < hlen) {
		error = EPERM;
		goto bad;
	}
	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		bcopy(m->m_data, sockp->sa_data, hlen);
		m->m_len -= hlen;
		m->m_data += hlen; /* XXX */
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_NOWAIT);
	if (mtag == NULL) {
		error = ENOMEM;
		goto bad;	/* don't leak the mbuf chain */
	}
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	return (0);
 bad:
	m_freem(m);
	return (error);
}
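/*
 * A minimal sketch (not part of the original sources) of how a later
 * consumer might read back the PACKET_TAG_DLT tag that bpf_movein()
 * prepends above.  It assumes the BSD m_tag_find() API; the tag
 * payload lives directly after the struct m_tag header, exactly where
 * the code above wrote it.
 */
static u_int
pkt_linktype(struct mbuf *m, u_int fallback)
{
	struct m_tag *mtag;

	mtag = m_tag_find(m, PACKET_TAG_DLT, NULL);
	if (mtag == NULL)
		return (fallback);		/* untagged packet */
	return (*(u_int *)(mtag + 1));
}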
Example #13
static int bsd_accept    ( cyg_file *fp, cyg_file *new_fp,
                           struct sockaddr *name, socklen_t *anamelen )
{
    struct mbuf *nam;
    socklen_t namelen = 0;
    int error = 0, s;
    register struct socket *so;

    if( anamelen != NULL )
        namelen = *anamelen;

    s = splsoftnet();
    so = (struct socket *)fp->f_data;

    if ((so->so_options & SO_ACCEPTCONN) == 0) {
        splx(s);
        return (EINVAL);
    }

    if ((so->so_state & SS_NBIO) && so->so_qlen == 0) {
        splx(s);
        return (EWOULDBLOCK);
    }

    while (so->so_qlen == 0 && so->so_error == 0) {
        if (so->so_state & SS_CANTRCVMORE) {
            so->so_error = ECONNABORTED;
            break;
        }
        error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
                       netcon, 0);
        if (error) {
            splx(s);
            return (error);
        }
    }

    if (so->so_error) {
        error = so->so_error;
        so->so_error = 0;
        splx(s);
        return (error);
    }

    {
        struct socket *aso = so->so_q;
        if (soqremque(aso, 1) == 0)
            panic("accept");
        so = aso;
    }

    cyg_selinit(&so->so_rcv.sb_sel);
    cyg_selinit(&so->so_snd.sb_sel);
    
    new_fp->f_type      = DTYPE_SOCKET;
    new_fp->f_flag     |= FREAD|FWRITE;
    new_fp->f_offset    = 0;
    new_fp->f_ops       = &bsd_sock_fileops;
    new_fp->f_data      = (CYG_ADDRWORD)so;
    new_fp->f_xops      = (CYG_ADDRWORD)&bsd_sockops;
    
    nam = m_get(M_WAIT, MT_SONAME);
    (void) soaccept(so, nam);
    if (name) {
        if (namelen > nam->m_len)
            namelen = nam->m_len;
        /* SHOULD COPY OUT A CHAIN HERE */
        if ((error = copyout(mtod(nam, caddr_t),
                             (caddr_t)name, namelen)) == 0)
            *anamelen = namelen;
    }
    m_freem(nam);
    splx(s);
    
    return (error);
}
Example #14
/*
 * Potentially decap ESP in UDP frame.  Check for an ESP header
 * and optional marker; if present, strip the UDP header and
 * push the result through IPSec.
 *
 * Returns mbuf to be processed (potentially re-allocated) or
 * NULL if consumed and/or processed.
 */
static struct mbuf *
udp4_espdecap(struct inpcb *inp, struct mbuf *m, int off)
{
	size_t minlen, payload, skip, iphlen;
	caddr_t data;
	struct udpcb *up;
	struct m_tag *tag;
	struct udphdr *udphdr;
	struct ip *ip;

	INP_RLOCK_ASSERT(inp);

	/* 
	 * Pull up data so the longest case is contiguous:
	 *    IP/UDP hdr + non ESP marker + ESP hdr.
	 */
	minlen = off + sizeof(uint64_t) + sizeof(struct esp);
	if (minlen > m->m_pkthdr.len)
		minlen = m->m_pkthdr.len;
	if ((m = m_pullup(m, minlen)) == NULL) {
		IPSECSTAT_INC(ips_in_inval);
		return (NULL);		/* Bypass caller processing. */
	}
	data = mtod(m, caddr_t);	/* Points to ip header. */
	payload = m->m_len - off;	/* Size of payload. */

	if (payload == 1 && data[off] == '\xff')
		return (m);		/* NB: keepalive packet, no decap. */

	up = intoudpcb(inp);
	KASSERT(up != NULL, ("%s: udpcb NULL", __func__));
	KASSERT((up->u_flags & UF_ESPINUDP_ALL) != 0,
	    ("u_flags 0x%x", up->u_flags));

	/* 
	 * Check that the payload is large enough to hold an
	 * ESP header and compute the amount of data to remove.
	 *
	 * NB: the caller has already done a pullup for us.
	 * XXX can we assume alignment and eliminate bcopys?
	 */
	if (up->u_flags & UF_ESPINUDP_NON_IKE) {
		/*
		 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
		 * draft-ietf-ipsec-udp-encaps-(00/)01.txt, ignoring
		 * possible AH mode non-IKE marker+non-ESP marker
		 * from draft-ietf-ipsec-udp-encaps-00.txt.
		 */
		uint64_t marker;

		if (payload <= sizeof(uint64_t) + sizeof(struct esp))
			return (m);	/* NB: no decap. */
		bcopy(data + off, &marker, sizeof(uint64_t));
		if (marker != 0)	/* Non-IKE marker. */
			return (m);	/* NB: no decap. */
		skip = sizeof(uint64_t) + sizeof(struct udphdr);
	} else {
		uint32_t spi;

		if (payload <= sizeof(struct esp)) {
			IPSECSTAT_INC(ips_in_inval);
			m_freem(m);
			return (NULL);	/* Discard. */
		}
		bcopy(data + off, &spi, sizeof(uint32_t));
		if (spi == 0)		/* Non-ESP marker. */
			return (m);	/* NB: no decap. */
		skip = sizeof(struct udphdr);
	}

	/*
	 * Setup a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
	 * the UDP ports. This is required if we want to select
	 * the right SPD for multiple hosts behind same NAT.
	 *
	 * NB: ports are maintained in network byte order everywhere
	 *     in the NAT-T code.
	 */
	tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
		2 * sizeof(uint16_t), M_NOWAIT);
	if (tag == NULL) {
		IPSECSTAT_INC(ips_in_nomem);
		m_freem(m);
		return (NULL);		/* Discard. */
	}
	iphlen = off - sizeof(struct udphdr);
	udphdr = (struct udphdr *)(data + iphlen);
	((uint16_t *)(tag + 1))[0] = udphdr->uh_sport;
	((uint16_t *)(tag + 1))[1] = udphdr->uh_dport;
	m_tag_prepend(m, tag);

	/*
	 * Remove the UDP header (and possibly the non ESP marker)
	 * IP header length is iphlen
	 * Before:
	 *   <--- off --->
	 *   +----+------+-----+
	 *   | IP |  UDP | ESP |
	 *   +----+------+-----+
	 *        <-skip->
	 * After:
	 *          +----+-----+
	 *          | IP | ESP |
	 *          +----+-----+
	 *   <-skip->
	 */
	ovbcopy(data, data + skip, iphlen);
	m_adj(m, skip);

	ip = mtod(m, struct ip *);
	ip->ip_len = htons(ntohs(ip->ip_len) - skip);
	ip->ip_p = IPPROTO_ESP;

	/*
	 * We cannot yet update the cksums so clear any
	 * h/w cksum flags as they are no longer valid.
	 */
	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID)
		m->m_pkthdr.csum_flags &= ~(CSUM_DATA_VALID|CSUM_PSEUDO_HDR);

	(void) ipsec4_common_input(m, iphlen, ip->ip_p);
	return (NULL);			/* NB: consumed, bypass processing. */
}
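/*
 * A minimal sketch (not part of the original sources) of the header
 * shift done by ovbcopy()/m_adj() above.  The source and destination
 * regions overlap by construction, so a memmove-style copy is
 * required; a plain memcpy would be undefined behaviour here.
 */
#include <string.h>

static void
slide_ip_header(char *data, size_t iphlen, size_t skip)
{
	/* Move the IP header right, over the UDP header (and marker). */
	memmove(data + skip, data, iphlen);
	/*
	 * The packet now effectively begins at data + skip: the IP
	 * header immediately followed by the ESP header, as in the
	 * diagram above.
	 */
}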
Example #15
/*
 * nfs_request - goes something like this
 *	- fill in request struct
 *	- links it into list
 *	- calls nfs_send() for first transmit
 *	- calls nfs_receive() to get reply
 *	- break down rpc header and return with nfs reply pointed to
 *	  by mrep or error
 * nb: always frees up mreq mbuf list
 */
int
nfs_request(struct vnode *vp, struct mbuf *mreq, int procnum,
    struct thread *td, struct ucred *cred, struct mbuf **mrp,
    struct mbuf **mdp, caddr_t *dposp)
{
	struct mbuf *mrep;
	u_int32_t *tl;
	struct nfsmount *nmp;
	struct mbuf *md;
	time_t waituntil;
	caddr_t dpos;
	int error = 0, timeo;
	AUTH *auth = NULL;
	enum nfs_rto_timer_t timer;
	struct nfs_feedback_arg nf;
	struct rpc_callextra ext;
	enum clnt_stat stat;
	struct timeval timo;

	/* Reject requests while attempting a forced unmount. */
	if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) {
		m_freem(mreq);
		return (ESTALE);
	}
	nmp = VFSTONFS(vp->v_mount);
	bzero(&nf, sizeof(struct nfs_feedback_arg));
	nf.nf_mount = nmp;
	nf.nf_td = td;
	nf.nf_lastmsg = time_uptime -
	    ((nmp->nm_tprintf_delay) - (nmp->nm_tprintf_initial_delay));

	/*
	 * XXX if not already connected call nfs_connect now.  Longer
	 * term, change nfs_mount to call nfs_connect unconditionally
	 * and let clnt_reconnect_create handle reconnects.
	 */
	if (!nmp->nm_client)
		nfs_connect(nmp);

	auth = nfs_getauth(nmp, cred);
	if (!auth) {
		m_freem(mreq);
		return (EACCES);
	}
	bzero(&ext, sizeof(ext));
	ext.rc_auth = auth;

	ext.rc_feedback = nfs_feedback;
	ext.rc_feedback_arg = &nf;

	/*
	 * Use a conservative timeout for RPCs other than getattr,
	 * lookup, read or write.  The justification for handling
	 * "other" RPCs this way is that they happen so infrequently
	 * that the timer estimate would probably be stale.  Also,
	 * since many of these RPCs are non-idempotent, a conservative
	 * timeout is desired.
	 */
	timer = nfs_rto_timer(procnum);
	if (timer != NFS_DEFAULT_TIMER)
		ext.rc_timers = &nmp->nm_timers[timer - 1];
	else
		ext.rc_timers = NULL;

#ifdef KDTRACE_HOOKS
	if (dtrace_nfsclient_nfs23_start_probe != NULL) {
		uint32_t probe_id;
		int probe_procnum;

		if (nmp->nm_flag & NFSMNT_NFSV3) {
			probe_id = nfsclient_nfs3_start_probes[procnum];
			probe_procnum = procnum;
		} else {
			probe_id = nfsclient_nfs2_start_probes[procnum];
			probe_procnum = nfsv2_procid[procnum];
		}
		if (probe_id != 0)
			(dtrace_nfsclient_nfs23_start_probe)(probe_id, vp,
			    mreq, cred, probe_procnum);
	}
#endif

	nfsstats.rpcrequests++;
tryagain:
	/*
	 * This timeout specifies when a new socket should be created,
	 * along with new xid values. For UDP, this should be done
	 * infrequently, since retransmits of RPC requests should normally
	 * use the same xid.
	 */
	if (nmp->nm_sotype == SOCK_DGRAM) {
		if ((nmp->nm_flag & NFSMNT_SOFT) != 0) {
			/*
			 * CLSET_RETRIES is set to 2, so this should be half
			 * of the total timeout required.
			 */
			timeo = nmp->nm_retry * nmp->nm_timeo / 2;
			if (timeo < 1)
				timeo = 1;
			timo.tv_sec = timeo / NFS_HZ;
			timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ;
		} else {
			/* For UDP hard mounts, use a large value. */
			timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
			timo.tv_usec = 0;
		}
	} else {
		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
	}
	mrep = NULL;
	stat = CLNT_CALL_MBUF(nmp->nm_client, &ext,
	    (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum],
	    mreq, &mrep, timo);

	/*
	 * If there was a successful reply and a tprintf msg had been
	 * printed, tprintf a response.
	 */
	if (stat == RPC_SUCCESS)
		error = 0;
	else if (stat == RPC_TIMEDOUT) {
		nfsstats.rpctimeouts++;
		error = ETIMEDOUT;
	} else if (stat == RPC_VERSMISMATCH) {
		nfsstats.rpcinvalid++;
		error = EOPNOTSUPP;
	} else if (stat == RPC_PROGVERSMISMATCH) {
		nfsstats.rpcinvalid++;
		error = EPROTONOSUPPORT;
	} else if (stat == RPC_INTR) {
		error = EINTR;
	} else {
		nfsstats.rpcinvalid++;
		error = EACCES;
	}
	if (error)
		goto nfsmout;

	KASSERT(mrep != NULL, ("mrep shouldn't be NULL if no error\n"));

	/*
	 * Search for any mbufs that are not a multiple of 4 bytes long
	 * or with m_data not longword aligned.
	 * These could cause pointer alignment problems, so copy them to
	 * well aligned mbufs.
	 */
	error = nfs_realign(&mrep, M_NOWAIT);
	if (error == ENOMEM) {
		m_freem(mrep);
		AUTH_DESTROY(auth);
		nfsstats.rpcinvalid++;
		return (error);
	}

	md = mrep;
	dpos = mtod(mrep, caddr_t);
	tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED);
	if (*tl != 0) {
		error = fxdr_unsigned(int, *tl);
		if ((nmp->nm_flag & NFSMNT_NFSV3) &&
		    error == NFSERR_TRYLATER) {
			m_freem(mrep);
			error = 0;
			waituntil = time_second + nfs3_jukebox_delay;
			while (time_second < waituntil)
				(void)tsleep(&fake_wchan, PSOCK, "nqnfstry",
				    hz);
			goto tryagain;
		}
		/*
		 * Make sure NFSERR_RETERR isn't bogusly set by a server
		 * such as amd. (No actual NFS error has bit 31 set.)
		 */
		error &= ~NFSERR_RETERR;

		/*
		 * If the File Handle was stale, invalidate the lookup
		 * cache, just in case.
		 */
		if (error == ESTALE)
			nfs_purgecache(vp);
		/*
		 * Skip wcc data on non-ENOENT NFS errors for now.
		 * NetApp filers return corrupt postop attrs in the
		 * wcc data for NFS err EROFS.  Not sure if they could
		 * return corrupt postop attrs for other errors.
		 * Blocking ENOENT post-op attributes breaks negative
		 * name caching, so always allow it through.
		 */
		if ((nmp->nm_flag & NFSMNT_NFSV3) &&
		    (!nfs_skip_wcc_data_onerr || error == ENOENT)) {
			*mrp = mrep;
			*mdp = md;
			*dposp = dpos;
			error |= NFSERR_RETERR;
		} else
			m_freem(mrep);
		goto nfsmout;
	}
Example #16
/*
 * ae_start:		[ifnet interface function]
 *
 *	Start packet transmission on the interface.
 */
static void
ae_start(struct ifnet *ifp)
{
	struct ae_softc *sc = ifp->if_softc;
	struct mbuf *m0, *m;
	struct ae_txsoft *txs, *last_txs = NULL;
	bus_dmamap_t dmamap;
	int error, firsttx, nexttx, lasttx = 1, ofree, seg;

	DPRINTF(sc, ("%s: ae_start: sc_flags 0x%08x, if_flags 0x%08x\n",
	    device_xname(sc->sc_dev), sc->sc_flags, ifp->if_flags));


	if ((ifp->if_flags & (IFF_RUNNING|IFF_OACTIVE)) != IFF_RUNNING)
		return;

	/*
	 * Remember the previous number of free descriptors and
	 * the first descriptor we'll use.
	 */
	ofree = sc->sc_txfree;
	firsttx = sc->sc_txnext;

	DPRINTF(sc, ("%s: ae_start: txfree %d, txnext %d\n",
	    device_xname(sc->sc_dev), ofree, firsttx));

	/*
	 * Loop through the send queue, setting up transmit descriptors
	 * until we drain the queue, or use up all available transmit
	 * descriptors.
	 */
	while ((txs = SIMPLEQ_FIRST(&sc->sc_txfreeq)) != NULL &&
	       sc->sc_txfree != 0) {
		/*
		 * Grab a packet off the queue.
		 */
		IFQ_POLL(&ifp->if_snd, m0);
		if (m0 == NULL)
			break;
		m = NULL;

		dmamap = txs->txs_dmamap;

		/*
		 * Load the DMA map.  If this fails, the packet either
		 * didn't fit in the allotted number of segments, or we were
		 * short on resources.  In this case, we'll copy and try
		 * again.
		 */
		if (((mtod(m0, uintptr_t) & 3) != 0) ||
		    bus_dmamap_load_mbuf(sc->sc_dmat, dmamap, m0,
		      BUS_DMA_WRITE|BUS_DMA_NOWAIT) != 0) {
			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				printf("%s: unable to allocate Tx mbuf\n",
				    device_xname(sc->sc_dev));
				break;
			}
			MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
			if (m0->m_pkthdr.len > MHLEN) {
				MCLGET(m, M_DONTWAIT);
				if ((m->m_flags & M_EXT) == 0) {
					printf("%s: unable to allocate Tx "
					    "cluster\n", device_xname(sc->sc_dev));
					m_freem(m);
					break;
				}
			}
			m_copydata(m0, 0, m0->m_pkthdr.len, mtod(m, void *));
			m->m_pkthdr.len = m->m_len = m0->m_pkthdr.len;
			error = bus_dmamap_load_mbuf(sc->sc_dmat, dmamap,
			    m, BUS_DMA_WRITE|BUS_DMA_NOWAIT);
			if (error) {
				printf("%s: unable to load Tx buffer, "
				    "error = %d\n", device_xname(sc->sc_dev),
				    error);
				break;
			}
		}

		/*
		 * Ensure we have enough descriptors free to describe
		 * the packet.
		 */
		if (dmamap->dm_nsegs > sc->sc_txfree) {
			/*
			 * Not enough free descriptors to transmit this
			 * packet.  We haven't committed to anything yet,
			 * so just unload the DMA map, put the packet
			 * back on the queue, and punt.  Notify the upper
			 * layer that there are no more slots left.
			 *
			 * XXX We could allocate an mbuf and copy, but
			 * XXX is it worth it?
			 */
			ifp->if_flags |= IFF_OACTIVE;
			bus_dmamap_unload(sc->sc_dmat, dmamap);
			if (m != NULL)
				m_freem(m);
			break;
		}

		IFQ_DEQUEUE(&ifp->if_snd, m0);
		if (m != NULL) {
			m_freem(m0);
			m0 = m;
		}

		/*
		 * WE ARE NOW COMMITTED TO TRANSMITTING THE PACKET.
		 */

		/* Sync the DMA map. */
		bus_dmamap_sync(sc->sc_dmat, dmamap, 0, dmamap->dm_mapsize,
		    BUS_DMASYNC_PREWRITE);

		/*
		 * Initialize the transmit descriptors.
		 */
		for (nexttx = sc->sc_txnext, seg = 0;
		     seg < dmamap->dm_nsegs;
		     seg++, nexttx = AE_NEXTTX(nexttx)) {
			/*
			 * If this is the first descriptor we're
			 * enqueueing, don't set the OWN bit just
			 * yet.  That could cause a race condition.
			 * We'll do it below.
			 */
			sc->sc_txdescs[nexttx].ad_status =
			    (nexttx == firsttx) ? 0 : ADSTAT_OWN;
			sc->sc_txdescs[nexttx].ad_bufaddr1 =
			    dmamap->dm_segs[seg].ds_addr;
			sc->sc_txdescs[nexttx].ad_ctl =
			    (dmamap->dm_segs[seg].ds_len <<
				ADCTL_SIZE1_SHIFT) |
				(nexttx == (AE_NTXDESC - 1) ?
				    ADCTL_ER : 0);
			lasttx = nexttx;
		}

		KASSERT(lasttx != -1);

		/* Set `first segment' and `last segment' appropriately. */
		sc->sc_txdescs[sc->sc_txnext].ad_ctl |= ADCTL_Tx_FS;
		sc->sc_txdescs[lasttx].ad_ctl |= ADCTL_Tx_LS;

#ifdef AE_DEBUG
		if (ifp->if_flags & IFF_DEBUG) {
			printf("     txsoft %p transmit chain:\n", txs);
			for (seg = sc->sc_txnext;; seg = AE_NEXTTX(seg)) {
				printf("     descriptor %d:\n", seg);
				printf("       ad_status:   0x%08x\n",
				    sc->sc_txdescs[seg].ad_status);
				printf("       ad_ctl:      0x%08x\n",
				    sc->sc_txdescs[seg].ad_ctl);
				printf("       ad_bufaddr1: 0x%08x\n",
				    sc->sc_txdescs[seg].ad_bufaddr1);
				printf("       ad_bufaddr2: 0x%08x\n",
				    sc->sc_txdescs[seg].ad_bufaddr2);
				if (seg == lasttx)
					break;
			}
		}
#endif

		/* Sync the descriptors we're using. */
		AE_CDTXSYNC(sc, sc->sc_txnext, dmamap->dm_nsegs,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);

		/*
		 * Store a pointer to the packet so we can free it later,
		 * and remember what txdirty will be once the packet is
		 * done.
		 */
		txs->txs_mbuf = m0;
		txs->txs_firstdesc = sc->sc_txnext;
		txs->txs_lastdesc = lasttx;
		txs->txs_ndescs = dmamap->dm_nsegs;

		/* Advance the tx pointer. */
		sc->sc_txfree -= dmamap->dm_nsegs;
		sc->sc_txnext = nexttx;

		SIMPLEQ_REMOVE_HEAD(&sc->sc_txfreeq, txs_q);
		SIMPLEQ_INSERT_TAIL(&sc->sc_txdirtyq, txs, txs_q);

		last_txs = txs;

		/*
		 * Pass the packet to any BPF listeners.
		 */
		bpf_mtap(ifp, m0);
	}
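/*
 * The tail of ae_start() is truncated above.  A hedged sketch,
 * reusing the excerpt's names, of the handoff such drivers perform
 * after the loop: every descriptor except the first was marked
 * ADSTAT_OWN immediately, and the first one is flipped last so the
 * DMA engine never sees a half-built chain.
 */
	if (sc->sc_txfree != ofree) {		/* anything enqueued? */
		sc->sc_txdescs[firsttx].ad_status |= ADSTAT_OWN;
		AE_CDTXSYNC(sc, firsttx, 1,
		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
		/* ...then kick the chip's transmit poll register. */
	}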
Example #17
			*np = m->m_next;
			m->m_next = n0;

			n->m_len = hlen;
			m->m_len = skip;

			m = n;			/* header is at front ... */
			*off = 0;		/* ... of new mbuf */
		}
		IPSECSTAT_INC(ips_mbinserted);
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		bcopy(mtod(m, caddr_t) + skip,
		      mtod(m, caddr_t) + skip + hlen, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}

/*
 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
 * length is updated, and a pointer to the first byte of the padding
 * (which is guaranteed to be all in one mbuf) is returned.
 */
caddr_t
m_pad(struct mbuf *m, int n)
Example #18
/*
 * Defragment a mbuf chain, returning the shortest possible
 * chain of mbufs and clusters.  If allocation fails and
 * this cannot be completed, NULL will be returned, but
 * the passed in chain will be unchanged.  Upon success,
 * the original chain will be freed, and the new chain
 * will be returned.
 *
 * If a non-packet header is passed in, the original
 * mbuf (chain?) will be returned unharmed.
 */
struct mbuf *
m_defrag(struct mbuf *m0, int how)
{
	struct mbuf *m_new = NULL, *m_final = NULL;
	int progress = 0, length;

	MBUF_CHECKSLEEP(how);
	if (!(m0->m_flags & M_PKTHDR))
		return (m0);

	m_fixhdr(m0); /* Needed sanity check */

#ifdef MBUF_STRESS_TEST
	if (m_defragrandomfailures) {
		int temp = arc4random() & 0xff;
		if (temp == 0xba)
			goto nospace;
	}
#endif

	if (m0->m_pkthdr.len > MHLEN)
		m_final = m_getcl(how, MT_DATA, M_PKTHDR);
	else
		m_final = m_gethdr(how, MT_DATA);

	if (m_final == NULL)
		goto nospace;

	if (m_dup_pkthdr(m_final, m0, how) == 0)
		goto nospace;

	m_new = m_final;

	while (progress < m0->m_pkthdr.len) {
		length = m0->m_pkthdr.len - progress;
		if (length > MCLBYTES)
			length = MCLBYTES;

		if (m_new == NULL) {
			if (length > MLEN)
				m_new = m_getcl(how, MT_DATA, 0);
			else
				m_new = m_get(how, MT_DATA);
			if (m_new == NULL)
				goto nospace;
		}

		m_copydata(m0, progress, length, mtod(m_new, caddr_t));
		progress += length;
		m_new->m_len = length;
		if (m_new != m_final)
			m_cat(m_final, m_new);
		m_new = NULL;
	}
#ifdef MBUF_STRESS_TEST
	if (m0->m_next == NULL)
		m_defraguseless++;
#endif
	m_freem(m0);
	m0 = m_final;
#ifdef MBUF_STRESS_TEST
	m_defragpackets++;
	m_defragbytes += m0->m_pkthdr.len;
#endif
	return (m0);
nospace:
#ifdef MBUF_STRESS_TEST
	m_defragfailure++;
#endif
	if (m_final)
		m_freem(m_final);
	return (NULL);
}
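/*
 * A minimal usage sketch (not part of the original sources): typical
 * m_defrag() use in a transmit path when a DMA map needs fewer
 * segments.  tx_collapse() is a hypothetical wrapper; the ownership
 * rules follow the comment above m_defrag().
 */
static struct mbuf *
tx_collapse(struct mbuf *m)
{
	struct mbuf *n;

	n = m_defrag(m, M_NOWAIT);
	if (n == NULL) {
		m_freem(m);	/* failure: original chain left unchanged */
		return (NULL);
	}
	return (n);		/* success: original chain already freed */
}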
Example #19
/*
 * ESP output routine, called by ipsp_process_packet().
 */
int
esp_output(struct mbuf *m, struct tdb *tdb, struct mbuf **mp, int skip,
    int protoff)
{
	struct enc_xform *espx = (struct enc_xform *) tdb->tdb_encalgxform;
	struct auth_hash *esph = (struct auth_hash *) tdb->tdb_authalgxform;
	int ilen, hlen, rlen, padding, blks, alen;
	struct mbuf *mi, *mo = (struct mbuf *) NULL;
	struct tdb_crypto *tc;
	unsigned char *pad;
	u_int8_t prot;

	struct cryptodesc *crde = NULL, *crda = NULL;
	struct cryptop *crp;
#if NBPFILTER > 0
	struct ifnet *ifn = &(encif[0].sc_if);

	ifn->if_opackets++;
	ifn->if_obytes += m->m_pkthdr.len;

	if (ifn->if_bpf) {
		struct enchdr hdr;

		bzero (&hdr, sizeof(hdr));

		hdr.af = tdb->tdb_dst.sa.sa_family;
		hdr.spi = tdb->tdb_spi;
		if (espx)
			hdr.flags |= M_CONF;
		if (esph)
			hdr.flags |= M_AUTH;

		bpf_mtap_hdr(ifn->if_bpf, (char *)&hdr, ENC_HDRLEN, m,
		    BPF_DIRECTION_OUT);
	}
#endif

	if (tdb->tdb_flags & TDBF_NOREPLAY)
		hlen = sizeof(u_int32_t) + tdb->tdb_ivlen;
	else
		hlen = 2 * sizeof(u_int32_t) + tdb->tdb_ivlen;

	rlen = m->m_pkthdr.len - skip; /* Raw payload length. */
	if (espx)
		blks = espx->blocksize;
	else
		blks = 4; /* If no encryption, we have to be 4-byte aligned. */

	padding = ((blks - ((rlen + 2) % blks)) % blks) + 2;

	if (esph)
		alen = AH_HMAC_HASHLEN;
	else
		alen = 0;

	espstat.esps_output++;

	switch (tdb->tdb_dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
		/* Check for IP maximum packet size violations. */
		if (skip + hlen + rlen + padding + alen > IP_MAXPACKET)	{
			DPRINTF(("esp_output(): packet in SA %s/%08x got "
			    "too big\n", ipsp_address(tdb->tdb_dst),
			    ntohl(tdb->tdb_spi)));
			m_freem(m);
			espstat.esps_toobig++;
			return EMSGSIZE;
		}
		break;
#endif /* INET */

#ifdef INET6
	case AF_INET6:
		/* Check for IPv6 maximum packet size violations. */
		if (skip + hlen + rlen + padding + alen > IPV6_MAXPACKET) {
			DPRINTF(("esp_output(): packet in SA %s/%08x got too "
			    "big\n", ipsp_address(tdb->tdb_dst),
			    ntohl(tdb->tdb_spi)));
			m_freem(m);
			espstat.esps_toobig++;
			return EMSGSIZE;
		}
		break;
#endif /* INET6 */

	default:
		DPRINTF(("esp_output(): unknown/unsupported protocol "
		    "family %d, SA %s/%08x\n", tdb->tdb_dst.sa.sa_family
		    , ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi)));
		m_freem(m);
		espstat.esps_nopf++;
		return EPFNOSUPPORT;
	}

	/* Update the counters. */
	tdb->tdb_cur_bytes += m->m_pkthdr.len - skip;
	espstat.esps_obytes += m->m_pkthdr.len - skip;

	/* Hard byte expiration. */
	if (tdb->tdb_flags & TDBF_BYTES &&
	    tdb->tdb_cur_bytes >= tdb->tdb_exp_bytes) {
		pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_HARD);
		tdb_delete(tdb);
		m_freem(m);
		return EINVAL;
	}

	/* Soft byte expiration. */
	if (tdb->tdb_flags & TDBF_SOFT_BYTES &&
	    tdb->tdb_cur_bytes >= tdb->tdb_soft_bytes) {
		pfkeyv2_expire(tdb, SADB_EXT_LIFETIME_SOFT);
		tdb->tdb_flags &= ~TDBF_SOFT_BYTES;    /* Turn off checking. */
	}

	/*
	 * Loop through mbuf chain; if we find a readonly mbuf,
	 * replace the rest of the chain.
	 */
	mo = NULL;
	mi = m;
	while (mi != NULL && !M_READONLY(mi)) {
		mo = mi;
		mi = mi->m_next;
	}

	if (mi != NULL)	{
		/* Replace the rest of the mbuf chain. */
		struct mbuf *n = m_copym2(mi, 0, M_COPYALL, M_DONTWAIT);

		if (n == NULL) {
			DPRINTF(("esp_output(): bad mbuf chain, SA %s/%08x\n",
			    ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi)));
			espstat.esps_hdrops++;
			m_freem(m);
			return ENOBUFS;
		}

		if (mo != NULL)
			mo->m_next = n;
		else
			m = n;

		m_freem(mi);
	}

	/* Inject ESP header. */
	mo = m_inject(m, skip, hlen, M_DONTWAIT);
	if (mo == NULL) {
		DPRINTF(("esp_output(): failed to inject ESP header for "
		    "SA %s/%08x\n", ipsp_address(tdb->tdb_dst),
		    ntohl(tdb->tdb_spi)));
		m_freem(m);
		espstat.esps_hdrops++;
		return ENOBUFS;
	}

	/* Initialize ESP header. */
	bcopy((caddr_t) &tdb->tdb_spi, mtod(mo, caddr_t), sizeof(u_int32_t));
	if (!(tdb->tdb_flags & TDBF_NOREPLAY)) {
		u_int32_t replay = htonl(tdb->tdb_rpl++);
		bcopy((caddr_t) &replay, mtod(mo, caddr_t) + sizeof(u_int32_t),
		    sizeof(u_int32_t));
#if NPFSYNC > 0
		pfsync_update_tdb(tdb,1);
#endif
	}

	/*
	 * Add padding -- better to do it ourselves than use the crypto engine,
	 * although if/when we support compression, we'd have to do that.
	 */
	mo = m_inject(m, m->m_pkthdr.len, padding + alen, M_DONTWAIT);
	if (mo == NULL) {
		DPRINTF(("esp_output(): m_inject failed for SA %s/%08x\n",
		    ipsp_address(tdb->tdb_dst), ntohl(tdb->tdb_spi)));
		m_freem(m);
		return ENOBUFS;
	}
	pad = mtod(mo, u_char *);

	/* Self-describing or random padding ? */
	if (!(tdb->tdb_flags & TDBF_RANDOMPADDING))
		for (ilen = 0; ilen < padding - 2; ilen++)
			pad[ilen] = ilen + 1;
	else
		arc4random_buf((void *) pad, padding - 2);

	/* Fix padding length and Next Protocol in padding itself. */
	pad[padding - 2] = padding - 2;
	m_copydata(m, protoff, sizeof(u_int8_t), pad + padding - 1);

	/* Fix Next Protocol in IPv4/IPv6 header. */
	prot = IPPROTO_ESP;
	m_copyback(m, protoff, sizeof(u_int8_t), &prot);

	/* Get crypto descriptors. */
	crp = crypto_getreq(esph && espx ? 2 : 1);
	if (crp == NULL) {
		m_freem(m);
		DPRINTF(("esp_output(): failed to acquire crypto "
		    "descriptors\n"));
		espstat.esps_crypto++;
		return ENOBUFS;
	}

	if (espx) {
		crde = crp->crp_desc;
		crda = crde->crd_next;

		/* Encryption descriptor. */
		crde->crd_skip = skip + hlen;
		crde->crd_len = m->m_pkthdr.len - (skip + hlen + alen);
		crde->crd_flags = CRD_F_ENCRYPT;
		crde->crd_inject = skip + hlen - tdb->tdb_ivlen;

		if (tdb->tdb_flags & TDBF_HALFIV) {
			/* Copy half-iv in the packet. */
			m_copyback(m, crde->crd_inject, tdb->tdb_ivlen,
			    tdb->tdb_iv);

			/* Cook half-iv. */
			bcopy(tdb->tdb_iv, crde->crd_iv, tdb->tdb_ivlen);
			for (ilen = 0; ilen < tdb->tdb_ivlen; ilen++)
				crde->crd_iv[tdb->tdb_ivlen + ilen] =
				    ~crde->crd_iv[ilen];

			crde->crd_flags |=
			    CRD_F_IV_PRESENT | CRD_F_IV_EXPLICIT;
		}

		/* Encryption operation. */
		crde->crd_alg = espx->type;
		crde->crd_key = tdb->tdb_emxkey;
		crde->crd_klen = tdb->tdb_emxkeylen * 8;
		/* XXX Rounds ? */
	} else
		crda = crp->crp_desc;

	/* IPsec-specific opaque crypto info. */
	tc = malloc(sizeof(*tc), M_XDATA, M_NOWAIT | M_ZERO);
	if (tc == NULL) {
		m_freem(m);
		crypto_freereq(crp);
		DPRINTF(("esp_output(): failed to allocate tdb_crypto\n"));
		espstat.esps_crypto++;
		return ENOBUFS;
	}

	tc->tc_spi = tdb->tdb_spi;
	tc->tc_proto = tdb->tdb_sproto;
	bcopy(&tdb->tdb_dst, &tc->tc_dst, sizeof(union sockaddr_union));

	/* Crypto operation descriptor. */
	crp->crp_ilen = m->m_pkthdr.len; /* Total input length. */
	crp->crp_flags = CRYPTO_F_IMBUF;
	crp->crp_buf = (caddr_t) m;
	crp->crp_callback = (int (*) (struct cryptop *)) esp_output_cb;
	crp->crp_opaque = (caddr_t) tc;
	crp->crp_sid = tdb->tdb_cryptoid;

	if (esph) {
		/* Authentication descriptor. */
		crda->crd_skip = skip;
		crda->crd_len = m->m_pkthdr.len - (skip + alen);
		crda->crd_inject = m->m_pkthdr.len - alen;

		/* Authentication operation. */
		crda->crd_alg = esph->type;
		crda->crd_key = tdb->tdb_amxkey;
		crda->crd_klen = tdb->tdb_amxkeylen * 8;
	}

	if ((tdb->tdb_flags & TDBF_SKIPCRYPTO) == 0)
		return crypto_dispatch(crp);
	else
		return esp_output_cb(crp);
}
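/*
 * A minimal sketch (not part of the original sources) of the
 * "half-IV cooking" from the TDBF_HALFIV branch above, in isolation.
 * Only ivlen bytes travel in the packet; the cipher is fed a full
 * 2*ivlen IV made of the transmitted half followed by its bitwise
 * complement.
 */
static void
cook_half_iv(u_int8_t *iv, int ivlen)	/* iv has room for 2*ivlen bytes */
{
	int i;

	for (i = 0; i < ivlen; i++)
		iv[ivlen + i] = ~iv[i];
}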
Example #20
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAITOK/M_NOWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	MBUF_CHECKSLEEP(wait);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == NULL) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		if (copyhdr)
			n = m_gethdr(wait, m->m_type);
		else
			n = m_get(wait, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, wait))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}

	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
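/*
 * A minimal usage sketch (not part of the original sources): cloning
 * a packet for a tap with m_copym().  Because clusters are
 * reference-counted rather than copied, the clone is read-only;
 * anything that wants to modify it must check M_WRITABLE() or take a
 * deep copy.  deliver_to_tap() is a hypothetical consumer that frees
 * the clone.
 */
static void
tap_packet(struct mbuf *m)
{
	struct mbuf *copy;

	copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
	if (copy == NULL)
		return;			/* tap is best-effort */
	deliver_to_tap(copy);
}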
Example #21
struct mbuf *
ieee80211_ccmp_decrypt(struct ieee80211com *ic, struct mbuf *m0,
    struct ieee80211_key *k)
{
	struct ieee80211_ccmp_ctx *ctx = k->k_priv;
	struct ieee80211_frame *wh;
	u_int64_t pn, *prsc;
	const u_int8_t *ivp, *src;
	u_int8_t *dst;
	u_int8_t mic0[IEEE80211_CCMP_MICLEN];
	u_int8_t a[16], b[16], s0[16], s[16];
	struct mbuf *n0, *m, *n;
	int hdrlen, left, moff, noff, len;
	u_int16_t ctr;
	int i, j;

	wh = mtod(m0, struct ieee80211_frame *);
	hdrlen = ieee80211_get_hdrlen(wh);
	ivp = (u_int8_t *)wh + hdrlen;

	if (m0->m_pkthdr.len < hdrlen + IEEE80211_CCMP_HDRLEN +
	    IEEE80211_CCMP_MICLEN) {
		m_freem(m0);
		return NULL;
	}
	/* check that ExtIV bit is set */
	if (!(ivp[3] & IEEE80211_WEP_EXTIV)) {
		m_freem(m0);
		return NULL;
	}

	/* retrieve last seen packet number for this frame type/priority */
	if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) ==
	    IEEE80211_FC0_TYPE_DATA) {
		u_int8_t tid = ieee80211_has_qos(wh) ?
		    ieee80211_get_qos(wh) & IEEE80211_QOS_TID : 0;
		prsc = &k->k_rsc[tid];
	} else	/* 11w: management frames have their own counters */
		prsc = &k->k_mgmt_rsc;

	/* extract the 48-bit PN from the CCMP header */
	pn = (u_int64_t)ivp[0]       |
	     (u_int64_t)ivp[1] <<  8 |
	     (u_int64_t)ivp[4] << 16 |
	     (u_int64_t)ivp[5] << 24 |
	     (u_int64_t)ivp[6] << 32 |
	     (u_int64_t)ivp[7] << 40;
	if (pn <= *prsc) {
		/* replayed frame, discard */
		ic->ic_stats.is_ccmp_replays++;
		m_freem(m0);
		return NULL;
	}

	MGET(n0, M_DONTWAIT, m0->m_type);
	if (n0 == NULL)
		goto nospace;
	if (m_dup_pkthdr(n0, m0, M_DONTWAIT))
		goto nospace;
	n0->m_pkthdr.len -= IEEE80211_CCMP_HDRLEN + IEEE80211_CCMP_MICLEN;
	n0->m_len = MHLEN;
	if (n0->m_pkthdr.len >= MINCLSIZE) {
		MCLGET(n0, M_DONTWAIT);
		if (n0->m_flags & M_EXT)
			n0->m_len = n0->m_ext.ext_size;
	}
	if (n0->m_len > n0->m_pkthdr.len)
		n0->m_len = n0->m_pkthdr.len;

	/* construct initial B, A and S_0 blocks */
	ieee80211_ccmp_phase1(&ctx->rijndael, wh, pn,
	    n0->m_pkthdr.len - hdrlen, b, a, s0);

	/* copy 802.11 header and clear protected bit */
	memcpy(mtod(n0, caddr_t), wh, hdrlen);
	wh = mtod(n0, struct ieee80211_frame *);
	wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED;

	/* construct S_1 */
	ctr = 1;
	a[14] = ctr >> 8;
	a[15] = ctr & 0xff;
	rijndael_encrypt(&ctx->rijndael, a, s);

	/* decrypt frame body and compute MIC */
	j = 0;
	m = m0;
	n = n0;
	moff = hdrlen + IEEE80211_CCMP_HDRLEN;
	noff = hdrlen;
	left = n0->m_pkthdr.len - noff;
	while (left > 0) {
		if (moff == m->m_len) {
			/* nothing left to copy from m */
			m = m->m_next;
			moff = 0;
		}
		if (noff == n->m_len) {
			/* n is full and there's more data to copy */
			MGET(n->m_next, M_DONTWAIT, n->m_type);
			if (n->m_next == NULL)
				goto nospace;
			n = n->m_next;
			n->m_len = MLEN;
			if (left >= MINCLSIZE) {
				MCLGET(n, M_DONTWAIT);
				if (n->m_flags & M_EXT)
					n->m_len = n->m_ext.ext_size;
			}
			if (n->m_len > left)
				n->m_len = left;
			noff = 0;
		}
		len = min(m->m_len - moff, n->m_len - noff);

		src = mtod(m, u_int8_t *) + moff;
		dst = mtod(n, u_int8_t *) + noff;
		for (i = 0; i < len; i++) {
			/* decrypt message */
			dst[i] = src[i] ^ s[j];
			/* update MIC with clear text */
			b[j] ^= dst[i];
			if (++j < 16)
				continue;
			/* we have a full block, encrypt MIC */
			rijndael_encrypt(&ctx->rijndael, b, b);
			/* construct a new S_ctr block */
			ctr++;
			a[14] = ctr >> 8;
			a[15] = ctr & 0xff;
			rijndael_encrypt(&ctx->rijndael, a, s);
			j = 0;
		}

		moff += len;
		noff += len;
		left -= len;
	}
	if (j != 0)	/* partial block, encrypt MIC */
		rijndael_encrypt(&ctx->rijndael, b, b);

	/* finalize MIC, U := T XOR first-M-bytes( S_0 ) */
	for (i = 0; i < IEEE80211_CCMP_MICLEN; i++)
		b[i] ^= s0[i];

	/* check that it matches the MIC in received frame */
	m_copydata(m, moff, IEEE80211_CCMP_MICLEN, mic0);
	if (timingsafe_bcmp(mic0, b, IEEE80211_CCMP_MICLEN) != 0) {
		ic->ic_stats.is_ccmp_dec_errs++;
		m_freem(m0);
		m_freem(n0);
		return NULL;
	}

	/* update last seen packet number (MIC is validated) */
	*prsc = pn;

	m_freem(m0);
	return n0;
 nospace:
	ic->ic_stats.is_rx_nombuf++;
	m_freem(m0);
	if (n0 != NULL)
		m_freem(n0);
	return NULL;
}
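/*
 * A minimal sketch (not part of the original sources) of the CCMP
 * header layout behind the PN extraction above: bytes 2 (reserved)
 * and 3 (key id / ExtIV flag) are skipped, which is why ivp[2] and
 * ivp[3] never enter the shift chain.
 */
#include <stdint.h>

static uint64_t
ccmp_get_pn(const uint8_t *ivp)		/* ivp -> 8-byte CCMP header */
{
	return (uint64_t)ivp[0]       |	/* PN0 */
	       (uint64_t)ivp[1] <<  8 |	/* PN1 */
	       (uint64_t)ivp[4] << 16 |	/* PN2 */
	       (uint64_t)ivp[5] << 24 |	/* PN3 */
	       (uint64_t)ivp[6] << 32 |	/* PN4 */
	       (uint64_t)ivp[7] << 40;	/* PN5 */
}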
Example #22
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 *
 * Note that the resulting mbufs might be read-only, because the new
 * mbuf can end up sharing an mbuf cluster with the original mbuf if
 * the "breaking point" happens to lie within a cluster mbuf. Use the
 * M_WRITABLE() macro to check for this case.
 */
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{
	struct mbuf *m, *n;
	u_int len = len0, remain;

	MBUF_CHECKSLEEP(wait);
	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == NULL)
		return (NULL);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR && remain == 0) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;
		m->m_next = NULL;
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		return (n);
	} else if (m0->m_flags & M_PKTHDR) {
		n = m_gethdr(wait, m0->m_type);
		if (n == NULL)
			return (NULL);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			M_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == NULL) {
				(void) m_free(n);
				return (NULL);
			} else {
				n->m_len = 0;
				return (n);
			}
		} else
			M_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = NULL;
		return (n);
	} else {
		n = m_get(wait, m->m_type);
		if (n == NULL)
			return (NULL);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		mb_dupcl(n, m);
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = NULL;
	return (n);
}
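/*
 * A minimal usage sketch (not part of the original sources):
 * splitting a fixed-size record off the front of a chain with
 * m_split().  On failure the chain is restored, so the caller keeps
 * ownership of m either way.
 */
static struct mbuf *
take_record(struct mbuf *m, int reclen)
{
	struct mbuf *tail;

	tail = m_split(m, reclen, M_NOWAIT);
	if (tail == NULL)
		return (NULL);	/* m unchanged; retry later */
	/* m now holds the first reclen bytes, tail the remainder. */
	return (tail);
}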
Example #23
/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
	struct mbuf *m;
	unsigned remain;

	KASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
	KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));

	for (m = m0; m && skip > m->m_len; m = m->m_next)
		skip -= m->m_len;
	if (m == NULL)
		return (NULL);
	/*
	 * At this point skip is the offset into the mbuf m
	 * where the new header should be placed.  Figure out
	 * if there's space to insert the new header.  If so,
	 * and copying the remainder makes sense then do so.
	 * Otherwise insert a new mbuf in the chain, splitting
	 * the contents of m as needed.
	 */
	remain = m->m_len - skip;		/* data to move */
	if (hlen > M_TRAILINGSPACE(m)) {
		struct mbuf *n;

		/* XXX code doesn't handle clusters XXX */
		KASSERT(remain < MLEN,
			("m_makespace: remainder too big: %u", remain));
		/*
		 * Not enough space in m, split the contents
		 * of m, inserting new mbufs as required.
		 *
		 * NB: this ignores mbuf types.
		 */
		MGET(n, MB_DONTWAIT, MT_DATA);
		if (n == NULL)
			return (NULL);
		n->m_next = m->m_next;		/* splice new mbuf */
		m->m_next = n;
		newipsecstat.ips_mbinserted++;
		if (hlen <= M_TRAILINGSPACE(m) + remain) {
			/*
			 * New header fits in the old mbuf if we copy
			 * the remainder; just do the copy to the new
			 * mbuf and we're good to go.
			 */
			memcpy(mtod(n, caddr_t),
			       mtod(m, caddr_t) + skip, remain);
			n->m_len = remain;
			m->m_len = skip + hlen;
			*off = skip;
		} else {
			/*
			 * No space in the old mbuf for the new header.
			 * Make space in the new mbuf and check the
			 * remaining data fits too.  If not then we
			 * must allocate an additional mbuf (yech).
			 */
			n->m_len = 0;
			if (remain + hlen > M_TRAILINGSPACE(n)) {
				struct mbuf *n2;

				MGET(n2, MB_DONTWAIT, MT_DATA);
				/* NB: new mbuf is on chain, let caller free */
				if (n2 == NULL)
					return (NULL);
				n2->m_len = 0;
				memcpy(mtod(n2, caddr_t),
				       mtod(m, caddr_t) + skip, remain);
				n2->m_len = remain;
				/* splice in second mbuf */
				n2->m_next = n->m_next;
				n->m_next = n2;
				newipsecstat.ips_mbinserted++;
			} else {
				memcpy(mtod(n, caddr_t) + hlen,
				       mtod(m, caddr_t) + skip, remain);
				n->m_len += remain;
			}
			m->m_len -= remain;
			n->m_len += hlen;
			m = n;			/* header is at front ... */
			*off = 0;		/* ... of new mbuf */
		}
	} else {
		/*
		 * Copy the remainder to the back of the mbuf
		 * so there's space to write the new header.
		 */
		/* XXX can this be memcpy? does it handle overlap? */
		ovbcopy(mtod(m, caddr_t) + skip,
			mtod(m, caddr_t) + skip + hlen, remain);
		m->m_len += hlen;
		*off = skip;
	}
	m0->m_pkthdr.len += hlen;		/* adjust packet length */
	return m;
}
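/*
 * A hedged sketch (not part of the original sources) of the calling
 * convention both m_makespace() variants above share, as the ESP
 * output path uses it: the returned mbuf plus offset locate the hole
 * for the new header, and on NULL the (possibly modified) chain must
 * still be freed by the caller.
 */
	int roff;
	struct mbuf *mo = m_makespace(m, skip, hlen, &roff);

	if (mo == NULL) {
		m_freem(m);	/* chain may have been rearranged */
		return (ENOBUFS);
	}
	bcopy(&spi, mtod(mo, caddr_t) + roff, sizeof(spi));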
Example #24
/*
 * TCP input routine, follows pages 65-76 of the
 * protocol specification dated September, 1981 very closely.
 */
void
tcp_input(struct mbuf *m, int iphlen, struct socket *inso)
{
	struct ip save_ip, *ip;
	register struct tcpiphdr *ti;
	caddr_t optp = NULL;
	int optlen = 0;
	int len, tlen, off;
	register struct tcpcb *tp = NULL;
	register int tiflags;
	struct socket *so = NULL;
	int todrop, acked, ourfinisacked, needoutput = 0;
	int iss = 0;
	u_long tiwin;
	int ret;
	struct ex_list *ex_ptr;
	Slirp *slirp;

	DEBUG_CALL("tcp_input");
	DEBUG_ARGS((dfd, " m = %8lx  iphlen = %2d  inso = %lx\n",
		    (long )m, iphlen, (long )inso ));

	/*
	 * If called with m == 0, then we're continuing the connect
	 */
	if (m == NULL) {
		so = inso;
		slirp = so->slirp;

		/* Re-set a few variables */
		tp = sototcpcb(so);
		m = so->so_m;
                so->so_m = NULL;
		ti = so->so_ti;
		tiwin = ti->ti_win;
		tiflags = ti->ti_flags;

		goto cont_conn;
	}
	slirp = m->slirp;

	/*
	 * Get IP and TCP header together in first mbuf.
	 * Note: IP leaves IP header in first mbuf.
	 */
	ti = mtod(m, struct tcpiphdr *);
	if (iphlen > sizeof(struct ip )) {
	  ip_stripoptions(m, (struct mbuf *)0);
	  iphlen=sizeof(struct ip );
	}
	/* XXX Check if too short */

	/*
	 * Save a copy of the IP header in case we want to restore it
	 * for sending an ICMP error message in response.
	 */
	ip = mtod(m, struct ip *);
	save_ip = *ip;
	save_ip.ip_len += iphlen;

	/*
	 * Checksum extended TCP header and data.
	 */
	tlen = ((struct ip *)ti)->ip_len;
        tcpiphdr2qlink(ti)->next = tcpiphdr2qlink(ti)->prev = NULL;
        memset(&ti->ti_i.ih_mbuf, 0 , sizeof(struct mbuf_ptr));
	ti->ti_x1 = 0;
	ti->ti_len = htons((uint16_t)tlen);
	len = sizeof(struct ip ) + tlen;
	if(cksum(m, len)) {
	  goto drop;
	}

	/*
	 * Check that TCP offset makes sense,
	 * pull out TCP options and adjust length.		XXX
	 */
	off = ti->ti_off << 2;
	if (off < sizeof (struct tcphdr) || off > tlen) {
	  goto drop;
	}
	tlen -= off;
	ti->ti_len = tlen;
	if (off > sizeof (struct tcphdr)) {
	  optlen = off - sizeof (struct tcphdr);
	  optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
	}
	tiflags = ti->ti_flags;

	/*
	 * Convert TCP protocol specific fields to host format.
	 */
	NTOHL(ti->ti_seq);
	NTOHL(ti->ti_ack);
	NTOHS(ti->ti_win);
	NTOHS(ti->ti_urp);

	/*
	 * Drop TCP, IP headers and TCP options.
	 */
	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);

    if (slirp->restricted) {
        for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
            if (ex_ptr->ex_fport == ti->ti_dport &&
                ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
                break;
            }
        }
        if (!ex_ptr)
            goto drop;
    }
	/*
	 * Locate pcb for segment.
	 */
findso:
	so = slirp->tcp_last_so;
	if (so->so_fport != ti->ti_dport ||
	    so->so_lport != ti->ti_sport ||
	    so->so_laddr.s_addr != ti->ti_src.s_addr ||
	    so->so_faddr.s_addr != ti->ti_dst.s_addr) {
		so = solookup(&slirp->tcb, ti->ti_src, ti->ti_sport,
			       ti->ti_dst, ti->ti_dport);
		if (so)
			slirp->tcp_last_so = so;
	}

	/*
	 * If the state is CLOSED (i.e., TCB does not exist) then
	 * all data in the incoming segment is discarded.
	 * If the TCB exists but is in CLOSED state, it is embryonic,
	 * but should either do a listen or a connect soon.
	 *
	 * state == CLOSED means we've done socreate() but haven't
	 * attached it to a protocol yet...
	 *
	 * XXX If a TCB does not exist, and the TH_SYN flag is
	 * the only flag set, then create a session, mark it
	 * as if it was LISTENING, and continue...
	 */
        if (so == NULL) {
	  if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
	    goto dropwithreset;

	  if ((so = socreate(slirp)) == NULL)
	    goto dropwithreset;
	  if (tcp_attach(so) < 0) {
	    free(so); /* Not sofree (if it failed, it's not insqued) */
	    goto dropwithreset;
	  }

	  sbreserve(&so->so_snd, TCP_SNDSPACE);
	  sbreserve(&so->so_rcv, TCP_RCVSPACE);

	  so->so_laddr = ti->ti_src;
	  so->so_lport = ti->ti_sport;
	  so->so_faddr = ti->ti_dst;
	  so->so_fport = ti->ti_dport;

	  if ((so->so_iptos = tcp_tos(so)) == 0)
	    so->so_iptos = ((struct ip *)ti)->ip_tos;

	  tp = sototcpcb(so);
	  tp->t_state = TCPS_LISTEN;
	}

	/*
	 * If this is a still-connecting socket, this is probably
	 * a retransmit of the SYN.  Whether it's a retransmit SYN
	 * or something else, we nuke it.
	 */
        if (so->so_state & SS_ISFCONNECTING)
                goto drop;

	tp = sototcpcb(so);

	/* XXX Should never fail */
        if (tp == NULL)
		goto dropwithreset;
	if (tp->t_state == TCPS_CLOSED)
		goto drop;

	tiwin = ti->ti_win;

	/*
	 * Segment received on connection.
	 * Reset idle time and keep-alive timer.
	 */
	tp->t_idle = 0;
	if (SO_OPTIONS)
	   tp->t_timer[TCPT_KEEP] = TCPTV_KEEPINTVL;
	else
	   tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_IDLE;

	/*
	 * Process options if not in LISTEN state,
	 * else do it below (after getting remote address).
	 */
	if (optp && tp->t_state != TCPS_LISTEN)
		tcp_dooptions(tp, (u_char *)optp, optlen, ti);

	/*
	 * Header prediction: check for the two common cases
	 * of a uni-directional data xfer.  If the packet has
	 * no control flags, is in-sequence, the window didn't
	 * change and we're not retransmitting, it's a
	 * candidate.  If the length is zero and the ack moved
	 * forward, we're the sender side of the xfer.  Just
	 * free the data acked & wake any higher level process
	 * that was blocked waiting for space.  If the length
	 * is non-zero and the ack didn't move, we're the
	 * receiver side.  If we're getting packets in-order
	 * (the reassembly queue is empty), add the data to
	 * the socket buffer and note that we need a delayed ack.
	 *
	 * XXX Some of these tests are not needed
	 * e.g. the tiwin == tp->snd_wnd test prevents many more
	 * predictions, with no *real* advantage.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
	    ti->ti_seq == tp->rcv_nxt &&
	    tiwin && tiwin == tp->snd_wnd &&
	    tp->snd_nxt == tp->snd_max) {
		if (ti->ti_len == 0) {
			if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
			    SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
			    tp->snd_cwnd >= tp->snd_wnd) {
				/*
				 * this is a pure ack for outstanding data.
				 */
				if (tp->t_rtt &&
				    SEQ_GT(ti->ti_ack, tp->t_rtseq))
					tcp_xmit_timer(tp, tp->t_rtt);
				acked = ti->ti_ack - tp->snd_una;
				sbdrop(&so->so_snd, acked);
				tp->snd_una = ti->ti_ack;
				m_free(m);

				/*
				 * If all outstanding data are acked, stop
				 * retransmit timer, otherwise restart timer
				 * using current (possibly backed-off) value.
				 * If process is waiting for space,
				 * wakeup/selwakeup/signal.  If data
				 * are ready to send, let tcp_output
				 * decide between more output or persist.
				 */
				if (tp->snd_una == tp->snd_max)
					tp->t_timer[TCPT_REXMT] = 0;
				else if (tp->t_timer[TCPT_PERSIST] == 0)
					tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;

				/*
				 * This is called because sowwakeup might have
				 * put data into so_snd.  Since we don't do sowwakeup,
				 * we don't need this... XXX???
				 */
				if (so->so_snd.sb_cc)
					(void) tcp_output(tp);

				return;
			}
		} else if (ti->ti_ack == tp->snd_una &&
		    tcpfrag_list_empty(tp) &&
		    ti->ti_len <= sbspace(&so->so_rcv)) {
			/*
			 * this is a pure, in-sequence data packet
			 * with nothing on the reassembly queue and
			 * we have enough buffer space to take it.
			 */
			tp->rcv_nxt += ti->ti_len;
			/*
			 * Add data to socket buffer.
			 */
			if (so->so_emu) {
				if (tcp_emu(so,m)) sbappend(so, m);
			} else
				sbappend(so, m);

			/*
			 * If this is a short packet, then ACK now - with Nagle
			 *	congestion avoidance the sender won't send more until
			 *	he gets an ACK.
			 *
			 * It is better to not delay acks at all to maximize
			 * TCP throughput.  See RFC 2581.
			 */
			tp->t_flags |= TF_ACKNOW;
			tcp_output(tp);
			return;
		}
	} /* header prediction */
	/*
	 * Calculate amount of space in receive window,
	 * and then do TCP input processing.
	 * Receive window is amount of space in rcv queue,
	 * but not less than advertised window.
	 */
	{ int win;
          win = sbspace(&so->so_rcv);
	  if (win < 0)
	    win = 0;
	  tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
	}

	switch (tp->t_state) {

	/*
	 * If the state is LISTEN then ignore segment if it contains an RST.
	 * If the segment contains an ACK then it is bad and send a RST.
	 * If it does not contain a SYN then it is not interesting; drop it.
	 * Don't bother responding if the destination was a broadcast.
	 * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
	 * tp->iss, and send a segment:
	 *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
	 * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
	 * Fill in remote peer address fields if not previously specified.
	 * Enter SYN_RECEIVED state, and process any other fields of this
	 * segment in this state.
	 */
	case TCPS_LISTEN: {

	  if (tiflags & TH_RST)
	    goto drop;
	  if (tiflags & TH_ACK)
	    goto dropwithreset;
	  if ((tiflags & TH_SYN) == 0)
	    goto drop;

	  /*
	   * This has way too many gotos...
	   * But a bit of spaghetti code never hurt anybody :)
	   */

	  /*
	   * If this is destined for the control address, then flag to
	   * tcp_ctl once connected, otherwise connect
	   */
	  if ((so->so_faddr.s_addr & slirp->vnetwork_mask.s_addr) ==
	      slirp->vnetwork_addr.s_addr) {
	    if (so->so_faddr.s_addr != slirp->vhost_addr.s_addr &&
		so->so_faddr.s_addr != slirp->vnameserver_addr.s_addr) {
		/* May be an add exec */
		for (ex_ptr = slirp->exec_list; ex_ptr;
		     ex_ptr = ex_ptr->ex_next) {
		  if(ex_ptr->ex_fport == so->so_fport &&
		     so->so_faddr.s_addr == ex_ptr->ex_addr.s_addr) {
		    so->so_state |= SS_CTL;
		    break;
		  }
		}
		if (so->so_state & SS_CTL) {
		    goto cont_input;
		}
	    }
	    /* CTL_ALIAS: Do nothing, tcp_fconnect will be called on it */
	  }

	  if (so->so_emu & EMU_NOCONNECT) {
	    so->so_emu &= ~EMU_NOCONNECT;
	    goto cont_input;
	  }

	  if((tcp_fconnect(so) == -1) && (errno != EINPROGRESS) && (errno != EWOULDBLOCK)) {
	    u_char code=ICMP_UNREACH_NET;
	    DEBUG_MISC((dfd, " tcp fconnect errno = %d-%s\n",
			errno,strerror(errno)));
	    if(errno == ECONNREFUSED) {
	      /* ACK the SYN, send RST to refuse the connection */
	      tcp_respond(tp, ti, m, ti->ti_seq+1, (tcp_seq)0,
			  TH_RST|TH_ACK);
	    } else {
	      if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
	      HTONL(ti->ti_seq);             /* restore tcp header */
	      HTONL(ti->ti_ack);
	      HTONS(ti->ti_win);
	      HTONS(ti->ti_urp);
	      m->m_data -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
	      m->m_len  += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
	      *ip=save_ip;
	      icmp_error(m, ICMP_UNREACH,code, 0,strerror(errno));
	    }
            tcp_close(tp);
	    m_free(m);
	  } else {
	    /*
	     * Haven't connected yet, save the current mbuf
	     * and ti, and return
	     * XXX Some OS's don't tell us whether the connect()
	     * succeeded or not.  So we must time it out.
	     */
	    so->so_m = m;
	    so->so_ti = ti;
	    tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
	    tp->t_state = TCPS_SYN_RECEIVED;
	    tcp_template(tp);
	  }
	  return;

	cont_conn:
	  /* m==NULL
	   * Check if the connect succeeded
	   */
	  if (so->so_state & SS_NOFDREF) {
	    tp = tcp_close(tp);
	    goto dropwithreset;
	  }
	cont_input:
	  tcp_template(tp);

	  if (optp)
	    tcp_dooptions(tp, (u_char *)optp, optlen, ti);

	  if (iss)
	    tp->iss = iss;
	  else
	    tp->iss = slirp->tcp_iss;
	  slirp->tcp_iss += TCP_ISSINCR/2;
	  tp->irs = ti->ti_seq;
	  tcp_sendseqinit(tp);
	  tcp_rcvseqinit(tp);
	  tp->t_flags |= TF_ACKNOW;
	  tp->t_state = TCPS_SYN_RECEIVED;
	  tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
	  goto trimthenstep6;
	} /* case TCPS_LISTEN */

	/*
	 * If the state is SYN_SENT:
	 *	if seg contains an ACK, but not for our SYN, drop the input.
	 *	if seg contains a RST, then drop the connection.
	 *	if seg does not contain SYN, then drop it.
	 * Otherwise this is an acceptable SYN segment
	 *	initialize tp->rcv_nxt and tp->irs
	 *	if seg contains ack then advance tp->snd_una
	 *	if SYN has been acked change to ESTABLISHED else SYN_RCVD state
	 *	arrange for segment to be acked (eventually)
	 *	continue processing rest of data/controls, beginning with URG
	 */
	case TCPS_SYN_SENT:
		if ((tiflags & TH_ACK) &&
		    (SEQ_LEQ(ti->ti_ack, tp->iss) ||
		     SEQ_GT(ti->ti_ack, tp->snd_max)))
			goto dropwithreset;

		if (tiflags & TH_RST) {
                        if (tiflags & TH_ACK) {
                                tcp_drop(tp, 0); /* XXX Check t_softerror! */
                        }
			goto drop;
		}

		if ((tiflags & TH_SYN) == 0)
			goto drop;
		if (tiflags & TH_ACK) {
			tp->snd_una = ti->ti_ack;
			if (SEQ_LT(tp->snd_nxt, tp->snd_una))
				tp->snd_nxt = tp->snd_una;
		}

		tp->t_timer[TCPT_REXMT] = 0;
		tp->irs = ti->ti_seq;
		tcp_rcvseqinit(tp);
		tp->t_flags |= TF_ACKNOW;
		if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
			soisfconnected(so);
			tp->t_state = TCPS_ESTABLISHED;

			(void) tcp_reass(tp, (struct tcpiphdr *)0,
				(struct mbuf *)0);
			/*
			 * if we didn't have to retransmit the SYN,
			 * use its rtt as our initial srtt & rtt var.
			 */
			if (tp->t_rtt)
				tcp_xmit_timer(tp, tp->t_rtt);
		} else
			tp->t_state = TCPS_SYN_RECEIVED;

trimthenstep6:
		/*
		 * Advance ti->ti_seq to correspond to first data byte.
		 * If data, trim to stay within window,
		 * dropping FIN if necessary.
		 */
		ti->ti_seq++;
		if (ti->ti_len > tp->rcv_wnd) {
			todrop = ti->ti_len - tp->rcv_wnd;
			m_adj(m, -todrop);
			ti->ti_len = tp->rcv_wnd;
			tiflags &= ~TH_FIN;
		}
		tp->snd_wl1 = ti->ti_seq - 1;
		tp->rcv_up = ti->ti_seq;
		goto step6;
	} /* switch tp->t_state */
	/*
	 * States other than LISTEN or SYN_SENT.
	 * Check that at least some bytes of segment are within
	 * receive window.  If segment begins before rcv_nxt,
	 * drop leading data (and SYN); if nothing left, just ack.
	 */
	todrop = tp->rcv_nxt - ti->ti_seq;
	if (todrop > 0) {
		if (tiflags & TH_SYN) {
			tiflags &= ~TH_SYN;
			ti->ti_seq++;
			if (ti->ti_urp > 1)
				ti->ti_urp--;
			else
				tiflags &= ~TH_URG;
			todrop--;
		}
		/*
		 * Following if statement from Stevens, vol. 2, p. 960.
		 */
		if (todrop > ti->ti_len
		    || (todrop == ti->ti_len && (tiflags & TH_FIN) == 0)) {
			/*
			 * Any valid FIN must be to the left of the window.
			 * At this point the FIN must be a duplicate or out
			 * of sequence; drop it.
			 */
			tiflags &= ~TH_FIN;

			/*
			 * Send an ACK to resynchronize and drop any data.
			 * But keep on processing for RST or ACK.
			 */
			tp->t_flags |= TF_ACKNOW;
			todrop = ti->ti_len;
		}
		m_adj(m, todrop);
		ti->ti_seq += todrop;
		ti->ti_len -= todrop;
		if (ti->ti_urp > todrop)
			ti->ti_urp -= todrop;
		else {
			tiflags &= ~TH_URG;
			ti->ti_urp = 0;
		}
	}
	/*
	 * If new data are received on a connection after the
	 * user processes are gone, then RST the other end.
	 */
	if ((so->so_state & SS_NOFDREF) &&
	    tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
		tp = tcp_close(tp);
		goto dropwithreset;
	}

	/*
	 * If segment ends after window, drop trailing data
	 * (and PUSH and FIN); if nothing left, just ACK.
	 */
	todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
	if (todrop > 0) {
		if (todrop >= ti->ti_len) {
			/*
			 * If a new connection request is received
			 * while in TIME_WAIT, drop the old connection
			 * and start over if the sequence numbers
			 * are above the previous ones.
			 */
			if (tiflags & TH_SYN &&
			    tp->t_state == TCPS_TIME_WAIT &&
			    SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
				iss = tp->rcv_nxt + TCP_ISSINCR;
				tp = tcp_close(tp);
				goto findso;
			}
			/*
			 * If window is closed can only take segments at
			 * window edge, and have to drop data and PUSH from
			 * incoming segments.  Continue processing, but
			 * remember to ack.  Otherwise, drop segment
			 * and ack.
			 */
			if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
				tp->t_flags |= TF_ACKNOW;
			} else {
				goto dropafterack;
			}
		}
		m_adj(m, -todrop);
		ti->ti_len -= todrop;
		tiflags &= ~(TH_PUSH|TH_FIN);
	}

	/*
	 * If the RST bit is set examine the state:
	 *    SYN_RECEIVED STATE:
	 *	If passive open, return to LISTEN state.
	 *	If active open, inform user that connection was refused.
	 *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
	 *	Inform user that connection was reset, and close tcb.
	 *    CLOSING, LAST_ACK, TIME_WAIT STATES
	 *	Close the tcb.
	 */
	if (tiflags & TH_RST) switch (tp->t_state) {

	case TCPS_SYN_RECEIVED:
	case TCPS_ESTABLISHED:
	case TCPS_FIN_WAIT_1:
	case TCPS_FIN_WAIT_2:
	case TCPS_CLOSE_WAIT:
		tp->t_state = TCPS_CLOSED;
		tcp_close(tp);
		goto drop;

	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
	case TCPS_TIME_WAIT:
		tcp_close(tp);
		goto drop;
	}

	/*
	 * If a SYN is in the window, then this is an
	 * error and we send an RST and drop the connection.
	 */
	if (tiflags & TH_SYN) {
		tp = tcp_drop(tp, 0);
		goto dropwithreset;
	}

	/*
	 * If the ACK bit is off we drop the segment and return.
	 */
	if ((tiflags & TH_ACK) == 0) goto drop;

	/*
	 * Ack processing.
	 */
	switch (tp->t_state) {
	/*
	 * In SYN_RECEIVED state if the ack ACKs our SYN then enter
	 * ESTABLISHED state and continue processing, otherwise
	 * send an RST.  una<=ack<=max
	 */
	case TCPS_SYN_RECEIVED:

		if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
		    SEQ_GT(ti->ti_ack, tp->snd_max))
			goto dropwithreset;
		tp->t_state = TCPS_ESTABLISHED;
		/*
		 * The SYN we sent is acked with our initial sequence
		 * number + 1.  Without this correction the first data
		 * byte already in the buffer would be lost.  This is
		 * only needed for SS_CTL, since the buffer is empty
		 * otherwise.  Either tp->snd_una++; or:
		 */
		tp->snd_una = ti->ti_ack;
		if (so->so_state & SS_CTL) {
			/* So tcp_ctl reports the right state */
			ret = tcp_ctl(so);
			if (ret == 1) {
				soisfconnected(so);
				so->so_state &= ~SS_CTL;	/* success XXX */
			} else if (ret == 2) {
				so->so_state &= SS_PERSISTENT_MASK;
				so->so_state |= SS_NOFDREF;	/* CTL_CMD */
			} else {
				needoutput = 1;
				tp->t_state = TCPS_FIN_WAIT_1;
			}
		} else {
			soisfconnected(so);
		}

		(void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
		tp->snd_wl1 = ti->ti_seq - 1;
		/* Avoid ack processing; snd_una==ti_ack  =>  dup ack */
		goto synrx_to_est;
		/* fall into ... */

	/*
	 * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
	 * ACKs.  If the ack is in the range
	 *	tp->snd_una < ti->ti_ack <= tp->snd_max
	 * then advance tp->snd_una to ti->ti_ack and drop
	 * data from the retransmission queue.  If this ACK reflects
	 * more up to date window information we update our window information.
	 */
	case TCPS_ESTABLISHED:
	case TCPS_FIN_WAIT_1:
	case TCPS_FIN_WAIT_2:
	case TCPS_CLOSE_WAIT:
	case TCPS_CLOSING:
	case TCPS_LAST_ACK:
	case TCPS_TIME_WAIT:

		if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
			if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
				DEBUG_MISC((dfd, " dup ack  m = %lx  so = %lx\n",
				    (long)m, (long)so));
				/*
				 * If we have outstanding data (other than
				 * a window probe), this is a completely
				 * duplicate ack (ie, window info didn't
				 * change), the ack is the biggest we've
				 * seen and we've seen exactly our rexmt
				 * threshold of them, assume a packet
				 * has been dropped and retransmit it.
				 * Kludge snd_nxt & the congestion
				 * window so we send only this one
				 * packet.
				 *
				 * We know we're losing at the current
				 * window size so do congestion avoidance
				 * (set ssthresh to half the current window
				 * and pull our congestion window back to
				 * the new ssthresh).
				 *
				 * Dup acks mean that packets have left the
				 * network (they're now cached at the receiver)
				 * so bump cwnd by the amount in the receiver
				 * to keep a constant cwnd packets in the
				 * network.
				 */
				if (tp->t_timer[TCPT_REXMT] == 0 ||
				    ti->ti_ack != tp->snd_una)
					tp->t_dupacks = 0;
				else if (++tp->t_dupacks == TCPREXMTTHRESH) {
					tcp_seq onxt = tp->snd_nxt;
					u_int win =
					    min(tp->snd_wnd, tp->snd_cwnd) / 2 /
						tp->t_maxseg;

					if (win < 2)
						win = 2;
					tp->snd_ssthresh = win * tp->t_maxseg;
					tp->t_timer[TCPT_REXMT] = 0;
					tp->t_rtt = 0;
					tp->snd_nxt = ti->ti_ack;
					tp->snd_cwnd = tp->t_maxseg;
					(void) tcp_output(tp);
					tp->snd_cwnd = tp->snd_ssthresh +
					       tp->t_maxseg * tp->t_dupacks;
					if (SEQ_GT(onxt, tp->snd_nxt))
						tp->snd_nxt = onxt;
					goto drop;
				} else if (tp->t_dupacks > TCPREXMTTHRESH) {
					tp->snd_cwnd += tp->t_maxseg;
					(void) tcp_output(tp);
					goto drop;
				}
			} else
				tp->t_dupacks = 0;
			break;
		}
	synrx_to_est:
		/*
		 * If the congestion window was inflated to account
		 * for the other side's cached packets, retract it.
		 */
		if (tp->t_dupacks > TCPREXMTTHRESH &&
		    tp->snd_cwnd > tp->snd_ssthresh)
			tp->snd_cwnd = tp->snd_ssthresh;
		tp->t_dupacks = 0;
		if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
			goto dropafterack;
		}
		acked = ti->ti_ack - tp->snd_una;

		/*
		 * If transmit timer is running and timed sequence
		 * number was acked, update smoothed round trip time.
		 * Since we now have an rtt measurement, cancel the
		 * timer backoff (cf., Phil Karn's retransmit alg.).
		 * Recompute the initial retransmit timer.
		 */
		if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
			tcp_xmit_timer(tp, tp->t_rtt);

		/*
		 * If all outstanding data is acked, stop retransmit
		 * timer and remember to restart (more output or persist).
		 * If there is more data to be acked, restart retransmit
		 * timer, using current (possibly backed-off) value.
		 */
		if (ti->ti_ack == tp->snd_max) {
			tp->t_timer[TCPT_REXMT] = 0;
			needoutput = 1;
		} else if (tp->t_timer[TCPT_PERSIST] == 0)
			tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
		/*
		 * When new data is acked, open the congestion window.
		 * If the window gives us less than ssthresh packets
		 * in flight, open exponentially (maxseg per packet).
		 * Otherwise open linearly: maxseg per window
		 * (maxseg^2 / cwnd per packet).  A standalone sketch
		 * of this arithmetic follows the function.
		 */
		{
			register u_int cw = tp->snd_cwnd;
			register u_int incr = tp->t_maxseg;

			if (cw > tp->snd_ssthresh)
				incr = incr * incr / cw;
			tp->snd_cwnd = min(cw + incr,
			    TCP_MAXWIN << tp->snd_scale);
		}
		if (acked > so->so_snd.sb_cc) {
			tp->snd_wnd -= so->so_snd.sb_cc;
			sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
			ourfinisacked = 1;
		} else {
			sbdrop(&so->so_snd, acked);
			tp->snd_wnd -= acked;
			ourfinisacked = 0;
		}
		tp->snd_una = ti->ti_ack;
		if (SEQ_LT(tp->snd_nxt, tp->snd_una))
			tp->snd_nxt = tp->snd_una;

		switch (tp->t_state) {

		/*
		 * In FIN_WAIT_1 STATE in addition to the processing
		 * for the ESTABLISHED state if our FIN is now acknowledged
		 * then enter FIN_WAIT_2.
		 */
		case TCPS_FIN_WAIT_1:
			if (ourfinisacked) {
				/*
				 * If we can't receive any more
				 * data, then closing user can proceed.
				 * Starting the timer is contrary to the
				 * specification, but if we don't get a FIN
				 * we'll hang forever.
				 */
				if (so->so_state & SS_FCANTRCVMORE) {
					tp->t_timer[TCPT_2MSL] = TCP_MAXIDLE;
				}
				tp->t_state = TCPS_FIN_WAIT_2;
			}
			break;

	 	/*
		 * In CLOSING STATE in addition to the processing for
		 * the ESTABLISHED state if the ACK acknowledges our FIN
		 * then enter the TIME-WAIT state, otherwise ignore
		 * the segment.
		 */
		case TCPS_CLOSING:
			if (ourfinisacked) {
				tp->t_state = TCPS_TIME_WAIT;
				tcp_canceltimers(tp);
				tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			}
			break;

		/*
		 * In LAST_ACK, we may still be waiting for data to drain
		 * and/or to be acked, as well as for the ack of our FIN.
		 * If our FIN is now acknowledged, delete the TCB,
		 * enter the closed state and return.
		 */
		case TCPS_LAST_ACK:
			if (ourfinisacked) {
				tcp_close(tp);
				goto drop;
			}
			break;

		/*
		 * In TIME_WAIT state the only thing that should arrive
		 * is a retransmission of the remote FIN.  Acknowledge
		 * it and restart the finack timer.
		 */
		case TCPS_TIME_WAIT:
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			goto dropafterack;
		}
	} /* switch(tp->t_state) */

step6:
	/*
	 * Update window information.
	 * Don't look at window if no ACK: TACs send garbage on first SYN.
	 */
	if ((tiflags & TH_ACK) &&
	    (SEQ_LT(tp->snd_wl1, ti->ti_seq) ||
	    (tp->snd_wl1 == ti->ti_seq && (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
	    (tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))))) {
		tp->snd_wnd = tiwin;
		tp->snd_wl1 = ti->ti_seq;
		tp->snd_wl2 = ti->ti_ack;
		if (tp->snd_wnd > tp->max_sndwnd)
			tp->max_sndwnd = tp->snd_wnd;
		needoutput = 1;
	}

	/*
	 * Process segments with URG.
	 */
	if ((tiflags & TH_URG) && ti->ti_urp &&
	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		/*
		 * This is a kludge, but if we receive and accept
		 * random urgent pointers, we'll crash in
		 * soreceive.  It's hard to imagine someone
		 * actually wanting to send this much urgent data.
		 */
		if (ti->ti_urp + so->so_rcv.sb_cc > so->so_rcv.sb_datalen) {
			ti->ti_urp = 0;
			tiflags &= ~TH_URG;
			goto dodata;
		}
		/*
		 * If this segment advances the known urgent pointer,
		 * then mark the data stream.  This should not happen
		 * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
		 * a FIN has been received from the remote side.
		 * In these states we ignore the URG.
		 *
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section as the original
		 * spec states (in one of two places).
		 */
		if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
			tp->rcv_up = ti->ti_seq + ti->ti_urp;
			so->so_urgc = so->so_rcv.sb_cc +
				(tp->rcv_up - tp->rcv_nxt); /* -1; */
		}
	} else
		/*
		 * If no out of band data is expected,
		 * pull receive urgent pointer along
		 * with the receive window.
		 */
		if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
			tp->rcv_up = tp->rcv_nxt;
dodata:

	/*
	 * Process the segment text, merging it into the TCP sequencing queue,
	 * and arranging for acknowledgment of receipt if necessary.
	 * This process logically involves adjusting tp->rcv_wnd as data
	 * is presented to the user (this happens in tcp_usrreq.c,
	 * case PRU_RCVD).  If a FIN has already been received on this
	 * connection then we just ignore the text.
	 */
	if ((ti->ti_len || (tiflags&TH_FIN)) &&
	    TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		TCP_REASS(tp, ti, m, so, tiflags);
	} else {
		m_free(m);
		tiflags &= ~TH_FIN;
	}

	/*
	 * If FIN is received ACK the FIN and let the user know
	 * that the connection is closing.
	 */
	if (tiflags & TH_FIN) {
		if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
			/*
			 * If we receive a FIN we can't send more data;
			 * mark the socket SS_FDRAIN.  Shut the socket
			 * down if there is no rx data left in the
			 * buffer.  soread() is called on completion of
			 * shutdown() and will go to TCPS_LAST_ACK,
			 * using tcp_output() to send the FIN.
			 */
			sofwdrain(so);

			tp->t_flags |= TF_ACKNOW;
			tp->rcv_nxt++;
		}
		switch (tp->t_state) {

	 	/*
		 * In SYN_RECEIVED and ESTABLISHED STATES
		 * enter the CLOSE_WAIT state.
		 */
		case TCPS_SYN_RECEIVED:
		case TCPS_ESTABLISHED:
			if (so->so_emu == EMU_CTL)	/* no shutdown on socket */
				tp->t_state = TCPS_LAST_ACK;
			else
				tp->t_state = TCPS_CLOSE_WAIT;
			break;

	 	/*
		 * If still in FIN_WAIT_1 STATE FIN has not been acked so
		 * enter the CLOSING state.
		 */
		case TCPS_FIN_WAIT_1:
			tp->t_state = TCPS_CLOSING;
			break;

	 	/*
		 * In FIN_WAIT_2 state enter the TIME_WAIT state,
		 * starting the time-wait timer, turning off the other
		 * standard timers.
		 */
		case TCPS_FIN_WAIT_2:
			tp->t_state = TCPS_TIME_WAIT;
			tcp_canceltimers(tp);
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			break;

		/*
		 * In TIME_WAIT state restart the 2 MSL time_wait timer.
		 */
		case TCPS_TIME_WAIT:
			tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
			break;
		}
	}

	/*
	 * If this is a small packet, ACK it now - with the Nagle
	 * algorithm in effect the sender won't send more data
	 * until it gets an ACK.
	 *
	 * See above.
	 */
	if (ti->ti_len && (unsigned)ti->ti_len <= 5 &&
	    ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
		tp->t_flags |= TF_ACKNOW;
	}

	/*
	 * Return any desired output.
	 */
	if (needoutput || (tp->t_flags & TF_ACKNOW)) {
		(void) tcp_output(tp);
	}
	return;

dropafterack:
	/*
	 * Generate an ACK dropping incoming segment if it occupies
	 * sequence space, where the ACK reflects our state.
	 */
	if (tiflags & TH_RST)
		goto drop;
	m_free(m);
	tp->t_flags |= TF_ACKNOW;
	(void) tcp_output(tp);
	return;

dropwithreset:
	/* reuses m if m!=NULL, m_free() unnecessary */
	if (tiflags & TH_ACK)
		tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
	else {
		if (tiflags & TH_SYN) ti->ti_len++;
		tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
		    TH_RST|TH_ACK);
	}

	return;

drop:
	/*
	 * Drop space held by incoming segment and return.
	 */
	m_free(m);

	return;
}
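
The ACK-processing branches above implement the classic Reno rules: on the
third duplicate ACK, halve ssthresh, retransmit one segment, and inflate
cwnd by one maxseg per further dup ack; on an ACK of new data, grow cwnd
exponentially below ssthresh and linearly above it. A minimal standalone
sketch of just that window arithmetic (hypothetical cwnd_state struct, not
the tcpcb used above):

#include <stdint.h>

/* Hypothetical stand-in for the tcpcb fields used above. */
struct cwnd_state {
	uint32_t snd_cwnd;	/* congestion window, bytes */
	uint32_t snd_ssthresh;	/* slow-start threshold, bytes */
	uint32_t snd_wnd;	/* peer's advertised window, bytes */
	uint32_t t_maxseg;	/* maximum segment size, bytes */
	uint32_t cwnd_max;	/* cap, e.g. TCP_MAXWIN << snd_scale */
};

/* Third duplicate ACK: enter fast retransmit / fast recovery. */
static void
reno_on_third_dup_ack(struct cwnd_state *cw)
{
	uint32_t flight = cw->snd_wnd < cw->snd_cwnd ?
	    cw->snd_wnd : cw->snd_cwnd;
	uint32_t win = flight / 2 / cw->t_maxseg;

	if (win < 2)
		win = 2;			/* never below 2 segments */
	cw->snd_ssthresh = win * cw->t_maxseg;	/* half the flight size */
	cw->snd_cwnd = cw->t_maxseg;		/* send just one segment */
}

/* ACK of new data: slow start below ssthresh, else congestion avoidance. */
static void
reno_on_new_ack(struct cwnd_state *cw)
{
	uint32_t incr = cw->t_maxseg;

	if (cw->snd_cwnd > cw->snd_ssthresh)
		incr = incr * incr / cw->snd_cwnd;	/* ~1 maxseg per RTT */
	cw->snd_cwnd += incr;
	if (cw->snd_cwnd > cw->cwnd_max)
		cw->snd_cwnd = cw->cwnd_max;
}
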
Example #25
/*
 * copies mbuf chain to the uio scatter/gather list
 */
int
nfsm_mbuftouio(struct mbuf **mrep, struct uio *uiop, int siz, caddr_t *dpos)
{
	char *mbufcp, *uiocp;
	int xfer, left, len;
	struct mbuf *mp;
	long uiosiz, rem;
	int error = 0;

	mp = *mrep;
	mbufcp = *dpos;
	len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
	rem = nfsm_rndup(siz)-siz;
	while (siz > 0) {
		if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
			return (EFBIG);
		left = uiop->uio_iov->iov_len;
		uiocp = uiop->uio_iov->iov_base;
		if (left > siz)
			left = siz;
		uiosiz = left;
		while (left > 0) {
			while (len == 0) {
				mp = mp->m_next;
				if (mp == NULL)
					return (EBADRPC);
				mbufcp = mtod(mp, caddr_t);
				len = mp->m_len;
			}
			xfer = (left > len) ? len : left;
#ifdef notdef
			/* Not Yet.. */
			if (uiop->uio_iov->iov_op != NULL)
				(*(uiop->uio_iov->iov_op))
				(mbufcp, uiocp, xfer);
			else
#endif
			if (uiop->uio_segflg == UIO_SYSSPACE)
				bcopy(mbufcp, uiocp, xfer);
			else
				copyout(mbufcp, uiocp, xfer);
			left -= xfer;
			len -= xfer;
			mbufcp += xfer;
			uiocp += xfer;
			uiop->uio_offset += xfer;
			uiop->uio_resid -= xfer;
		}
		if (uiop->uio_iov->iov_len <= siz) {
			uiop->uio_iovcnt--;
			uiop->uio_iov++;
		} else {
			uiop->uio_iov->iov_base =
			    (char *)uiop->uio_iov->iov_base + uiosiz;
			uiop->uio_iov->iov_len -= uiosiz;
		}
		siz -= uiosiz;
	}
	*dpos = mbufcp;
	*mrep = mp;
	if (rem > 0) {
		if (len < rem)
			error = nfs_adv(mrep, dpos, rem, len);
		else
			*dpos += rem;
	}
	return (error);
}
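
nfsm_mbuftouio() above is a two-level copy loop: the outer loop consumes one
iovec entry per pass, the inner loop refills from successive mbufs, and each
step moves min(left, len) bytes while advancing both cursors. A userland
sketch of the same walk, with a hypothetical chainbuf type standing in for
the mbuf chain:

#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

/* Hypothetical chained buffer standing in for an mbuf chain. */
struct chainbuf {
	struct chainbuf	*next;
	char		*data;
	size_t		 len;
};

/*
 * Copy up to siz bytes from the chain into the iovec array,
 * advancing both cursors; returns the byte count actually copied.
 */
static size_t
chain_to_iov(struct chainbuf **bp, size_t *boff,
    struct iovec *iov, int iovcnt, size_t siz)
{
	size_t done = 0;

	while (siz > 0 && iovcnt > 0) {
		if (iov->iov_len == 0) {	/* move to next iovec entry */
			iov++;
			iovcnt--;
			continue;
		}
		while (*bp != NULL && (*bp)->len == *boff) {
			*bp = (*bp)->next;	/* next buffer in the chain */
			*boff = 0;
		}
		if (*bp == NULL)
			break;			/* chain exhausted */

		size_t left = (*bp)->len - *boff;
		size_t xfer = left < iov->iov_len ? left : iov->iov_len;
		if (xfer > siz)
			xfer = siz;

		memcpy(iov->iov_base, (*bp)->data + *boff, xfer);
		iov->iov_base = (char *)iov->iov_base + xfer;
		iov->iov_len -= xfer;
		*boff += xfer;
		siz -= xfer;
		done += xfer;
	}
	return (done);
}
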
Example #26
/*
 * The output routine. Takes a packet and encapsulates it in the protocol
 * given by sc->g_proto. See also RFC 1701 and RFC 2004
 */
static int
gre_output_serialized(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
		      struct rtentry *rt)
{
	int error = 0;
	struct gre_softc *sc = ifp->if_softc;
	struct greip *gh;
	struct ip *ip;
	u_short etype = 0;
	struct mobile_h mob_h;
	struct route *ro;
	struct sockaddr_in *ro_dst;

	ASSERT_NETISR_NCPUS(mycpuid);

	/*
	 * GRE may recurse on itself indefinitely when misconfigured.
	 * Prevent this by enforcing an upper limit on the nesting depth.
	 */
	if (++(sc->called) > max_gre_nesting) {
		kprintf("%s: gre_output: recursively called too many "
		       "times (%d)\n", if_name(&sc->sc_if), sc->called);
		m_freem(m);
		error = EIO;    /* is there a better errno? */
		goto end;
	}

	if ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) == 0 ||
	    sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
		m_freem(m);
		error = ENETDOWN;
		goto end;
	}

	ro = &sc->route_pcpu[mycpuid];
	ro_dst = (struct sockaddr_in *)&ro->ro_dst;
	if (ro->ro_rt != NULL &&
	    ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
	     ro_dst->sin_addr.s_addr != sc->g_dst.s_addr)) {
		RTFREE(ro->ro_rt);
		ro->ro_rt = NULL;
	}
	if (ro->ro_rt == NULL) {
		error = gre_compute_route(sc, ro);
		if (error) {
			m_freem(m);
			goto end;
		}
	}

	gh = NULL;
	ip = NULL;

	if (ifp->if_bpf) {
		bpf_gettoken();
		if (ifp->if_bpf) {
			uint32_t af = dst->sa_family;

			bpf_ptap(ifp->if_bpf, m, &af, sizeof(af));
		}
		bpf_reltoken();
	}

	m->m_flags &= ~(M_BCAST|M_MCAST);

	if (sc->g_proto == IPPROTO_MOBILE) {
		if (dst->sa_family == AF_INET) {
			struct mbuf *m0;
			int msiz;

			ip = mtod(m, struct ip *);

			/*
			 * RFC2004 specifies that fragmented datagrams shouldn't
			 * be encapsulated.
			 */
			if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
				m_freem(m);
				error = EINVAL;    /* is there a better errno? */
				goto end;
			}
			memset(&mob_h, 0, MOB_H_SIZ_L);
			mob_h.proto = (ip->ip_p) << 8;
			mob_h.odst = ip->ip_dst.s_addr;
			ip->ip_dst.s_addr = sc->g_dst.s_addr;

			/*
			 * If the packet comes from our host, we only change
			 * the destination address in the IP header.
			 * Otherwise we also need to save and change the
			 * source address.
			 */
			if (in_hosteq(ip->ip_src, sc->g_src)) {
				msiz = MOB_H_SIZ_S;
			} else {
				mob_h.proto |= MOB_H_SBIT;
				mob_h.osrc = ip->ip_src.s_addr;
				ip->ip_src.s_addr = sc->g_src.s_addr;
				msiz = MOB_H_SIZ_L;
			}
			mob_h.proto = htons(mob_h.proto);
			mob_h.hcrc = gre_in_cksum((u_short *)&mob_h, msiz);

			if ((m->m_data - msiz) < m->m_pktdat) {
				/* need new mbuf */
				MGETHDR(m0, M_NOWAIT, MT_HEADER);
				if (m0 == NULL) {
					m_freem(m);
					error = ENOBUFS;
					goto end;
				}
				m0->m_next = m;
				m->m_data += sizeof(struct ip);
				m->m_len -= sizeof(struct ip);
				m0->m_pkthdr.len = m->m_pkthdr.len + msiz;
				m0->m_len = msiz + sizeof(struct ip);
				m0->m_data += max_linkhdr;
				memcpy(mtod(m0, caddr_t), (caddr_t)ip,
				       sizeof(struct ip));
				m = m0;
			} else {  /* we have some space left in the old one */
				m->m_data -= msiz;
				m->m_len += msiz;
				m->m_pkthdr.len += msiz;
				bcopy(ip, mtod(m, caddr_t),
					sizeof(struct ip));
			}
			ip = mtod(m, struct ip *);
			memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz);
			ip->ip_len = ntohs(ip->ip_len) + msiz;
		} else {  /* not AF_INET */
			m_freem(m);
			error = EINVAL;
			goto end;
		}
	} else if (sc->g_proto == IPPROTO_GRE) {
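
In the mobile-encapsulation path above, mob_h.hcrc is sealed with
gre_in_cksum() before the header is spliced in behind the IP header. That
routine is the standard RFC 1071 ones-complement Internet checksum; a
minimal sketch over a contiguous buffer (the real kernel versions also
handle mbuf chains):

#include <stdint.h>
#include <stddef.h>

/* RFC 1071 ones-complement checksum over a contiguous buffer. */
static uint16_t
in_cksum_sketch(const void *buf, size_t len)
{
	const uint16_t *w = buf;
	uint32_t sum = 0;

	while (len > 1) {			/* sum 16-bit words */
		sum += *w++;
		len -= 2;
	}
	if (len == 1) {				/* pad a trailing odd byte */
		uint16_t last = 0;

		*(uint8_t *)&last = *(const uint8_t *)w;
		sum += last;
	}
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carries */
	sum += (sum >> 16);
	return ((uint16_t)~sum);
}
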
Example #27
/*
 * Save an outbound packet for a node in power-save sleep state.
 * The new packet is placed on the node's saved queue, and the TIM
 * is changed, if necessary.
 */
int
ieee80211_pwrsave(struct ieee80211_node *ni, struct mbuf *m)
{
	struct ieee80211_psq *psq = &ni->ni_psq;
	struct ieee80211vap *vap = ni->ni_vap;
	struct ieee80211com *ic = ni->ni_ic;
	struct ieee80211_psq_head *qhead;
	int qlen, age;

	IEEE80211_PSQ_LOCK(psq);
	if (psq->psq_len >= psq->psq_maxlen) {
		psq->psq_drops++;
		IEEE80211_PSQ_UNLOCK(psq);
		IEEE80211_NOTE(vap, IEEE80211_MSG_ANY, ni,
		    "pwr save q overflow, drops %d (size %d)",
		    psq->psq_drops, psq->psq_len);
#ifdef IEEE80211_DEBUG
		if (ieee80211_msg_dumppkts(vap))
			ieee80211_dump_pkt(ni->ni_ic, mtod(m, caddr_t),
			    m->m_len, -1, -1);
#endif
		psq_mfree(m);
		return ENOSPC;
	}
	/*
	 * Tag the frame with its expiry time and insert it in
	 * the appropriate queue.  The aging interval is 4 times
	 * the listen interval specified by the station.  Frames
	 * that sit around too long are reclaimed using this
	 * information.
	 */
	/* TU -> secs.  XXX handle overflow? */
	age = IEEE80211_TU_TO_MS((ni->ni_intval * ic->ic_bintval) << 2) / 1000;
	/*
	 * Encapsulated frames go on the high priority queue,
	 * other stuff goes on the low priority queue.  We use
	 * this to order frames returned out of the driver
	 * ahead of frames we collect in ieee80211_start.
	 */
	if (m->m_flags & M_ENCAP)
		qhead = &psq->psq_head[0];
	else
		qhead = &psq->psq_head[1];
	if (qhead->tail == NULL) {
		struct mbuf *mh;

		qhead->head = m;
		/*
		 * Take care to adjust age when inserting the first
		 * frame of a queue and the other queue already has
		 * frames.  We need to preserve the age difference
		 * relationship so ieee80211_node_psq_age works.
		 */
		if (qhead == &psq->psq_head[1]) {
			mh = psq->psq_head[0].head;
			if (mh != NULL)
				age -= M_AGE_GET(mh);
		} else {
			mh = psq->psq_head[1].head;
			if (mh != NULL) {
				int nage = M_AGE_GET(mh) - age;
				/* XXX is clamping to zero good 'nuf? */
				M_AGE_SET(mh, nage < 0 ? 0 : nage);
			}
		}
	} else {
		qhead->tail->m_nextpkt = m;
		age -= M_AGE_GET(qhead->head);
	}
	KASSERT(age >= 0, ("age %d", age));
	M_AGE_SET(m, age);
	m->m_nextpkt = NULL;
	qhead->tail = m;
	qhead->len++;
	qlen = ++(psq->psq_len);
	IEEE80211_PSQ_UNLOCK(psq);

	IEEE80211_NOTE(vap, IEEE80211_MSG_POWER, ni,
	    "save frame with age %d, %u now queued", age, qlen);

	if (qlen == 1 && vap->iv_set_tim != NULL)
		vap->iv_set_tim(ni, 1);

	return 0;
}
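
ieee80211_pwrsave() stores frame ages as deltas: each queued frame's M_AGE
value is relative to the frame ahead of it (the cross-queue fixups above
preserve that relationship), so the periodic ager only ever has to adjust
the head. A toy sketch of the same delta encoding on a plain list
(hypothetical aged_node type, not the mbuf M_AGE_GET/M_AGE_SET macros;
assumes enqueue ages are nondecreasing):

#include <stdlib.h>

/* Hypothetical queue node; 'age' is stored relative to its predecessor. */
struct aged_node {
	struct aged_node *next;
	int age;		/* delta vs. predecessor; head is absolute */
};

struct aged_queue {
	struct aged_node *head, *tail;
	int tail_abs_age;	/* absolute age of the tail, for enqueue */
};

static void
aged_enqueue(struct aged_queue *q, struct aged_node *n, int abs_age)
{
	n->next = NULL;
	if (q->head == NULL) {
		n->age = abs_age;		/* head stores absolute age */
		q->head = q->tail = n;
	} else {
		n->age = abs_age - q->tail_abs_age;	/* store the delta */
		q->tail->next = n;
		q->tail = n;
	}
	q->tail_abs_age = abs_age;
}

/* One clock tick: only the head's (absolute) age needs decrementing. */
static struct aged_node *
aged_reclaim_expired(struct aged_queue *q)
{
	struct aged_node *expired = NULL, **tailp = &expired;

	if (q->head != NULL)
		q->head->age -= 1;
	while (q->head != NULL && q->head->age <= 0) {
		struct aged_node *n = q->head;
		int head_age = n->age;		/* remaining, <= 0 */

		q->head = n->next;
		if (q->head != NULL)
			q->head->age += head_age;  /* delta -> absolute */
		n->next = NULL;
		*tailp = n;
		tailp = &n->next;
	}
	if (q->head == NULL)
		q->tail = NULL;
	return (expired);	/* list of reclaimed nodes */
}
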
Example #28
int
ipsec_process_done(struct mbuf *m, struct ipsecrequest *isr)
{
	struct tdb_ident *tdbi;
	struct m_tag *mtag;
	struct secasvar *sav;
	struct secasindex *saidx;
	int error;

	IPSEC_ASSERT(m != NULL, ("null mbuf"));
	IPSEC_ASSERT(isr != NULL, ("null ISR"));
	IPSEC_ASSERT(isr->sp != NULL, ("NULL isr->sp"));
	sav = isr->sav;
	IPSEC_ASSERT(sav != NULL, ("null SA"));
	IPSEC_ASSERT(sav->sah != NULL, ("null SAH"));

	saidx = &sav->sah->saidx;
	switch (saidx->dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
		/* Fix the header length, for AH processing. */
		mtod(m, struct ip *)->ip_len = htons(m->m_pkthdr.len);
		break;
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/* Fix the header length, for AH processing. */
		if (m->m_pkthdr.len < sizeof (struct ip6_hdr)) {
			error = ENXIO;
			goto bad;
		}
		if (m->m_pkthdr.len - sizeof (struct ip6_hdr) > IPV6_MAXPACKET) {
			/* No jumbogram support. */
			error = ENXIO;	/*?*/
			goto bad;
		}
		mtod(m, struct ip6_hdr *)->ip6_plen =
			htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
		break;
#endif /* INET6 */
	default:
		DPRINTF(("%s: unknown protocol family %u\n", __func__,
		    saidx->dst.sa.sa_family));
		error = ENXIO;
		goto bad;
	}

	/*
	 * Add a record of what we've done or what needs to be done to the
	 * packet.
	 */
	mtag = m_tag_get(PACKET_TAG_IPSEC_OUT_DONE,
			sizeof(struct tdb_ident), M_NOWAIT);
	if (mtag == NULL) {
		DPRINTF(("%s: could not get packet tag\n", __func__));
		error = ENOMEM;
		goto bad;
	}

	tdbi = (struct tdb_ident *)(mtag + 1);
	tdbi->dst = saidx->dst;
	tdbi->proto = saidx->proto;
	tdbi->spi = sav->spi;
	m_tag_prepend(m, mtag);

	key_sa_recordxfer(sav, m);		/* record data transfer */

	/*
	 * If there's another (bundled) SA to apply, do so.
	 * Note that this puts a burden on the kernel stack size.
	 * If this is a problem we'll need to introduce a queue
	 * to set the packet on so we can unwind the stack before
	 * doing further processing.
	 */
	if (isr->next) {
		/* XXX-BZ currently only support same AF bundles. */
		switch (saidx->dst.sa.sa_family) {
#ifdef INET
		case AF_INET:
			IPSECSTAT_INC(ips_out_bundlesa);
			return (ipsec4_process_packet(m, isr->next));
			/* NOTREACHED */
#endif
#ifdef notyet
#ifdef INET6
		case AF_INET6:
			/* XXX */
			IPSEC6STAT_INC(ips_out_bundlesa);
			return (ipsec6_process_packet(m, isr->next));
			/* NOTREACHED */
#endif /* INET6 */
#endif
		default:
			DPRINTF(("%s: unknown protocol family %u\n", __func__,
			    saidx->dst.sa.sa_family));
			error = ENXIO;
			goto bad;
		}
	}

	/*
	 * We're done with IPsec processing, transmit the packet using the
	 * appropriate network protocol (IP or IPv6).
	 */
	switch (saidx->dst.sa.sa_family) {
#ifdef INET
	case AF_INET:
#ifdef IPSEC_NAT_T
		/*
		 * If NAT-T is enabled, now that all IPsec processing is done
		 * insert UDP encapsulation header after IP header.
		 */
		if (sav->natt_type) {
			struct ip *ip = mtod(m, struct ip *);
			const int hlen = (ip->ip_hl << 2);
			int size, off;
			struct mbuf *mi;
			struct udphdr *udp;

			size = sizeof(struct udphdr);
			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
				/*
				 * draft-ietf-ipsec-nat-t-ike-0[01].txt and
				 * draft-ietf-ipsec-udp-encaps-(00/)01.txt,
				 * ignoring possible AH mode
				 * non-IKE marker + non-ESP marker
				 * from draft-ietf-ipsec-udp-encaps-00.txt.
				 */
				size += sizeof(u_int64_t);
			}
			mi = m_makespace(m, hlen, size, &off);
			if (mi == NULL) {
				DPRINTF(("%s: m_makespace for udphdr failed\n",
				    __func__));
				error = ENOBUFS;
				goto bad;
			}

			udp = (struct udphdr *)(mtod(mi, caddr_t) + off);
			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
				udp->uh_sport = htons(UDP_ENCAP_ESPINUDP_PORT);
			else
				udp->uh_sport =
					KEY_PORTFROMSADDR(&sav->sah->saidx.src);
			udp->uh_dport = KEY_PORTFROMSADDR(&sav->sah->saidx.dst);
			udp->uh_sum = 0;
			udp->uh_ulen = htons(m->m_pkthdr.len - hlen);
			ip->ip_len = htons(m->m_pkthdr.len);
			ip->ip_p = IPPROTO_UDP;

			if (sav->natt_type == UDP_ENCAP_ESPINUDP_NON_IKE)
				*(u_int64_t *)(udp + 1) = 0;
		}
#endif /* IPSEC_NAT_T */

		return ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL);
#endif /* INET */
#ifdef INET6
	case AF_INET6:
		/*
		 * We don't need massage, IPv6 header fields are always in
		 * net endian.
		 */
		return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
#endif /* INET6 */
	}
	panic("ipsec_process_done");
bad:
	m_freem(m);
	return (error);
}
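
The IPSEC_NAT_T branch above opens a hole after the IP header with
m_makespace() and fills in a UDP header, plus an 8-byte zero non-IKE marker
for the older draft format. A standalone sketch of just that header fill
(hedged: RFC 3948 NAT-T conventionally uses port 4500, but the real code
takes both ports from the SA):

#include <stdint.h>
#include <arpa/inet.h>

/* Plain UDP header layout, as in <netinet/udp.h>. */
struct udp_hdr {
	uint16_t uh_sport;
	uint16_t uh_dport;
	uint16_t uh_ulen;
	uint16_t uh_sum;
};

/*
 * Fill a UDP header for ESP-in-UDP encapsulation.  esp_len is the
 * number of payload bytes following the UDP header (including any
 * non-IKE marker).  A zero checksum is legal for UDP over IPv4.
 */
static void
fill_espinudp(struct udp_hdr *uh, uint16_t sport, uint16_t dport,
    uint16_t esp_len)
{
	uh->uh_sport = htons(sport);	/* conventionally 4500 for NAT-T */
	uh->uh_dport = htons(dport);
	uh->uh_ulen = htons((uint16_t)(sizeof(*uh) + esp_len));
	uh->uh_sum = 0;			/* optional over IPv4 */
}
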
Example #29
/*
 * Write an mbuf chain to the destination NIC memory address using
 *	programmed I/O.
 */
u_short
ed_pio_write_mbufs(struct ed_softc *sc, struct mbuf *m, bus_size_t dst)
{
	struct ifnet *ifp = sc->ifp;
	unsigned short total_len, dma_len;
	struct mbuf *mp;
	int     maxwait = 200;	/* about 240us */

	ED_ASSERT_LOCKED(sc);

	/* Regular Novell cards */
	/* First, count up the total number of bytes to copy */
	for (total_len = 0, mp = m; mp; mp = mp->m_next)
		total_len += mp->m_len;

	dma_len = total_len;
	if (sc->isa16bit && (dma_len & 1))
		dma_len++;

	/* select page 0 registers */
	ed_nic_outb(sc, ED_P0_CR, ED_CR_RD2 | ED_CR_STA);

	/* reset remote DMA complete flag */
	ed_nic_outb(sc, ED_P0_ISR, ED_ISR_RDC);

	/* set up DMA byte count */
	ed_nic_outb(sc, ED_P0_RBCR0, dma_len);
	ed_nic_outb(sc, ED_P0_RBCR1, dma_len >> 8);

	/* set up destination address in NIC mem */
	ed_nic_outb(sc, ED_P0_RSAR0, dst);
	ed_nic_outb(sc, ED_P0_RSAR1, dst >> 8);

	/* set remote DMA write */
	ed_nic_outb(sc, ED_P0_CR, ED_CR_RD1 | ED_CR_STA);

	/*
	 * Transfer the mbuf chain to the NIC memory.  16-bit cards require
	 * that data be transferred as words, and only words, so that case
	 * needs some extra code to patch over odd-length mbufs.
	 */

	if (!sc->isa16bit) {
		/* NE1000s are easy */
		while (m) {
			if (m->m_len)
				ed_asic_outsb(sc, ED_NOVELL_DATA, 
				    m->m_data, m->m_len);
			m = m->m_next;
		}
	} else {
		/* NE2000s are a pain */
		uint8_t *data;
		int len, wantbyte;
		union {
			uint16_t w;
			uint8_t b[2];
		} saveword;

		wantbyte = 0;

		while (m) {
			len = m->m_len;
			if (len) {
				data = mtod(m, caddr_t);
				/* finish the last word */
				if (wantbyte) {
					saveword.b[1] = *data;
					ed_asic_outw(sc, ED_NOVELL_DATA,
					    saveword.w);
					data++;
					len--;
					wantbyte = 0;
				}
				/* output contiguous words */
				if (len > 1) {
					ed_asic_outsw(sc, ED_NOVELL_DATA,
						      data, len >> 1);
					data += len & ~1;
					len &= 1;
				}
				/* save last byte, if necessary */
				if (len == 1) {
					saveword.b[0] = *data;
					wantbyte = 1;
				}
			}
			m = m->m_next;
		}
		/* spit last byte */
		if (wantbyte)
			ed_asic_outw(sc, ED_NOVELL_DATA, saveword.w);
	}

	/*
	 * Wait for remote DMA to complete; starting another remote DMA
	 * before this one finishes can wedge the bus.  (Tail assumed
	 * from the usual if_ed.c pattern.)
	 */
	while (((ed_nic_inb(sc, ED_P0_ISR) & ED_ISR_RDC) != ED_ISR_RDC) &&
	    --maxwait)
		continue;

	if (!maxwait) {
		log(LOG_WARNING, "%s: remote DMA failed to complete\n",
		    ifp->if_xname);
		ed_reset(ifp);
		return (0);
	}
	return (total_len);
}
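
The 16-bit branch above has to emit whole words, so it carries a dangling
odd byte across mbuf boundaries in saveword and pairs it with the first
byte of the next buffer. The same pairing logic as a standalone sketch
(hypothetical emit_word() sink standing in for ed_asic_outw()):

#include <stdint.h>
#include <stddef.h>

/* Stand-in for ed_asic_outw(sc, ED_NOVELL_DATA, w). */
static void
emit_word(uint16_t w)
{
	(void)w;
}

struct word_packer {
	union {
		uint16_t w;
		uint8_t  b[2];
	} save;
	int wantbyte;		/* 1 if save.b[0] holds a pending byte */
};

/* Feed one buffer; pairs a leftover byte with the next buffer's first. */
static void
pack_bytes(struct word_packer *p, const uint8_t *data, size_t len)
{
	if (p->wantbyte && len > 0) {	/* finish the last word */
		p->save.b[1] = *data++;
		len--;
		emit_word(p->save.w);
		p->wantbyte = 0;
	}
	while (len > 1) {		/* output contiguous words */
		p->save.b[0] = data[0];
		p->save.b[1] = data[1];
		emit_word(p->save.w);
		data += 2;
		len -= 2;
	}
	if (len == 1) {			/* hold the trailing odd byte */
		p->save.b[0] = *data;
		p->wantbyte = 1;
	}
}

/*
 * Flush a trailing odd byte, padding with zero.  The driver above just
 * emits whatever sits in the high byte, since dma_len was rounded up.
 */
static void
pack_flush(struct word_packer *p)
{
	if (p->wantbyte) {
		p->save.b[1] = 0;
		emit_word(p->save.w);
		p->wantbyte = 0;
	}
}
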
/*
 * Destination options header processing.
 */
int
dest6_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	int off = *offp, dstoptlen, optlen;
	struct ip6_dest *dstopts;
	u_int8_t *opt;

	/* validation of the length of the header */
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, sizeof(*dstopts), IPPROTO_DONE);
	dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off);
#else
	IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, sizeof(*dstopts));
	if (dstopts == NULL)
		return IPPROTO_DONE;
#endif
	dstoptlen = (dstopts->ip6d_len + 1) << 3;

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, dstoptlen, IPPROTO_DONE);
	dstopts = (struct ip6_dest *)(mtod(m, caddr_t) + off);
#else
	IP6_EXTHDR_GET(dstopts, struct ip6_dest *, m, off, dstoptlen);
	if (dstopts == NULL)
		return IPPROTO_DONE;
#endif
	off += dstoptlen;
	dstoptlen -= sizeof(struct ip6_dest);
	opt = (u_int8_t *)dstopts + sizeof(struct ip6_dest);

	/* search header for all options. */
	for (optlen = 0; dstoptlen > 0; dstoptlen -= optlen, opt += optlen) {
		if (*opt != IP6OPT_PAD1 &&
		    (dstoptlen < IP6OPT_MINLEN || *(opt + 1) + 2 > dstoptlen)) {
			V_ip6stat.ip6s_toosmall++;
			goto bad;
		}

		switch (*opt) {
		case IP6OPT_PAD1:
			optlen = 1;
			break;
		case IP6OPT_PADN:
			optlen = *(opt + 1) + 2;
			break;
		default:		/* unknown option */
			optlen = ip6_unknown_opt(opt, m,
			    opt - mtod(m, u_int8_t *));
			if (optlen == -1)
				return (IPPROTO_DONE);
			optlen += 2;
			break;
		}
	}

	*offp = off;
	return (dstopts->ip6d_nxt);

  bad:
	m_freem(m);
	return (IPPROTO_DONE);
}
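
dest6_input() above is a standard IPv6 TLV walk: Pad1 is a lone zero byte
with no length field, and every other option is type, length, then data. A
minimal standalone parser over a flat buffer showing the same bounds checks
(unknown options are simply skipped here, where the real code calls
ip6_unknown_opt()):

#include <stdint.h>
#include <stddef.h>

#define OPT_PAD1	0x00	/* one byte, no length field */
#define OPT_PADN	0x01	/* type, len, then len bytes of padding */

/*
 * Walk a destination-options TLV area of 'len' bytes.
 * Returns 0 on success, -1 on a malformed option.
 */
static int
walk_dest_opts(const uint8_t *opt, size_t len)
{
	size_t optlen;

	while (len > 0) {
		if (*opt == OPT_PAD1) {
			optlen = 1;		/* no length byte follows */
		} else {
			if (len < 2 || (size_t)opt[1] + 2 > len)
				return (-1);	/* truncated option */
			optlen = (size_t)opt[1] + 2;
			/* opt[0] is the type, opt + 2 the option data */
		}
		opt += optlen;
		len -= optlen;
	}
	return (0);
}
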