Beispiel #1
0
/**
 * xs_udp_data_ready - "data ready" callback for UDP sockets
 * @sk: socket with data to read
 * @len: how much data to read
 *
 */
static void xs_udp_data_ready(struct sock *sk, int len)
{
    struct rpc_task *task;
    struct rpc_xprt *xprt;
    struct rpc_rqst *rovr;
    struct sk_buff *skb;
    int err, repsize, copied;
    u32 _xid, *xp;

    read_lock(&sk->sk_callback_lock);
    dprintk("RPC:      xs_udp_data_ready...\n");
    if (!(xprt = xprt_from_sock(sk)))
        goto out;

    if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
        goto out;

    if (xprt->shutdown)
        goto dropit;

    repsize = skb->len - sizeof(struct udphdr);
    if (repsize < 4) {
        dprintk("RPC:      impossible RPC reply size %d!\n", repsize);
        goto dropit;
    }

    /* Copy the XID from the skb... */
    xp = skb_header_pointer(skb, sizeof(struct udphdr),
                            sizeof(_xid), &_xid);
    if (xp == NULL)
        goto dropit;

    /* Look up and lock the request corresponding to the given XID */
    spin_lock(&xprt->transport_lock);
    rovr = xprt_lookup_rqst(xprt, *xp);
    if (!rovr)
        goto out_unlock;
    task = rovr->rq_task;

    if ((copied = rovr->rq_private_buf.buflen) > repsize)
        copied = repsize;

    /* Suck it into the iovec, verify checksum if not done by hw. */
    if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
        goto out_unlock;

    /* Something worked... */
    dst_confirm(skb->dst);

    xprt_adjust_cwnd(task, copied);
    xprt_update_rtt(task);
    xprt_complete_rqst(task, copied);

out_unlock:
    spin_unlock(&xprt->transport_lock);
dropit:
    skb_free_datagram(sk, skb);
out:
    read_unlock(&sk->sk_callback_lock);
}
Beispiel #2
0
/*
 *	Handle redirects
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
		  struct neighbour *neigh, int on_link)
{
	struct rt6_info *rt, *nrt;

	/* Locate old route to this destination. */
	rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

	if (rt == NULL)
		return;

	if (neigh->dev != rt->rt6i_dev)
		goto out;

	/* Redirect received -> path was valid.
	   Look, redirects are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	/* Current route is on-link; redirect is always invalid.
	   
	   Seems, previous statement is not true. It could
	   be node, which looks for us as on-link (f.e. proxy ndisc)
	   But then router serving it might decide, that we should
	   know truth 8)8) --ANK (980726).
	 */
	if (!(rt->rt6i_flags&RTF_GATEWAY))
		goto out;

	/*
	 *	RFC 1970 specifies that redirects should only be
	 *	accepted if they come from the nexthop to the target.
	 *	Due to the way default routers are chosen, this notion
	 *	is a bit fuzzy and one might need to check all default
	 *	routers.
	 */

	if (ipv6_addr_cmp(saddr, &rt->rt6i_gateway)) {
		if (rt->rt6i_flags & RTF_DEFAULT) {
			struct rt6_info *rt1;

			read_lock(&rt6_lock);
			for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
				if (!ipv6_addr_cmp(saddr, &rt1->rt6i_gateway)) {
					dst_hold(&rt1->u.dst);
					dst_release(&rt->u.dst);
					read_unlock(&rt6_lock);
					rt = rt1;
					goto source_ok;
				}
			}
			read_unlock(&rt6_lock);
		}
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

source_ok:

	/*
	 *	We have finally decided to accept it.
	 */

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
	if (rt->u.dst.advmss > 65535-20)
		rt->u.dst.advmss = 65535;
	nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);

	if (rt6_ins(nrt, NULL))
		goto out;

	if (rt->rt6i_flags&RTF_CACHE) {
		ip6_del_rt(rt, NULL);
		return;
	}

out:
        dst_release(&rt->u.dst);
	return;
}
Beispiel #3
0
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;

	if (pmtu < IPV6_MIN_MTU) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_pmtu_discovery: invalid MTU value %d\n",
			       pmtu);
		/* According to RFC1981, the PMTU is set to the IPv6 minimum
		   link MTU if the node receives a Packet Too Big message
		   reporting next-hop MTU that is less than the IPv6 minimum MTU.
		 */	
		pmtu = IPV6_MIN_MTU;
	}

	rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);

	if (rt == NULL)
		return;

	if (pmtu >= rt->u.dst.pmtu)
		goto out;

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.pmtu = pmtu;
		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
		nrt = rt6_cow(rt, daddr, saddr);
		if (!nrt->u.dst.error) {
			nrt->u.dst.pmtu = pmtu;
			/* According to RFC 1981, detecting PMTU increase shouldn't be
			   happened within 5 mins, the recommended timer is 10 mins.
			   Here this route expiration time is set to ip6_rt_mtu_expires 
			   which is 10 mins. After 10 mins the decreased pmtu is expired
			   and detecting PMTU increase will be automatically happened.
			 */
			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
			dst_release(&nrt->u.dst);
		}
	} else {
		nrt = ip6_rt_copy(rt);
		if (nrt == NULL)
			goto out;
		ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
		nrt->rt6i_dst.plen = 128;
		nrt->u.dst.flags |= DST_HOST;
		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
		nrt->u.dst.pmtu = pmtu;
		rt6_ins(nrt, NULL);
	}

out:
	dst_release(&rt->u.dst);
}
Beispiel #4
0
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	int free = 0;
	__be32 daddr;
	__be32 saddr;
	u8  tos;
	int err;
	struct ip_options_data opt_copy;

	err = -EMSGSIZE;
	if (len > 0xFFFF)
		goto out;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n",
				     __func__, current->comm);
			err = -EAFNOSUPPORT;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->sk_state != TCP_ESTABLISHED)
			goto out;
		daddr = inet->inet_daddr;
	}

	ipc.addr = inet->inet_saddr;
	ipc.opt = NULL;
	ipc.tx_flags = 0;
	ipc.oif = sk->sk_bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err)
			goto out;
		if (ipc.opt)
			free = 1;
	}

	saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt) {
		struct ip_options_rcu *inet_opt;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet->inet_opt);
		if (inet_opt) {
			memcpy(&opt_copy, inet_opt,
			       sizeof(*inet_opt) + inet_opt->opt.optlen);
			ipc.opt = &opt_copy.opt;
		}
		rcu_read_unlock();
	}

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (inet->hdrincl)
			goto done;
		if (ipc.opt->opt.srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->opt.faddr;
		}
	}
	tos = RT_CONN_FLAGS(sk);
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	} else if (!ipc.oif)
		ipc.oif = inet->uc_index;

	flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
			   RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP |
			    (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
			   daddr, saddr, 0, 0,
			   sock_i_uid(sk));

	if (!inet->hdrincl) {
		err = raw_probe_proto_opt(&fl4, msg);
		if (err)
			goto done;
	}

	security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
	rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto done;
	}

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
		goto done;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	if (inet->hdrincl)
		err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len,
				      &rt, msg->msg_flags);

	 else {
		if (!ipc.addr)
			ipc.addr = fl4.daddr;
		lock_sock(sk);
		err = ip_append_data(sk, &fl4, ip_generic_getfrag,
				     msg->msg_iov, len, 0,
				     &ipc, &rt, msg->msg_flags);
		if (err)
			ip_flush_pending_frames(sk);
		else if (!(msg->msg_flags & MSG_MORE)) {
			err = ip_push_pending_frames(sk, &fl4);
			if (err == -ENOBUFS && !inet->recverr)
				err = 0;
		}
		release_sock(sk);
	}
done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:
	if (err < 0)
		return err;
	return len;

do_confirm:
	dst_confirm(&rt->dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}
Beispiel #5
0
/* Save metrics learned by this TCP session.  This function is called
 * only, when TCP finishes successfully i.e. when it enters TIME-WAIT
 * or goes from LAST-ACK to CLOSE.
 */
void tcp_update_metrics(struct sock *sk)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct dst_entry *dst = __sk_dst_get(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_metrics_block *tm;
	unsigned long rtt;
	u32 val;
	int m;

	if (sysctl_tcp_nometrics_save || !dst)
		return;

	if (dst->flags & DST_HOST)
		dst_confirm(dst);

	rcu_read_lock();
	if (icsk->icsk_backoff || !tp->srtt) {
		/* This session failed to estimate rtt. Why?
		 * Probably, no packets returned in time.  Reset our
		 * results.
		 */
		tm = tcp_get_metrics(sk, dst, false);
		if (tm && !tcp_metric_locked(tm, TCP_METRIC_RTT))
			tcp_metric_set(tm, TCP_METRIC_RTT, 0);
		goto out_unlock;
	} else
		tm = tcp_get_metrics(sk, dst, true);

	if (!tm)
		goto out_unlock;

	rtt = tcp_metric_get_jiffies(tm, TCP_METRIC_RTT);
	m = rtt - tp->srtt;

	/* If newly calculated rtt larger than stored one, store new
	 * one. Otherwise, use EWMA. Remember, rtt overestimation is
	 * always better than underestimation.
	 */
	if (!tcp_metric_locked(tm, TCP_METRIC_RTT)) {
		if (m <= 0)
			rtt = tp->srtt;
		else
			rtt -= (m >> 3);
		tcp_metric_set_msecs(tm, TCP_METRIC_RTT, rtt);
	}

	if (!tcp_metric_locked(tm, TCP_METRIC_RTTVAR)) {
		unsigned long var;

		if (m < 0)
			m = -m;

		/* Scale deviation to rttvar fixed point */
		m >>= 1;
		if (m < tp->mdev)
			m = tp->mdev;

		var = tcp_metric_get_jiffies(tm, TCP_METRIC_RTTVAR);
		if (m >= var)
			var = m;
		else
			var -= (var - m) >> 2;

		tcp_metric_set_msecs(tm, TCP_METRIC_RTTVAR, var);
	}
Beispiel #6
0
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;
	__be32 daddr;
	__be32 saddr;
	u8  tos;
	int err;

	err = -EMSGSIZE;
	if (len > 0xFFFF)
		goto out;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */

	/*
	 *	Get and verify the address.
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			static int complained;
			if (!complained++)
				printk(KERN_INFO "%s forgot to set AF_INET in "
						 "raw sendmsg. Fix it!\n",
						 current->comm);
			err = -EAFNOSUPPORT;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->sk_state != TCP_ESTABLISHED)
			goto out;
		daddr = inet->inet_daddr;
	}

	ipc.addr = inet->inet_saddr;
	ipc.opt = NULL;
	ipc.tx_flags = 0;
	ipc.oif = sk->sk_bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err)
			goto out;
		if (ipc.opt)
			free = 1;
	}

	saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt)
		ipc.opt = inet->opt;

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (inet->hdrincl)
			goto done;
		if (ipc.opt->srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->faddr;
		}
	}
	tos = RT_CONN_FLAGS(sk);
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	}

	{
		struct flowi4 fl4 = {
			.flowi4_oif = ipc.oif,
			.flowi4_mark = sk->sk_mark,
			.daddr = daddr,
			.saddr = saddr,
			.flowi4_tos = tos,
			.flowi4_proto = (inet->hdrincl ?
					 IPPROTO_RAW :
					 sk->sk_protocol),
			.flowi4_flags = FLOWI_FLAG_CAN_SLEEP,
		};
		if (!inet->hdrincl) {
			err = raw_probe_proto_opt(&fl4, msg);
			if (err)
				goto done;
		}

		security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			goto done;
		}
	}

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
		goto done;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	if (inet->hdrincl)
		err = raw_send_hdrinc(sk, msg->msg_iov, len,
					&rt, msg->msg_flags);

	 else {
		if (!ipc.addr)
			ipc.addr = rt->rt_dst;
		lock_sock(sk);
		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
					&ipc, &rt, msg->msg_flags);
		if (err)
			ip_flush_pending_frames(sk);
		else if (!(msg->msg_flags & MSG_MORE)) {
			err = ip_push_pending_frames(sk);
			if (err == -ENOBUFS && !inet->recverr)
				err = 0;
		}
		release_sock(sk);
	}
done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:
	if (err < 0)
		return err;
	return len;

do_confirm:
	dst_confirm(&rt->dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}

static void raw_close(struct sock *sk, long timeout)
{
	/*
	 * Raw sockets may have direct kernel refereneces. Kill them.
	 */
	ip_ra_control(sk, 0, NULL);

	sk_common_release(sk);
}
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
		   struct flowi *fl, unsigned length,
		   struct ipv6_txoptions *opt, int hlimit, int flags)
{
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
	struct in6_addr *final_dst = NULL;
	struct dst_entry *dst;
	int err = 0;
	unsigned int pktlength, jumbolen, mtu;
	struct in6_addr saddr;

	if (opt && opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		final_dst = fl->fl6_dst;
		fl->fl6_dst = rt0->addr;
	}

	if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
		fl->oif = np->mcast_oif;

	dst = __sk_dst_check(sk, np->dst_cookie);
	if (dst) {
		struct rt6_info *rt = (struct rt6_info*)dst;

			/* Yes, checking route validity in not connected
			   case is not very simple. Take into account,
			   that we do not support routing by source, TOS,
			   and MSG_DONTROUTE 		--ANK (980726)

			   1. If route was host route, check that
			      cached destination is current.
			      If it is network route, we still may
			      check its validity using saved pointer
			      to the last used address: daddr_cache.
			      We do not want to save whole address now,
			      (because main consumer of this service
			       is tcp, which has not this problem),
			      so that the last trick works only on connected
			      sockets.
			   2. oif also should be the same.
			 */

		if (((rt->rt6i_dst.plen != 128 ||
		      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
		     && (np->daddr_cache == NULL ||
			 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
		    || (fl->oif && fl->oif != dst->dev->ifindex)) {
			dst = NULL;
		} else
			dst_hold(dst);
	}

	if (dst == NULL)
		dst = ip6_route_output(sk, fl);

	if (dst->error) {
		IP6_INC_STATS(Ip6OutNoRoutes);
		dst_release(dst);
		return -ENETUNREACH;
	}

	if (fl->fl6_src == NULL) {
		err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr);

		if (err) {
#if IP6_DEBUG >= 2
			printk(KERN_DEBUG "ip6_build_xmit: "
			       "no available source address\n");
#endif
			goto out;
		}
		fl->fl6_src = &saddr;
	}
	pktlength = length;

	if (hlimit < 0) {
		if (ipv6_addr_is_multicast(fl->fl6_dst))
			hlimit = np->mcast_hops;
		else
			hlimit = np->hop_limit;
		if (hlimit < 0)
			hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
	}

	jumbolen = 0;

	if (!sk->protinfo.af_inet.hdrincl) {
		pktlength += sizeof(struct ipv6hdr);
		if (opt)
			pktlength += opt->opt_flen + opt->opt_nflen;

		if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
			/* Jumbo datagram.
			   It is assumed, that in the case of hdrincl
			   jumbo option is supplied by user.
			 */
			pktlength += 8;
			jumbolen = pktlength - sizeof(struct ipv6hdr);
		}
	}

	mtu = dst->pmtu;
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
		else if (np->pmtudisc == IPV6_PMTUDISC_DONT)
			mtu = IPV6_MIN_MTU;
	}

	/* Critical arithmetic overflow check.
	   FIXME: may gcc optimize it out? --ANK (980726)
	 */
	if (pktlength < length) {
		ipv6_local_error(sk, EMSGSIZE, fl, mtu);
		err = -EMSGSIZE;
		goto out;
	}

	if (flags&MSG_CONFIRM)
		dst_confirm(dst);

	if (pktlength <= mtu) {
		struct sk_buff *skb;
		struct ipv6hdr *hdr;
		struct net_device *dev = dst->dev;

		err = 0;
		if (flags&MSG_PROBE)
			goto out;

		skb = sock_alloc_send_skb(sk, pktlength + 15 +
					  dev->hard_header_len,
					  flags & MSG_DONTWAIT, &err);

		if (skb == NULL) {
			IP6_INC_STATS(Ip6OutDiscards);
			goto out;
		}

		skb->dst = dst_clone(dst);

		skb_reserve(skb, (dev->hard_header_len + 15) & ~15);

		hdr = (struct ipv6hdr *) skb->tail;
		skb->nh.ipv6h = hdr;

		if (!sk->protinfo.af_inet.hdrincl) {
			ip6_bld_1(sk, skb, fl, hlimit,
				  jumbolen ? sizeof(struct ipv6hdr) : pktlength);

			if (opt || jumbolen) {
				u8 *prev_hdr = &hdr->nexthdr;
				prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
				if (opt && opt->opt_flen)
					ipv6_build_frag_opts(skb, prev_hdr, opt);
			}
		}

		skb_put(skb, length);
		err = getfrag(data, &hdr->saddr,
			      ((char *) hdr) + (pktlength - length),
			      0, length);

		if (!err) {
			IP6_INC_STATS(Ip6OutRequests);
			err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
		} else {
			err = -EFAULT;
			kfree_skb(skb);
		}
	} else {
		if (sk->protinfo.af_inet.hdrincl || jumbolen ||
		    np->pmtudisc == IPV6_PMTUDISC_DO) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu);
			err = -EMSGSIZE;
			goto out;
		}

		err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
				    flags, length, mtu);
	}

	/*
	 *	cleanup
	 */
out:
	ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
	if (err > 0)
		err = np->recverr ? net_xmit_errno(err) : 0;
	return err;
}
Beispiel #8
0
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		       size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;
	u32 daddr;
	u32 saddr;
	u8  tos;
	int err;

	err = -EMSGSIZE;
	if (len < 0 || len > 0xFFFF)
		goto out;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */
			 
	/*
	 *	Get and verify the address. 
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			static int complained;
			if (!complained++)
				printk(KERN_INFO "%s forgot to set AF_INET in "
						 "raw sendmsg. Fix it!\n",
						 current->comm);
			err = -EAFNOSUPPORT;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->sk_state != TCP_ESTABLISHED) 
			goto out;
		daddr = inet->daddr;
	}

	ipc.addr = inet->saddr;
	ipc.opt = NULL;
	ipc.oif = sk->sk_bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			goto out;
		if (ipc.opt)
			free = 1;
	}

	saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt)
		ipc.opt = inet->opt;

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (inet->hdrincl)
			goto done;
		if (ipc.opt->srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->faddr;
		}
	}
	tos = RT_CONN_FLAGS(sk);
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (MULTICAST(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
	}

	{
		struct flowi fl = { .oif = ipc.oif,
				    .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = saddr,
						.tos = tos } },
				    .proto = inet->hdrincl ? IPPROTO_RAW :
					    		     sk->sk_protocol,
				  };
		if (!inet->hdrincl)
			raw_probe_proto_opt(&fl, msg);

		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
	}
	if (err)
		goto done;

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST))
		goto done;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	if (inet->hdrincl)
		err = raw_send_hdrinc(sk, msg->msg_iov, len, 
					rt, msg->msg_flags);
	
	 else {
		if (!ipc.addr)
			ipc.addr = rt->rt_dst;
		lock_sock(sk);
		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
					&ipc, rt, msg->msg_flags);
		if (err)
			ip_flush_pending_frames(sk);
		else if (!(msg->msg_flags & MSG_MORE))
			err = ip_push_pending_frames(sk);
		release_sock(sk);
	}
done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:	return err < 0 ? err : len;

do_confirm:
	dst_confirm(&rt->u.dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}
Beispiel #9
0
void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
{
	struct iwch_dev *rnicp;
	struct respQ_msg_t *rsp_msg = (struct respQ_msg_t *) skb->data;
	struct iwch_cq *chp;
	struct iwch_qp *qhp;
	u32 cqid = RSPQ_CQID(rsp_msg);

	rnicp = (struct iwch_dev *) rdev_p->ulp;
	spin_lock(&rnicp->lock);
	chp = get_chp(rnicp, cqid);
	qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
	if (!chp || !qhp) {
		printk(KERN_ERR MOD "BAD AE cqid 0x%x qpid 0x%x opcode %d "
		       "status 0x%x type %d wrid.hi 0x%x wrid.lo 0x%x \n",
		       cqid, CQE_QPID(rsp_msg->cqe),
		       CQE_OPCODE(rsp_msg->cqe), CQE_STATUS(rsp_msg->cqe),
		       CQE_TYPE(rsp_msg->cqe), CQE_WRID_HI(rsp_msg->cqe),
		       CQE_WRID_LOW(rsp_msg->cqe));
		spin_unlock(&rnicp->lock);
		goto out;
	}
	iwch_qp_add_ref(&qhp->ibqp);
	atomic_inc(&chp->refcnt);
	spin_unlock(&rnicp->lock);

	/*
	 * 1) completion of our sending a TERMINATE.
	 * 2) incoming TERMINATE message.
	 */
	if ((CQE_OPCODE(rsp_msg->cqe) == T3_TERMINATE) &&
	    (CQE_STATUS(rsp_msg->cqe) == 0)) {
		if (SQ_TYPE(rsp_msg->cqe)) {
			PDBG("%s QPID 0x%x ep %p disconnecting\n",
			     __func__, qhp->wq.qpid, qhp->ep);
			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
		} else {
			PDBG("%s post REQ_ERR AE QPID 0x%x\n", __func__,
			     qhp->wq.qpid);
			post_qp_event(rnicp, chp, rsp_msg,
				      IB_EVENT_QP_REQ_ERR, 0);
			iwch_ep_disconnect(qhp->ep, 0, GFP_ATOMIC);
		}
		goto done;
	}

	/* Bad incoming Read request */
	if (SQ_TYPE(rsp_msg->cqe) &&
	    (CQE_OPCODE(rsp_msg->cqe) == T3_READ_RESP)) {
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
		goto done;
	}

	/* Bad incoming write */
	if (RQ_TYPE(rsp_msg->cqe) &&
	    (CQE_OPCODE(rsp_msg->cqe) == T3_RDMA_WRITE)) {
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_REQ_ERR, 1);
		goto done;
	}

	switch (CQE_STATUS(rsp_msg->cqe)) {

	/* Completion Events */
	case TPT_ERR_SUCCESS:

		/*
		 * Confirm the destination entry if this is a RECV completion.
		 */
		if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
			dst_confirm(qhp->ep->dst);
		(*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
		break;

	case TPT_ERR_STAG:
	case TPT_ERR_PDID:
	case TPT_ERR_QPID:
	case TPT_ERR_ACCESS:
	case TPT_ERR_WRAP:
	case TPT_ERR_BOUND:
	case TPT_ERR_INVALIDATE_SHARED_MR:
	case TPT_ERR_INVALIDATE_MR_WITH_MW_BOUND:
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_ACCESS_ERR, 1);
		break;

	/* Device Fatal Errors */
	case TPT_ERR_ECC:
	case TPT_ERR_ECC_PSTAG:
	case TPT_ERR_INTERNAL_ERR:
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_DEVICE_FATAL, 1);
		break;

	/* QP Fatal Errors */
	case TPT_ERR_OUT_OF_RQE:
	case TPT_ERR_PBL_ADDR_BOUND:
	case TPT_ERR_CRC:
	case TPT_ERR_MARKER:
	case TPT_ERR_PDU_LEN_ERR:
	case TPT_ERR_DDP_VERSION:
	case TPT_ERR_RDMA_VERSION:
	case TPT_ERR_OPCODE:
	case TPT_ERR_DDP_QUEUE_NUM:
	case TPT_ERR_MSN:
	case TPT_ERR_TBIT:
	case TPT_ERR_MO:
	case TPT_ERR_MSN_GAP:
	case TPT_ERR_MSN_RANGE:
	case TPT_ERR_RQE_ADDR_BOUND:
	case TPT_ERR_IRD_OVERFLOW:
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
		break;

	default:
		printk(KERN_ERR MOD "Unknown T3 status 0x%x QPID 0x%x\n",
		       CQE_STATUS(rsp_msg->cqe), qhp->wq.qpid);
		post_qp_event(rnicp, chp, rsp_msg, IB_EVENT_QP_FATAL, 1);
		break;
	}
done:
	if (atomic_dec_and_test(&chp->refcnt))
	        wake_up(&chp->wait);
	iwch_qp_rem_ref(&qhp->ibqp);
out:
	dev_kfree_skb_irq(skb);
}
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	struct ipcm_cookie ipc;
	struct rawfakehdr rfh;
	struct rtable *rt = NULL;
	int free = 0;
	u32 daddr;
	u8  tos;
	int err;

	/* This check is ONLY to check for arithmetic overflow
	   on integer(!) len. Not more! Real check will be made
	   in ip_build_xmit --ANK

	   BTW socket.c -> af_*.c -> ... make multiple
	   invalid conversions size_t -> int. We MUST repair it f.e.
	   by replacing all of them with size_t and revise all
	   the places sort of len += sizeof(struct iphdr)
	   If len was ULONG_MAX-10 it would be cathastrophe  --ANK
	 */

	err = -EMSGSIZE;
	if (len < 0 || len > 0xFFFF)
		goto out;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */
			 
	/*
	 *	Get and verify the address. 
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			static int complained;
			if (!complained++)
				printk(KERN_INFO "%s forgot to set AF_INET in "
						 "raw sendmsg. Fix it!\n",
						 current->comm);
			err = -EINVAL;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->state != TCP_ESTABLISHED) 
			goto out;
		daddr = sk->daddr;
	}

	ipc.addr = sk->saddr;
	ipc.opt = NULL;
	ipc.oif = sk->bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			goto out;
		if (ipc.opt)
			free = 1;
	}

	rfh.saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt)
		ipc.opt = sk->protinfo.af_inet.opt;

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (sk->protinfo.af_inet.hdrincl)
			goto done;
		if (ipc.opt->srr) {
			if (!daddr)
				goto done;
			daddr = ipc.opt->faddr;
		}
	}
	tos = RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute;
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (MULTICAST(daddr)) {
		if (!ipc.oif)
			ipc.oif = sk->protinfo.af_inet.mc_index;
		if (!rfh.saddr)
			rfh.saddr = sk->protinfo.af_inet.mc_addr;
	}

	err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);

	if (err)
		goto done;

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sk->broadcast)
		goto done;

	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	rfh.iov		= msg->msg_iov;
	rfh.saddr	= rt->rt_src;
	rfh.dst		= &rt->u.dst;
	if (!ipc.addr)
		ipc.addr = rt->rt_dst;
	err = ip_build_xmit(sk, sk->protinfo.af_inet.hdrincl ? raw_getrawfrag :
		       	    raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags);

done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:	return err < 0 ? err : len;

do_confirm:
	dst_confirm(&rt->u.dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}