/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	const struct iphdr *iph = ip_hdr(skb);
	struct rtable *rt;
	struct flowi4 fl4 = {};
	__be32 saddr = iph->saddr;
	__u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
	unsigned int hh_len;

	if (addr_type == RTN_UNSPEC)
		addr_type = inet_addr_type(net, saddr);
	if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
		flags |= FLOWI_FLAG_ANYSRC;
	else
		saddr = 0;

	/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
	 * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook.
	 */
	fl4.daddr = iph->daddr;
	fl4.saddr = saddr;
	fl4.flowi4_tos = RT_TOS(iph->tos);
	fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_flags = flags;
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt))
		return -1;

	/* Drop old route. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	if (skb_dst(skb)->error)
		return -1;

#ifdef CONFIG_XFRM
	if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
	    xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
		struct dst_entry *dst = skb_dst(skb);
		skb_dst_set(skb, NULL);
		dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
		if (IS_ERR(dst))
			return -1;
		skb_dst_set(skb, dst);
	}
#endif

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
				0, GFP_ATOMIC))
		return -1;

	return 0;
}
Example #2
0
/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(iph->tos), 1|2)))
		goto tx_error_icmp;
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
Example #3
0
/* Try to route the packet according to the routing keys specified in
 * route_info. Keys are :
 *  - ifindex : 
 *      0 if no oif preferred, 
 *      otherwise set to the index of the desired oif
 *  - route_info->gw :
 *      0 if no gateway specified,
 *      otherwise set to the next host to which the pkt must be routed
 * If success, skb->dev is the output device to which the packet must 
 * be sent and skb->dst is not NULL
 *
 * RETURN: -1 if an error occured
 *          1 if the packet was succesfully routed to the 
 *            destination desired
 *          0 if the kernel routing table could not route the packet
 *            according to the keys specified
 */
static int route(struct sk_buff *skb,
		 unsigned int ifindex,
		 const struct ipt_route_target_info *route_info)
{
	int err;
	struct rtable *rt;
	struct iphdr *iph = ip_hdr(skb);
	struct flowi fl = {
		.oif = ifindex,
		.nl_u = {
			.ip4_u = {
				.daddr = iph->daddr,
				.saddr = 0,
				.tos = RT_TOS(iph->tos),
				.scope = RT_SCOPE_UNIVERSE,
			}
		} 
	};
	
	/* The destination address may be overloaded by the target */
	if (route_info->gw)
		fl.fl4_dst = route_info->gw;
	
	/* Trying to route the packet using the standard routing table. */
	if ((err = ip_route_output_key(&init_net,&rt, &fl))) {
		if (net_ratelimit()) 
			pr_debug("ipt_ROUTE: couldn't route pkt (err: %i)",err);
		return -1;
	}
	
	/* Drop old route. */
	skb_dst_drop(skb);

	/* Success if no oif specified or if the oif correspond to the 
	 * one desired */
	if (!ifindex || rt->dst.dev->ifindex == ifindex) {
		skb_dst_set(skb, &rt->dst);
		skb->dev = skb_dst(skb)->dev;
		skb->protocol = htons(ETH_P_IP);
		return 1;
	}
	
	/* The interface selected by the routing table is not the one
	 * specified by the user. This may happen because the dst address
	 * is one of our own addresses.
	 */
	if (net_ratelimit()) 
		pr_debug("ipt_ROUTE: failed to route as desired gw, oif=%i (got oif=%i)\n", 
		//pr_debug("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n", 
		       ifindex, rt->dst.dev->ifindex);
		       //NIPQUAD(route_info->gw), ifindex, rt->dst.dev->ifindex);
	
	return 0;
}
Example #4
0
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
	struct ovs_key_ipv4_tunnel *tun_key;
	struct rtable *rt;
	struct flowi4 fl;
	__be16 src_port;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
		err = -EINVAL;
		goto error;
	}

	tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.daddr = tun_key->ipv4_dst;
	fl.saddr = tun_key->ipv4_src;
	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
	fl.flowi4_mark = skb->mark;
	fl.flowi4_proto = IPPROTO_UDP;

	rt = ip_route_output_key(net, &fl);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
		htons(IP_DF) : 0;

	skb->ignore_df = 1;

	src_port = udp_flow_src_port(net, skb, 0, 0, true);

	err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
			     fl.saddr, tun_key->ipv4_dst,
			     tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     htonl(be64_to_cpu(tun_key->tun_id) << 8),
			     false);
	if (err < 0)
		ip_rt_put(rt);
	return err;
error:
	kfree_skb(skb);
	return err;
}
Example #5
0
static inline struct rtable *route_mirror(struct sk_buff *skb, int local)
{
        struct iphdr *iph = skb->nh.iph;
	struct dst_entry *odst;
	struct rt_key key = {};
	struct rtable *rt;

	if (local) {
		key.dst = iph->saddr;
		key.src = iph->daddr;
		key.tos = RT_TOS(iph->tos);

		if (ip_route_output_key(&rt, &key) != 0)
			return NULL;
	} else {
		/* non-local src, find valid iif to satisfy
		 * rp-filter when calling ip_route_input. */
		key.dst = iph->daddr;
		if (ip_route_output_key(&rt, &key) != 0)
			return NULL;

		odst = skb->dst;
		if (ip_route_input(skb, iph->saddr, iph->daddr,
		                   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
			dst_release(&rt->u.dst);
			return NULL;
		}
		dst_release(&rt->u.dst);
		rt = (struct rtable *)skb->dst;
		skb->dst = odst;
	}

	if (rt->u.dst.error) {
		dst_release(&rt->u.dst);
		rt = NULL;
	}

	return rt;
}
Example #6
0
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
					   struct net_device *vrf_dev)
{
	struct iphdr *ip4h = ip_hdr(skb);
	int ret = NET_XMIT_DROP;
	struct flowi4 fl4 = {
		/* needed to match OIF rule */
		.flowi4_oif = vrf_dev->ifindex,
		.flowi4_iif = LOOPBACK_IFINDEX,
		.flowi4_tos = RT_TOS(ip4h->tos),
		.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC |
				FLOWI_FLAG_SKIP_NH_OIF,
		.daddr = ip4h->daddr,
	};

	if (vrf_send_v4_prep(skb, &fl4, vrf_dev))
		goto err;

	if (!ip4h->saddr) {
		ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0,
					       RT_SCOPE_LINK);
	}

	ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
	if (unlikely(net_xmit_eval(ret)))
		vrf_dev->stats.tx_errors++;
	else
		ret = NET_XMIT_SUCCESS;

out:
	return ret;
err:
	vrf_tx_error(vrf_dev, skb);
	goto out;
}

static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev)
{
	/* strip the ethernet header added for pass through VRF device */
	__skb_pull(skb, skb_network_offset(skb));

	switch (skb->protocol) {
	case htons(ETH_P_IP):
		return vrf_process_v4_outbound(skb, dev);
	case htons(ETH_P_IPV6):
		return vrf_process_v6_outbound(skb, dev);
	default:
		vrf_tx_error(dev, skb);
		return NET_XMIT_DROP;
	}
}
Example #7
0
static int ip_masq_user_maddr(struct ip_masq_user *ums)
{
	struct device *dev;
	struct rtable *rt;
	int ret = -EINVAL;
	u32 rt_daddr, rt_saddr;
	u32 tos;

	/*
	 *	Did specify masq address.
	 */
	if (ums->maddr)
		return 0;

	/*
	 *	Select address to use for routing query
	 */

	rt_daddr = ums->rt_daddr? ums->rt_daddr : ums->daddr;
	rt_saddr = ums->rt_saddr? ums->rt_saddr : ums->saddr;


	/*
	 *	No address for routing, cannot continue
	 */
	if (rt_daddr == 0) {
		IP_MASQ_DEBUG(1-debug, "cannot setup maddr with daddr=%lX, rt_addr=%lX\n",
			     ntohl(ums->daddr), ntohl(ums->rt_daddr));
		return -EINVAL;
	}

	/*
	 *	Find out rt device 
	 */

	rt_saddr = 0; 
	tos = RT_TOS(ums->ip_tos) | RTO_CONN;

	if ((ret=ip_route_output(&rt, rt_daddr, rt_saddr, tos, 0 /* dev */))) {
		IP_MASQ_DEBUG(0-debug, "could not setup maddr for routing daddr=%lX, saddr=%lX\n",
			     ntohl(rt_daddr), ntohl(rt_saddr));
		return ret;
	}
	dev = rt->u.dst.dev;
	ums->maddr = ip_masq_select_addr(dev, rt->rt_gateway, RT_SCOPE_UNIVERSE);

	IP_MASQ_DEBUG(1-debug, "did setup maddr=%lX\n", ntohl(ums->maddr));
	ip_rt_put(rt);
	return 0;
}
Example #8
0
int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
			       struct net *net,
			       const struct ovs_tunnel_info *tun_info,
			       u8 ipproto,
			       u32 skb_mark,
			       __be16 tp_src,
			       __be16 tp_dst)
{
	const struct ovs_key_ipv4_tunnel *tun_key;
	struct rtable *rt;
	struct flowi4 fl;

	if (unlikely(!tun_info))
		return -EINVAL;

	tun_key = &tun_info->tunnel;

	/* Route lookup to get srouce IP address.
	 * The process may need to be changed if the corresponding process
	 * in vports ops changed.
	 */
	memset(&fl, 0, sizeof(fl));
	fl.daddr = tun_key->ipv4_dst;
	fl.saddr = tun_key->ipv4_src;
	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
	fl.flowi4_mark = skb_mark;
	fl.flowi4_proto = ipproto;

	rt = ip_route_output_key(net, &fl);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	ip_rt_put(rt);

	/* Generate egress_tun_info based on tun_info,
	 * saddr, tp_src and tp_dst
	 */
	__ovs_flow_tun_info_init(egress_tun_info,
				 fl.saddr, tun_key->ipv4_dst,
				 tun_key->ipv4_tos,
				 tun_key->ipv4_ttl,
				 tp_src, tp_dst,
				 tun_key->tun_id,
				 tun_key->tun_flags,
				 tun_info->options,
				 tun_info->options_len);

	return 0;
}
Example #9
0
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct vxlan_port *vxlan_port = vxlan_vport(vport);
	__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->sport;
	struct rtable *rt;
	struct flowi fl;
	__be16 src_port;
	int port_min;
	int port_max;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->tun_key)) {
		err = -EINVAL;
		goto error;
	}

	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = OVS_CB(skb)->tun_key->ipv4_dst;
	fl.fl4_src = OVS_CB(skb)->tun_key->ipv4_src;
	fl.fl4_tos = RT_TOS(OVS_CB(skb)->tun_key->ipv4_tos);
	fl.mark = skb->mark;
	fl.proto = IPPROTO_UDP;

	err = ip_route_output_key(net, &rt, &fl);
	if (err)
		goto error;

	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
		htons(IP_DF) : 0;

	skb->local_df = 1;

	inet_get_local_port_range(&port_min, &port_max);
	src_port = vxlan_src_port(port_min, port_max, skb);

	err = vxlan_xmit_skb(net, vxlan_port->vs, rt, skb,
			     fl.fl4_src, OVS_CB(skb)->tun_key->ipv4_dst,
			     OVS_CB(skb)->tun_key->ipv4_tos,
			     OVS_CB(skb)->tun_key->ipv4_ttl, df,
			     src_port, dst_port,
			     htonl(be64_to_cpu(OVS_CB(skb)->tun_key->tun_id) << 8));
	if (err < 0)
		ip_rt_put(rt);
error:
	return err;
}
Example #10
0
static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 struct flowi4 *fl,
				 const struct ip_tunnel_key *key)
{
	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
}
Example #11
0
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
Example #12
0
File: vport.c Project: JunoZhu/ovs
static struct rtable *ovs_tunnel_route_lookup(struct net *net,
					      const struct ip_tunnel_key *key,
					      u32 mark,
					      struct flowi4 *fl,
					      u8 protocol)
{
	struct rtable *rt;

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = mark;
	fl->flowi4_proto = protocol;

	rt = ip_route_output_key(net, fl);
	return rt;
}
Example #13
0
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct ovs_key_ipv4_tunnel *tun_key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->egress_tun_key)) {
		err = -EINVAL;
		goto error;
	}

	tun_key = OVS_CB(skb)->egress_tun_key;
	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.daddr = tun_key->ipv4_dst;
	fl.saddr = tun_key->ipv4_src;
	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
	fl.flowi4_mark = skb->mark;
	fl.flowi4_proto = IPPROTO_GRE;

	rt = ip_route_output_key(net, &fl);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
					0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
Example #14
0
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel_info *tun_info;
	struct net *net = dev_net(dev);
	const struct ip_tunnel_key *key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df, flags;
	int err;

	tun_info = skb_tunnel_info(skb, AF_INET);
	if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
		goto err_free_skb;

	key = &tun_info->key;
	memset(&fl, 0, sizeof(fl));
	fl.daddr = key->ipv4_dst;
	fl.saddr = key->ipv4_src;
	fl.flowi4_tos = RT_TOS(key->ipv4_tos);
	fl.flowi4_mark = skb->mark;
	fl.flowi4_proto = IPPROTO_GRE;

	rt = ip_route_output_key(net, &fl);
	if (IS_ERR(rt))
		goto err_free_skb;

	tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) +
						16);
		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
static int route_mirror(struct sk_buff *skb)
{
        struct iphdr *iph = skb->nh.iph;
	struct rtable *rt;

	/* Backwards */
	if (ip_route_output(&rt, iph->saddr, iph->daddr,
			    RT_TOS(iph->tos) | RTO_CONN,
			    0)) {
		return 0;
	}

	/* check if the interface we are leaving by is the same as the
           one we arrived on */
	if (skb->dev == rt->u.dst.dev) {
		/* Drop old route. */
		dst_release(skb->dst);
		skb->dst = &rt->u.dst;
		return 1;
	}
	return 0;
}
Example #16
0
/* With a chainsaw... */
static int route_me_harder(struct sk_buff *skb)
{
    struct iphdr *iph = skb->nh.iph;
    struct rtable *rt;

    struct rt_key key = {
	dst:iph->daddr, src:iph->saddr,
	oif:skb->sk ? skb->sk->bound_dev_if : 0,
	tos:RT_TOS(iph->tos) | RTO_CONN,
#ifdef CONFIG_IP_ROUTE_FWMARK
	fwmark:skb->nfmark
#endif
    };

    if (ip_route_output_key(&rt, &key) != 0)
	return -EINVAL;

    /* Drop old route. */
    dst_release(skb->dst);
    skb->dst = &rt->u.dst;
    return 0;
}
Example #17
0
static struct rtable *route_packet_ipv4(struct sk_buff *skb)
{
	struct iphdr *ip_header = ip_hdr(skb);
	struct flowi fl;
	struct rtable *table;

	memset(&fl, 0, sizeof(fl));
	fl.u.ip4.daddr = ip_header->daddr;
	fl.flowi_tos = RT_TOS(ip_header->tos);
	fl.flowi_proto = skb->protocol;

	table = ip_route_output_key(&init_net, &fl.u.ip4);
	if (!table) {
		log_err(ERR_ROUTE_FAILED, "ip_route_output_key() returned NULL. Cannot route packet.");
		return NULL;
	}
	if (IS_ERR(table)) {
		log_err(ERR_ROUTE_FAILED, "ip_route_output_key() returned %p. Cannot route packet.", table);
		return NULL;
	}

	return table;
}
Example #18
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct net_device *tdev;		/* Device to other host */
    struct iphdr  *old_iph = ip_hdr(skb);
    u8     tos = old_iph->tos;
    __be16 df = old_iph->frag_off;
    struct iphdr  *iph;			/* Our new IP header */
    unsigned int max_headroom;		/* The extra header space needed */
    int    mtu;
    int ret;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    tdev = rt->dst.dev;

    mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
    if (mtu < 68) {
        IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
        goto tx_error_put;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

    df |= (old_iph->frag_off & htons(IP_DF));

    if ((old_iph->frag_off & htons(IP_DF) &&
            mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

    if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb =
            skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            kfree_skb(skb);
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NF_STOLEN;
        }
        kfree_skb(skb);
        skb = new_skb;
        old_iph = ip_hdr(skb);
    }

    skb->transport_header = skb->network_header;

    /* fix old IP header checksum */
    ip_send_check(old_iph);

    skb_push(skb, sizeof(struct iphdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /*
     *	Push down and install the IPIP header.
     */
    iph			=	ip_hdr(skb);
    iph->version		=	4;
    iph->ihl		=	sizeof(struct iphdr)>>2;
    iph->frag_off		=	df;
    iph->protocol		=	IPPROTO_IPIP;
    iph->tos		=	tos;
    iph->daddr		=	rt->rt_dst;
    iph->saddr		=	rt->rt_src;
    iph->ttl		=	old_iph->ttl;
    ip_select_ident(iph, &rt->dst, NULL);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    ret = IP_VS_XMIT_TUNNEL(skb, cp);
    if (ret == NF_ACCEPT)
        ip_local_out(skb);
    else if (ret == NF_DROP)
        kfree_skb(skb);

    LeaveFunction(10);

    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Example #19
0
/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
               struct ip_vs_protocol *pp)
{
    struct rtable *rt;		/* Route to the other host */
    int mtu;
    struct iphdr *iph = ip_hdr(skb);
    int local;

    EnterFunction(10);

    /* check if it is a connection of no-client-port */
    if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
        __be16 _pt, *p;
        p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
        if (p == NULL)
            goto tx_error;
        ip_vs_conn_fill_cport(cp, *p);
        IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
    }

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(iph->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL |
                                  IP_VS_RT_MODE_RDR)))
        goto tx_error_icmp;
    local = rt->rt_flags & RTCF_LOCAL;
    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
                             "ip_vs_nat_xmit(): "
                             "stopping DNAT to local address");
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && ipv4_is_loopback(rt->rt_dst) &&
            rt_is_input_route(skb_rtable(skb))) {
        IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
                         "stopping DNAT to loopback address");
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
                         "ip_vs_nat_xmit(): frag needed for");
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, sizeof(struct iphdr)))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    /* mangle the packet */
    if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
        goto tx_error_put;
    ip_hdr(skb)->daddr = cp->daddr.ip;
    ip_send_check(ip_hdr(skb));

    if (!local) {
        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        ip_rt_put(rt);
        /*
         * Some IPv4 replies get local address from routes,
         * not from iph, so while we DNAT after routing
         * we need this second input/output route.
         */
        if (!__ip_vs_reroute_locally(skb))
            goto tx_error;
    }

    IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

    /* FIXME: when application helper enlarges the packet and the length
       is larger than the MTU of outgoing device, there will be still
       MTU problem. */

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

    LeaveFunction(10);
    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Example #20
0
/* Reroute packet to local IPv4 stack after DNAT */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
    struct rtable *rt = skb_rtable(skb);
    struct net_device *dev = rt->dst.dev;
    struct net *net = dev_net(dev);
    struct iphdr *iph = ip_hdr(skb);

    if (rt_is_input_route(rt)) {
        unsigned long orefdst = skb->_skb_refdst;

        if (ip_route_input(skb, iph->daddr, iph->saddr,
                           iph->tos, skb->dev))
            return 0;
        refdst_drop(orefdst);
    } else {
        struct flowi4 fl4 = {
            .daddr = iph->daddr,
            .saddr = iph->saddr,
            .flowi4_tos = RT_TOS(iph->tos),
            .flowi4_mark = skb->mark,
        };

        rt = ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
            return 0;
        if (!(rt->rt_flags & RTCF_LOCAL)) {
            ip_rt_put(rt);
            return 0;
        }
        /* Drop old route. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    }
    return 1;
}

#ifdef CONFIG_IP_VS_IPV6

static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
    return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
}

static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
                        struct in6_addr *ret_saddr, int do_xfrm)
{
    struct dst_entry *dst;
    struct flowi6 fl6 = {
        .daddr = *daddr,
    };

    dst = ip6_route_output(net, NULL, &fl6);
    if (dst->error)
        goto out_err;
    if (!ret_saddr)
        return dst;
    if (ipv6_addr_any(&fl6.saddr) &&
            ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
                               &fl6.daddr, 0, &fl6.saddr) < 0)
        goto out_err;
    if (do_xfrm) {
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
        if (IS_ERR(dst)) {
            dst = NULL;
            goto out_err;
        }
    }
    ipv6_addr_copy(ret_saddr, &fl6.saddr);
    return dst;

out_err:
    dst_release(dst);
    IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
    return NULL;
}

/*
 * Get route to destination or remote server
 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
 *	    &4=Allow redirect from remote daddr to local
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
                      struct in6_addr *daddr, struct in6_addr *ret_saddr,
                      int do_xfrm, int rt_mode)
{
    struct net *net = dev_net(skb_dst(skb)->dev);
    struct rt6_info *rt;			/* Route to the other host */
    struct rt6_info *ort;			/* Original route */
    struct dst_entry *dst;
    int local;

    if (dest) {
        spin_lock(&dest->dst_lock);
        rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
        if (!rt) {
            u32 cookie;

            dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
                                          &dest->dst_saddr,
                                          do_xfrm);
            if (!dst) {
                spin_unlock(&dest->dst_lock);
                return NULL;
            }
            rt = (struct rt6_info *) dst;
            cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
            __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
            IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
                      &dest->addr.in6, &dest->dst_saddr,
                      atomic_read(&rt->dst.__refcnt));
        }
        if (ret_saddr)
            ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
        spin_unlock(&dest->dst_lock);
    } else {
        dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
        if (!dst)
            return NULL;
        rt = (struct rt6_info *) dst;
    }

    local = __ip_vs_is_local_route6(rt);
    if (!((local ? 1 : 2) & rt_mode)) {
        IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
                     local ? "local":"non-local", daddr);
        dst_release(&rt->dst);
        return NULL;
    }
    if (local && !(rt_mode & 4) &&
            !((ort = (struct rt6_info *) skb_dst(skb)) &&
              __ip_vs_is_local_route6(ort))) {
        IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
                     "requires NAT method, dest: %pI6\n",
                     &ipv6_hdr(skb)->daddr, daddr);
        dst_release(&rt->dst);
        return NULL;
    }
    if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
                 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
                 IPV6_ADDR_LOOPBACK)) {
        IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
                     "to non-local address, dest: %pI6\n",
                     &ipv6_hdr(skb)->saddr, daddr);
        dst_release(&rt->dst);
        return NULL;
    }

    return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
    struct dst_entry *old_dst;

    old_dst = dest->dst_cache;
    dest->dst_cache = NULL;
    dst_release(old_dst);
}

#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb, cp);	\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})

#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)

#define IP_VS_XMIT(pf, skb, cp, local)			\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp)
{
    /* we do not touch skb and do not need pskb ptr */
    IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
Example #21
0
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, int offset)
{
    struct rtable	*rt;	/* Route to the other host */
    int mtu;
    int rc;
    int local;

    EnterFunction(10);

    /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
       forwarded directly here, because there is no need to
       translate address/port back */
    if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
        if (cp->packet_xmit)
            rc = cp->packet_xmit(skb, cp, pp);
        else
            rc = NF_ACCEPT;
        /* do not touch skb anymore */
        atomic_inc_unchecked(&cp->in_pkts);
        goto out;
    }

    /*
     * mangle and send the packet here (only for VS/NAT)
     */

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(ip_hdr(skb)->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL |
                                  IP_VS_RT_MODE_RDR)))
        goto tx_error_icmp;
    local = rt->rt_flags & RTCF_LOCAL;

    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG(10, "%s(): "
                      "stopping DNAT to local address %pI4\n",
                      __func__, &cp->daddr.ip);
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && ipv4_is_loopback(rt->rt_dst) &&
            rt_is_input_route(skb_rtable(skb))) {
        IP_VS_DBG(1, "%s(): "
                  "stopping DNAT to loopback %pI4\n",
                  __func__, &cp->daddr.ip);
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, offset))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    ip_vs_nat_icmp(skb, pp, cp, 0);

    if (!local) {
        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        ip_rt_put(rt);
        /*
         * Some IPv4 replies get local address from routes,
         * not from iph, so while we DNAT after routing
         * we need this second input/output route.
         */
        if (!__ip_vs_reroute_locally(skb))
            goto tx_error;
    }

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

    rc = NF_STOLEN;
    goto out;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    dev_kfree_skb(skb);
    rc = NF_STOLEN;
out:
    LeaveFunction(10);
    return rc;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Example #22
0
int
ip_do_nat(struct sk_buff *skb)
{
	struct rtable *rt = (struct rtable*)skb->dst;
	struct iphdr *iph = skb->nh.iph;
	u32 odaddr = iph->daddr;
	u32 osaddr = iph->saddr;
	u16	check;

	IPCB(skb)->flags |= IPSKB_TRANSLATED;

	/* Rewrite IP header */
	iph->daddr = rt->rt_dst_map;
	iph->saddr = rt->rt_src_map;
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

	/* If it is the first fragment, rewrite protocol headers */

	if (!(iph->frag_off & htons(IP_OFFSET))) {
		u16	*cksum;

		switch(iph->protocol) {
		case IPPROTO_TCP:
			cksum  = (u16*)&((struct tcphdr*)(((char*)iph) + (iph->ihl<<2)))->check;
			if ((u8*)(cksum+1) > skb->tail)
				goto truncated;
			check = *cksum;
			if (skb->ip_summed != CHECKSUM_HW)
				check = ~check;
			check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, check);
			check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check);
			if (skb->ip_summed == CHECKSUM_HW)
				check = ~check;
			*cksum = check;
			break;
		case IPPROTO_UDP:
			cksum  = (u16*)&((struct udphdr*)(((char*)iph) + (iph->ihl<<2)))->check;
			if ((u8*)(cksum+1) > skb->tail)
				goto truncated;
			if ((check = *cksum) != 0) {
				check = csum_tcpudp_magic(iph->saddr, iph->daddr, 0, 0, ~check);
				check = csum_tcpudp_magic(~osaddr, ~odaddr, 0, 0, ~check);
				*cksum = check ? : 0xFFFF;
			}
			break;
		case IPPROTO_ICMP:
		{
			struct icmphdr *icmph = (struct icmphdr*)((char*)iph + (iph->ihl<<2));
			struct   iphdr *ciph;
			u32 idaddr, isaddr;
			int updated;

			if ((icmph->type != ICMP_DEST_UNREACH) &&
			    (icmph->type != ICMP_TIME_EXCEEDED) &&
			    (icmph->type != ICMP_PARAMETERPROB))
				break;

			ciph = (struct iphdr *) (icmph + 1);

			if ((u8*)(ciph+1) > skb->tail)
				goto truncated;

			isaddr = ciph->saddr;
			idaddr = ciph->daddr;
			updated = 0;

			if (rt->rt_flags&RTCF_DNAT && ciph->saddr == odaddr) {
				ciph->saddr = iph->daddr;
				updated = 1;
			}
			if (rt->rt_flags&RTCF_SNAT) {
				if (ciph->daddr != osaddr) {
					struct   fib_result res;
					struct   rt_key key;
					unsigned flags = 0;

					key.src = ciph->daddr;
					key.dst = ciph->saddr;
					key.iif = skb->dev->ifindex;
					key.oif = 0;
#ifdef CONFIG_IP_ROUTE_TOS
					key.tos = RT_TOS(ciph->tos);
#endif
#ifdef CONFIG_IP_ROUTE_FWMARK
					key.fwmark = 0;
#endif
					/* Use fib_lookup() until we get our own
					 * hash table of NATed hosts -- Rani
				 	 */
					if (fib_lookup(&key, &res) == 0) {
						if (res.r) {
							ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags);
							if (ciph->daddr != idaddr)
								updated = 1;
						}
						fib_res_put(&res);
					}
				} else {
					ciph->daddr = iph->saddr;
					updated = 1;
				}
			}
			if (updated) {
				cksum  = &icmph->checksum;
				/* Using tcpudp primitive. Why not? */
				check  = csum_tcpudp_magic(ciph->saddr, ciph->daddr, 0, 0, ~(*cksum));
				*cksum = csum_tcpudp_magic(~isaddr, ~idaddr, 0, 0, ~check);
			}
			break;
		}
		default:
			break;
		}
Example #23
0
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(tunnel->net, &fl4,
				    protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->net != dev_net(dev))
		skb_scrub_packet(skb);

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		dev->stats.tx_dropped++;
		dev_kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(dev_net(dev), rt, skb,
			    fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df);
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
Example #24
0
static int ipip_tunnel_xmit(struct sk_buff *skb, struct device *dev)
{
	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	u16    df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != __constant_htons(ETH_P_IP))
		goto tx_error;

	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
		tunnel->stat.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst && mtu < skb->dst->pmtu)
		skb->dst->pmtu = mtu;

	df |= (old_iph->frag_off&__constant_htons(IP_DF));

	if ((old_iph->frag_off&__constant_htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	skb->h.raw = skb->nh.raw;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
  			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
	}

	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

	iph->tot_len		=	htons(skb->len);
	iph->id			=	htons(ip_id_count++);
	ip_send_check(iph);

	stats->tx_bytes += skb->len;
	stats->tx_packets++;
	ip_send(skb);
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
Example #25
0
/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address.
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let's first consider the case that ip_route_input() succeeds:
 *
 * If the output device equals the logical bridge device the packet
 * came in on, we can consider this bridging. The corresponding MAC
 * address will be obtained in br_nf_pre_routing_finish_bridge.
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
 *
 * Let's now consider the case that ip_route_input() fails:
 *
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
 * If IP forwarding is disabled, ip_route_input() will fail, while
 * ip_route_output_key() can return success. The source
 * address for ip_route_output_key() is set to zero, so ip_route_output_key()
 * thinks we're handling a locally generated packet and won't care
 * if IP forwarding is enabled. If the output device equals the logical bridge
 * device, we proceed as if ip_route_input() succeeded. If it differs from the
 * logical bridge port or if ip_route_output_key() fails we drop the packet.
 */
static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct iphdr *iph = ip_hdr(skb);
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
	struct rtable *rt;
	int err;

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
	if (dnat_took_place(skb)) {
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
			struct in_device *in_dev = __in_dev_get_rcu(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;

			rt = ip_route_output(dev_net(dev), iph->daddr, 0,
					     RT_TOS(iph->tos), 0);
			if (!IS_ERR(rt)) {
				/* - Bridged-and-DNAT'ed traffic doesn't
				 *   require ip_forwarding. */
				if (rt->dst.dev == dev) {
					skb_dst_set(skb, &rt->dst);
					goto bridged_dnat;
				}
				ip_rt_put(rt);
			}
free_skb:
			kfree_skb(skb);
			return 0;
		} else {
			if (skb_dst(skb)->dev == dev) {
bridged_dnat:
				skb->dev = nf_bridge->physindev;
				nf_bridge_update_protocol(skb);
				nf_bridge_push_encap_header(skb);
				NF_HOOK_THRESH(NFPROTO_BRIDGE,
					       NF_BR_PRE_ROUTING,
					       skb, skb->dev, NULL,
					       br_nf_pre_routing_finish_bridge,
					       1);
				return 0;
			}
			memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		rt = bridge_parent_rtable(nf_bridge->physindev);
		if (!rt) {
			kfree_skb(skb);
			return 0;
		}
		skb_dst_set_noref(skb, &rt->dst);
	}

	skb->dev = nf_bridge->physindev;
	nf_bridge_update_protocol(skb);
	nf_bridge_push_encap_header(skb);
	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}
Example #26
0
void ipip_err(struct sk_buff *skb, unsigned char *dp, int len)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means, that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)dp;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	if (len < sizeof(struct iphdr))
		return;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		return;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		return;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
	return;
#else
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE encapsulated
		   packet. Well, he is fool, but what can we do ?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe, it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > skb2->dst->pmtu) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->pmtu = rel_info;
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return;
#endif
}
Example #27
0
/*
 * Create syn packet and send it to rs.
 * ATTENTION: we also store syn skb in cp if syn retransimition
 * is tured on.
 */
static int
syn_proxy_send_rs_syn(int af, const struct tcphdr *th,
		      struct ip_vs_conn *cp, struct sk_buff *skb,
		      struct ip_vs_protocol *pp, struct ip_vs_synproxy_opt *opt)
{
	struct sk_buff *syn_skb;
	int tcp_hdr_size;
	__u8 tcp_flags = TCPCB_FLAG_SYN;
	unsigned int tcphoff;
	struct tcphdr *new_th;

	if (!cp->packet_xmit) {
		IP_VS_ERR_RL("warning: packet_xmit is null");
		return 0;
	}

	syn_skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
	if (unlikely(syn_skb == NULL)) {
		IP_VS_ERR_RL("alloc skb failed when send rs syn packet\n");
		return 0;
	}

	/* Reserve space for headers */
	skb_reserve(syn_skb, MAX_TCP_HEADER);
	tcp_hdr_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			(opt->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			(opt->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			/* SACK_PERM is in the place of NOP NOP of TS */
			((opt->sack_ok
			  && !opt->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));

	new_th = (struct tcphdr *)skb_push(syn_skb, tcp_hdr_size);
	/* Compose tcp header */
	skb_reset_transport_header(syn_skb);
	syn_skb->csum = 0;

	/* Set tcp hdr */
	new_th->source = th->source;
	new_th->dest = th->dest;
	new_th->seq = htonl(ntohl(th->seq) - 1);
	new_th->ack_seq = 0;
	*(((__u16 *) new_th) + 6) =
	    htons(((tcp_hdr_size >> 2) << 12) | tcp_flags);
	/* FIX_ME: what window should we use */
	new_th->window = htons(5000);
	new_th->check = 0;
	new_th->urg_ptr = 0;
	new_th->urg = 0;
	new_th->ece = 0;
	new_th->cwr = 0;

	syn_proxy_syn_build_options((__be32 *) (new_th + 1), opt);

	/*
	 * Set ip hdr
	 * Attention: set source and dest addr to ack skb's.
	 * we rely on packet_xmit func to do NATs thing.
	 */
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		struct ipv6hdr *ack_iph = ipv6_hdr(skb);
		struct ipv6hdr *iph =
		    (struct ipv6hdr *)skb_push(syn_skb, sizeof(struct ipv6hdr));

		tcphoff = sizeof(struct ipv6hdr);
		skb_reset_network_header(syn_skb);
		memcpy(&iph->saddr, &ack_iph->saddr, sizeof(struct in6_addr));
		memcpy(&iph->daddr, &ack_iph->daddr, sizeof(struct in6_addr));

		iph->version = 6;
		iph->nexthdr = NEXTHDR_TCP;
		iph->payload_len = htons(tcp_hdr_size);
		iph->hop_limit = IPV6_DEFAULT_HOPLIMIT;

		new_th->check = 0;
		syn_skb->csum =
		    skb_checksum(syn_skb, tcphoff, syn_skb->len - tcphoff, 0);
		new_th->check =
		    csum_ipv6_magic(&iph->saddr, &iph->daddr,
				    syn_skb->len - tcphoff, IPPROTO_TCP,
				    syn_skb->csum);
	} else
#endif
	{
		struct iphdr *ack_iph = ip_hdr(skb);
		u32 rtos = RT_TOS(ack_iph->tos);
		struct iphdr *iph =
		    (struct iphdr *)skb_push(syn_skb, sizeof(struct iphdr));

		tcphoff = sizeof(struct iphdr);
		skb_reset_network_header(syn_skb);
		*((__u16 *) iph) = htons((4 << 12) | (5 << 8) | (rtos & 0xff));
		iph->tot_len = htons(syn_skb->len);
		iph->frag_off = htons(IP_DF);
		/* FIX_ME: what ttl shoule we use */
		iph->ttl = IPDEFTTL;
		iph->protocol = IPPROTO_TCP;
		iph->saddr = ack_iph->saddr;
		iph->daddr = ack_iph->daddr;

		ip_send_check(iph);

		new_th->check = 0;
		syn_skb->csum =
		    skb_checksum(syn_skb, tcphoff, syn_skb->len - tcphoff, 0);
		new_th->check =
		    csum_tcpudp_magic(iph->saddr, iph->daddr,
				      syn_skb->len - tcphoff, IPPROTO_TCP,
				      syn_skb->csum);
	}

	/* Save syn_skb if syn retransmission is on  */
	if (sysctl_ip_vs_synproxy_syn_retry > 0) {
		cp->syn_skb = skb_copy(syn_skb, GFP_ATOMIC);
		atomic_set(&cp->syn_retry_max, sysctl_ip_vs_synproxy_syn_retry);
	}

	/* Save info for fast_response_xmit */
	if(sysctl_ip_vs_fast_xmit && skb->dev &&
				likely(skb->dev->type == ARPHRD_ETHER) &&
				skb_mac_header_was_set(skb)) {
		struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb);

		if(likely(cp->indev == NULL)) {
			cp->indev = skb->dev;
			dev_hold(cp->indev);
		}

		if (unlikely(cp->indev != skb->dev)) {
			dev_put(cp->indev);
			cp->indev = skb->dev;
			dev_hold(cp->indev);
		}

		memcpy(cp->src_hwaddr, eth->h_source, ETH_ALEN);
		memcpy(cp->dst_hwaddr, eth->h_dest, ETH_ALEN);
		IP_VS_INC_ESTATS(ip_vs_esmib, FAST_XMIT_SYNPROXY_SAVE);
		IP_VS_DBG_RL("syn_proxy_send_rs_syn netdevice:%s\n",
						netdev_name(skb->dev));
	}

	/* count in the syn packet */
	ip_vs_in_stats(cp, skb);

	/* If xmit failed, syn_skb will be freed correctly. */
	cp->packet_xmit(syn_skb, cp, pp);

	return 1;
}
Example #28
0
static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
	struct sock *sk = (struct sock *) chan->private;
	struct pppox_sock *po = pppox_sk(sk);
	struct pptp_opt *opt=&po->proto.pptp;
	struct pptp_gre_header *hdr;
	unsigned int header_len=sizeof(*hdr);
	int len=skb?skb->len:0;
	int err=0;
	int window;

	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */

	INC_TX_PACKETS;

	spin_lock_bh(&opt->xmit_lock);
	
	window=WRAPPED(opt->ack_recv,opt->seq_sent)?(__u32)0xffffffff-opt->seq_sent+opt->ack_recv:opt->seq_sent-opt->ack_recv;

	if (!skb){
	    if (opt->ack_sent == opt->seq_recv) goto exit;
	}else if (window>opt->window){
		__set_bit(PPTP_FLAG_PAUSE,(unsigned long*)&opt->flags);
		#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
		mod_timer(&opt->ack_timeout_timer,opt->stat->rtt/100*HZ/10000);
		#else
		schedule_delayed_work(&opt->ack_timeout_work,opt->stat->rtt/100*HZ/10000);
		#endif
		goto exit;
	}

	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	{
		struct rt_key key = {
			.dst=opt->dst_addr.sin_addr.s_addr,
			.src=opt->src_addr.sin_addr.s_addr,
			.tos=RT_TOS(0),
		};
		if ((err=ip_route_output_key(&rt, &key))) {
			goto tx_error;
		}
	}
	#else
	{
		struct flowi fl = { .oif = 0,
				    .nl_u = { .ip4_u =
					      { .daddr = opt->dst_addr.sin_addr.s_addr,
						.saddr = opt->src_addr.sin_addr.s_addr,
						.tos = RT_TOS(0) } },
				    .proto = IPPROTO_GRE };
		if ((err=ip_route_output_key(&rt, &fl))) {
			goto tx_error;
		}
	}
	#endif
	tdev = rt->u.dst.dev;
	
	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	max_headroom = ((tdev->hard_header_len+15)&~15) + sizeof(*iph)+sizeof(*hdr)+2;
	#else
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph)+sizeof(*hdr)+2;
	#endif
	

	if (!skb){
		skb=dev_alloc_skb(max_headroom);
		if (!skb) {
			ip_rt_put(rt);
			goto tx_error;
		}
		skb_reserve(skb,max_headroom-skb_headroom(skb));
	}else if (skb_headroom(skb) < max_headroom ||
						skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			goto tx_error;
		}
		if (skb->sk)
		skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	
	if (skb->len){
		int islcp;
		unsigned char *data=skb->data;
		islcp=((data[0] << 8) + data[1])== PPP_LCP && 1 <= data[2] && data[2] <= 7;		
		
		/* compress protocol field */
		if ((opt->ppp_flags & SC_COMP_PROT) && data[0]==0 && !islcp)
			skb_pull(skb,1);
		
		/*
		 * Put in the address/control bytes if necessary
		 */
		if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) {
			data=skb_push(skb,2);
			data[0]=0xff;
			data[1]=0x03;
		}
	}
	len=skb->len;

	if (len==0) header_len-=sizeof(hdr->seq);
	if (opt->ack_sent == opt->seq_recv) header_len-=sizeof(hdr->ack);

	// Push down and install GRE header
	skb_push(skb,header_len);
	hdr=(struct pptp_gre_header *)(skb->data);

	hdr->flags       = PPTP_GRE_FLAG_K;
	hdr->ver         = PPTP_GRE_VER;
	hdr->protocol    = htons(PPTP_GRE_PROTO);
	hdr->call_id     = htons(opt->dst_addr.call_id);

	if (!len){
		hdr->payload_len = 0;
		hdr->ver |= PPTP_GRE_FLAG_A;
		/* ack is in odd place because S == 0 */
		hdr->seq = htonl(opt->seq_recv);
		opt->ack_sent = opt->seq_recv;
		opt->stat->tx_acks++;
	}else {
		hdr->flags |= PPTP_GRE_FLAG_S;
		hdr->seq    = htonl(opt->seq_sent++);
		if (log_level>=3 && opt->seq_sent<=log_packets)
			printk(KERN_INFO"PPTP[%i]: send packet: seq=%i",opt->src_addr.call_id,opt->seq_sent);
		if (opt->ack_sent != opt->seq_recv)	{
		/* send ack with this message */
			hdr->ver |= PPTP_GRE_FLAG_A;
			hdr->ack  = htonl(opt->seq_recv);
			opt->ack_sent = opt->seq_recv;
			if (log_level>=3 && opt->seq_sent<=log_packets)
				printk(" ack=%i",opt->seq_recv);
		}
		hdr->payload_len = htons(len);
		if (log_level>=3 && opt->seq_sent<=log_packets)
			printk("\n");
	}

	/*
	 *	Push down and install the IP header.
	 */

	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
	#else
	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(*iph));
	#endif
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	#endif

	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
	iph 			=	ip_hdr(skb);
	#else
	iph 			=	skb->nh.iph;
	#endif
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	0;//df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	0;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	iph->ttl = sysctl_ip_default_ttl;
	#else
	iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	#endif
	iph->tot_len = htons(skb->len);

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	
	nf_reset(skb);

	skb->ip_summed = CHECKSUM_NONE;
	ip_select_ident(iph, &rt->u.dst, NULL);
	ip_send_check(iph);

	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output);
	
	wake_up(&opt->wait);

	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
		opt->stat->tx_sent++;
		if (!opt->stat->pt_seq){
			opt->stat->pt_seq  = opt->seq_sent;
			do_gettimeofday(&opt->stat->pt_time);
		}
	}else{
		INC_TX_ERRORS;
		opt->stat->tx_failed++;	
	}

	spin_unlock_bh(&opt->xmit_lock);
	return 1;

tx_error:
	INC_TX_ERRORS;
	opt->stat->tx_failed++;
	if (!len) kfree_skb(skb);
	spin_unlock_bh(&opt->xmit_lock);
	return 1;
exit:
	spin_unlock_bh(&opt->xmit_lock);
	return 0;
}
Example #29
0
/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address.
 * This is also true when SNAT takes place (for the reply direction).
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let's first consider the case that ip_route_input() succeeds:
 *
 * If the output device equals the logical bridge device the packet
 * came in on, we can consider this bridging. The corresponding MAC
 * address will be obtained in br_nf_pre_routing_finish_bridge.
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
 *
 * Let's now consider the case that ip_route_input() fails:
 *
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
 * If IP forwarding is disabled, ip_route_input() will fail, while
 * ip_route_output_key() can return success. The source
 * address for ip_route_output_key() is set to zero, so ip_route_output_key()
 * thinks we're handling a locally generated packet and won't care
 * if IP forwarding is enabled. If the output device equals the logical bridge
 * device, we proceed as if ip_route_input() succeeded. If it differs from the
 * logical bridge port or if ip_route_output_key() fails we drop the packet.
 */
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct iphdr *iph = ip_hdr(skb);
	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
	struct rtable *rt;
	int err;

	nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;

	if (nf_bridge->pkt_otherhost) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->pkt_otherhost = false;
	}
	nf_bridge->in_prerouting = 0;
	if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
			struct in_device *in_dev = __in_dev_get_rcu(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;

			rt = ip_route_output(net, iph->daddr, 0,
					     RT_TOS(iph->tos), 0);
			if (!IS_ERR(rt)) {
				/* - Bridged-and-DNAT'ed traffic doesn't
				 *   require ip_forwarding. */
				if (rt->dst.dev == dev) {
					skb_dst_set(skb, &rt->dst);
					goto bridged_dnat;
				}
				ip_rt_put(rt);
			}
free_skb:
			kfree_skb(skb);
			return 0;
		} else {
			if (skb_dst(skb)->dev == dev) {
bridged_dnat:
				skb->dev = nf_bridge->physindev;
				nf_bridge_update_protocol(skb);
				nf_bridge_push_encap_header(skb);
				br_nf_hook_thresh(NF_BR_PRE_ROUTING,
						  net, sk, skb, skb->dev,
						  NULL,
						  br_nf_pre_routing_finish_bridge);
				return 0;
			}
			ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		rt = bridge_parent_rtable(nf_bridge->physindev);
		if (!rt) {
			kfree_skb(skb);
			return 0;
		}
		skb_dst_set_noref(skb, &rt->dst);
	}

	skb->dev = nf_bridge->physindev;
	nf_bridge_update_protocol(skb);
	nf_bridge_push_encap_header(skb);
	br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
			  br_handle_frame_finish);
	return 0;
}
Example #30
0
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	struct ipv6hdr *iph6 = skb->nh.ipv6h;
	u8     tos = tunnel->parms.iph.tos;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;
	struct in6_addr *addr6;	
	int addr_type;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IPV6))
		goto tx_error;

	if (!dst)
		dst = try_6to4(&iph6->daddr);

	if (!dst) {
		struct neighbour *neigh = NULL;

		if (skb->dst)
			neigh = skb->dst->neighbour;

		if (neigh == NULL) {
			if (net_ratelimit())
				printk(KERN_DEBUG "sit: nexthop == NULL\n");
			goto tx_error;
		}

		addr6 = (struct in6_addr*)&neigh->primary_key;
		addr_type = ipv6_addr_type(addr6);

		if (addr_type == IPV6_ADDR_ANY) {
			addr6 = &skb->nh.ipv6h->daddr;
			addr_type = ipv6_addr_type(addr6);
		}

		if (addr_type & IPV6_ADDR_COMPATv4)
			dst = addr6->s6_addr32[3];
		else
#ifdef CONFIG_IPV6_6TO4_NEXTHOP
		if (!(dst = try_6to4(addr6)))
#endif
			goto tx_error_icmp;
	}

	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
		tunnel->stat.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	if (rt->rt_type != RTN_UNICAST) {
		tunnel->stat.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (tiph->frag_off)
		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
	else
		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb->dst && mtu < skb->dst->pmtu) {
		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
		if (mtu < rt6->u.dst.pmtu) {
			if (tunnel->parms.iph.daddr || rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				rt6->u.dst.pmtu = mtu;
			}
		}
	}
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
  			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
		iph6 = skb->nh.ipv6h;
	}

	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	if (mtu > IPV6_MIN_MTU)
		iph->frag_off	=	htons(IP_DF);
	else
		iph->frag_off	=	0;

	iph->protocol		=	IPPROTO_IPV6;
	iph->tos		=	INET_ECN_encapsulate(tos, ip6_get_dsfield(iph6));
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	iph6->hop_limit;

	nf_reset(skb);

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}