Example #1
static unsigned int
echo_tg4(struct sk_buff **poldskb, const struct xt_action_param *par)
{
	const struct sk_buff *oldskb = *poldskb;
	const struct udphdr *oldudp;
	const struct iphdr *oldip;
	struct udphdr *newudp, oldudp_buf;
	struct iphdr *newip;
	struct sk_buff *newskb;
	unsigned int data_len;
	void *payload;

	/* This allows us to do the copy operation in fewer lines of code. */
	if (skb_linearize(*poldskb) < 0)
		return NF_DROP;

	oldip  = ip_hdr(oldskb);
	oldudp = skb_header_pointer(oldskb, par->thoff,
	         sizeof(*oldudp), &oldudp_buf);
	if (oldudp == NULL)
		return NF_DROP;
	if (ntohs(oldudp->len) <= sizeof(*oldudp))
		return NF_DROP;

	newskb = alloc_skb(LL_MAX_HEADER + sizeof(*newip) +
	         ntohs(oldudp->len), GFP_ATOMIC);
	if (newskb == NULL)
		return NF_DROP;

	skb_reserve(newskb, LL_MAX_HEADER);
	newskb->protocol = oldskb->protocol;

	skb_reset_network_header(newskb);
	newip = (void *)skb_put(newskb, sizeof(*newip));
	newip->version  = oldip->version;
	newip->ihl      = sizeof(*newip) / 4;
	newip->tos      = oldip->tos;
	newip->id       = 0;
	newip->frag_off = htons(IP_DF);
	newip->protocol = oldip->protocol;
	newip->check    = 0;
	newip->saddr    = oldip->daddr;
	newip->daddr    = oldip->saddr;

	skb_reset_transport_header(newskb);
	newudp = (void *)skb_put(newskb, sizeof(*newudp));
	newudp->source = oldudp->dest;
	newudp->dest   = oldudp->source;
	newudp->len    = oldudp->len;

	data_len = ntohs(oldudp->len) - sizeof(*oldudp);
	payload  = skb_header_pointer(oldskb, par->thoff +
	           sizeof(*oldudp), data_len, NULL);
	memcpy(skb_put(newskb, data_len), payload, data_len);

#if 0
	/*
	 * Since no fields are modified (we just swapped things around),
	 * this works too in our specific echo case.
	 */
	newudp->check = oldudp->check;
#else
	newudp->check = 0;
	newudp->check = csum_tcpudp_magic(newip->saddr, newip->daddr,
	                ntohs(newudp->len), IPPROTO_UDP,
	                csum_partial(newudp, ntohs(newudp->len), 0));
#endif

	/* ip_route_me_harder expects the skb's dst to be set */
	skb_dst_set(newskb, dst_clone(skb_dst(oldskb)));

	if (ip_route_me_harder(&newskb, RTN_UNSPEC) != 0)
		goto free_nskb;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
	newip->ttl = ip4_dst_hoplimit(skb_dst(newskb));
#else
	newip->ttl = dst_metric(skb_dst(newskb), RTAX_HOPLIMIT);
#endif
	newskb->ip_summed = CHECKSUM_NONE;

	/* "Never happens" (?) */
	if (newskb->len > dst_mtu(skb_dst(newskb)))
		goto free_nskb;

	nf_ct_attach(newskb, *poldskb);
	ip_local_out(newskb);
	return NF_DROP;

 free_nskb:
	kfree_skb(newskb);
	return NF_DROP;
}
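For reference, dst_clone() in the kernels these examples target is just a NULL-safe reference bump that returns its argument, which is what lets it be used inline in skb_dst_set() above. A sketch of its approximate definition (per include/net/dst.h of that era; shown for orientation, not verbatim):

static inline struct dst_entry *dst_clone(struct dst_entry *dst)
{
	if (dst)
		atomic_inc(&dst->__refcnt);	/* take one more reference */
	return dst;
}

Every reference taken this way must eventually be balanced by a dst_release(); here the clone taken for newskb is dropped when newskb is freed, by kfree_skb() on the error path or after transmission via ip_local_out().

Example #2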
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
	struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
Example #3
static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
        netdev_features_t features,
        bool tx_path,
        sa_family_t sa_family)
{
    void *iph = skb_network_header(skb);
    int pkt_hlen = skb_inner_network_offset(skb); /* inner l2 + tunnel hdr. */
    int mac_offset = skb_inner_mac_offset(skb);
    int outer_l3_offset = skb_network_offset(skb);
    int outer_l4_offset = skb_transport_offset(skb);
    struct sk_buff *skb1 = skb;
    struct dst_entry *dst = skb_dst(skb);
    struct sk_buff *segs;
    __be16 proto = skb->protocol;
    char cb[sizeof(skb->cb)];

    OVS_GSO_CB(skb)->ipv6 = (sa_family == AF_INET6);
    /* Set up the whole inner packet so we can read its protocol. */
    __skb_pull(skb, mac_offset);
    skb->protocol = __skb_network_protocol(skb);

    /* Set up an L3 packet for GSO, to work around a segmentation bug on older kernels. */
    __skb_pull(skb, (pkt_hlen - mac_offset));
    skb_reset_mac_header(skb);
    skb_reset_network_header(skb);
    skb_reset_transport_header(skb);

    /* From kernel 3.9 onward, skb->cb is used by skb GSO, so make
     * a copy of it here and restore it afterwards. */
    memcpy(cb, skb->cb, sizeof(cb));

    skb->encapsulation = 0;

    /* We are handling offloads by segmenting l3 packet, so
     * no need to call OVS compat segmentation function. */

#ifdef HAVE___SKB_GSO_SEGMENT
#undef __skb_gso_segment
    segs = __skb_gso_segment(skb, 0, tx_path);
#else
#undef skb_gso_segment
    segs = skb_gso_segment(skb, 0);
#endif

    if (!segs || IS_ERR(segs))
        goto free;

    skb = segs;
    while (skb) {
        __skb_push(skb, pkt_hlen);
        skb_reset_mac_header(skb);
        skb_set_network_header(skb, outer_l3_offset);
        skb_set_transport_header(skb, outer_l4_offset);
        skb->mac_len = 0;

        memcpy(skb_network_header(skb), iph, pkt_hlen);
        memcpy(skb->cb, cb, sizeof(cb));

        skb->protocol = proto;
        if (skb->next)
            dst = dst_clone(dst);

        skb_dst_set(skb, dst);
        OVS_GSO_CB(skb)->fix_segment(skb);

        skb = skb->next;
    }
free:
    consume_skb(skb1);
    return segs;
}
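The loop above shows the per-segment referencing idiom: after segmentation returns a list, every segment except the last takes an additional reference via dst_clone(), and the last segment receives the pointer as-is. Condensed to just that pattern (a sketch, with the header fix-ups omitted):

	for (skb = segs; skb != NULL; skb = skb->next) {
		if (skb->next)
			dst = dst_clone(dst);	/* one extra ref per additional segment */
		skb_dst_set(skb, dst);
	}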
Example #4
static int raw_send_hdrinc(struct sock *sk, void *from, int length,
			struct rtable *rt, 
			unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	int hh_len;
	struct iphdr *iph;
	struct sk_buff *skb;
	int err;

	if (length > rt->u.dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
			       rt->u.dst.dev->mtu);
		return -EMSGSIZE;
	}
	if (flags&MSG_PROBE)
		goto out;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	skb = sock_alloc_send_skb(sk, length+hh_len+15,
				  flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto error; 
	skb_reserve(skb, hh_len);

	skb->priority = sk->sk_priority;
	skb->dst = dst_clone(&rt->u.dst);

	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);

	skb->ip_summed = CHECKSUM_NONE;

	skb->h.raw = skb->nh.raw;
	err = memcpy_fromiovecend((void *)iph, from, 0, length);
	if (err)
		goto error_fault;

	/* We don't modify invalid header */
	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
		if (!iph->saddr)
			iph->saddr = rt->rt_src;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(iph, &rt->u.dst, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}

	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		      dst_output);
	if (err > 0)
		err = inet->recverr ? net_xmit_errno(err) : 0;
	if (err)
		goto error;
out:
	return 0;

error_fault:
	err = -EFAULT;
	kfree_skb(skb);
error:
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err; 
}
Example #5
/* Reroute packet to local IPv4 stack after DNAT */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;
	struct net *net = dev_net(dev);
	struct iphdr *iph = ip_hdr(skb);

	if (rt_is_input_route(rt)) {
		unsigned long orefdst = skb->_skb_refdst;

		if (ip_route_input(skb, iph->daddr, iph->saddr,
				   iph->tos, skb->dev))
			return 0;
		refdst_drop(orefdst);
	} else {
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_mark = skb->mark,
		};

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return 0;
		if (!(rt->rt_flags & RTCF_LOCAL)) {
			ip_rt_put(rt);
			return 0;
		}
		/* Drop old route. */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	}
	return 1;
}

#ifdef CONFIG_IP_VS_IPV6

static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
	return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
}

static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			dst = NULL;
			goto out_err;
		}
	}
	ipv6_addr_copy(ret_saddr, &fl6.saddr);
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}

/*
 * Get route to destination or remote server
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
			     local ? "local":"non-local", daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
	      __ip_vs_is_local_route6(ort))) {
		IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
			     "requires NAT method, dest: %pI6\n",
			     &ipv6_hdr(skb)->daddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
				    IPV6_ADDR_LOOPBACK)) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
			     "to non-local address, dest: %pI6\n",
			     &ipv6_hdr(skb)->saddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}

	return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = NULL;
	dst_release(old_dst);
}

#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb);		\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})

#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)

#define IP_VS_XMIT(pf, skb, cp, local)			\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
Example #6
/* Get route to destination or remote server */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			struct flowi4 fl4;

			memset(&fl4, 0, sizeof(fl4));
			fl4.daddr = dest->addr.ip;
			fl4.flowi4_tos = rtos;
			rt = ip_route_output_key(net, &fl4);
			if (IS_ERR(rt)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			dest->dst_saddr.ip = fl4.saddr;
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
				  "rtos=%X\n",
				  &dest->addr.ip, &dest->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest->dst_saddr.ip;
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi4 fl4;

		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = daddr;
		fl4.flowi4_tos = rtos;
		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt)) {
			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
				     &daddr);
			return NULL;
		}
		if (ret_saddr)
			*ret_saddr = fl4.saddr;
	}

	local = rt->rt_flags & RTCF_LOCAL;
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
			     "requires NAT method, dest: %pI4\n",
			     &ip_hdr(skb)->daddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
			     "to non-local address, dest: %pI4\n",
			     &ip_hdr(skb)->saddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}

	return rt;
}
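Examples #5 and #6 together illustrate who owns the route reference: ip_route_output_key() returns an rtable with one reference already held, so an skb can consume it directly, while parking the same route in a long-lived cache requires an extra reference. The two idioms side by side (a sketch of the lines above, error handling omitted):

	/* Example #5: the skb consumes the lookup reference outright. */
	rt = ip_route_output_key(net, &fl4);
	skb_dst_set(skb, &rt->dst);

	/* Example #6: the lookup reference stays with the caller, so the
	 * destination cache is handed its own clone. */
	__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);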
Example #7
static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
{
	struct sk_buff *skb;
	struct mrt6msg *msg;
	int ret;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
						+sizeof(*msg));
	else
#endif
		skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

	/* I suppose that internal messages
	 * do not require checksums */

	skb->ip_summed = CHECKSUM_UNNECESSARY;

#ifdef CONFIG_IPV6_PIMSM_V2
	if (assert == MRT6MSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix length etc.
		   And all this only to mangle msg->im6_msgtype and
		   to set msg->im6_mbz to "mbz" :-)
		 */
		skb_push(skb, -skb_network_offset(pkt));

		skb_push(skb, sizeof(*msg));
		skb_reset_transport_header(skb);
		msg = (struct mrt6msg *)skb_transport_header(skb);
		msg->im6_mbz = 0;
		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
		msg->im6_mif = reg_vif_num;
		msg->im6_pad = 0;
		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else
#endif
	{
	/*
	 *	Copy the IP header
	 */

	skb_put(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));

	/*
	 *	Add our header
	 */
	skb_put(skb, sizeof(*msg));
	skb_reset_transport_header(skb);
	msg = (struct mrt6msg *)skb_transport_header(skb);

	msg->im6_mbz = 0;
	msg->im6_msgtype = assert;
	msg->im6_mif = mifi;
	msg->im6_pad = 0;
	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);

	skb->dst = dst_clone(pkt->dst);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	skb_pull(skb, sizeof(struct ipv6hdr));
	}

	if (mroute6_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to user space multicast routing algorithms
	 */
	if ((ret = sock_queue_rcv_skb(mroute6_socket, skb)) < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
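Example #8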
//-----------------------------------------------------------------------------
static int _gtpusp_ksocket_process_gtp(const unsigned char * const rx_buf_pP, const int lenP, unsigned char* tx_buf_pP)
//-----------------------------------------------------------------------------
{
  gtpv1u_msg_t                gtpv1u_msg;
  uint8_t                     msg_type;
  struct iphdr               *iph_p    = NULL;
  struct iphdr               *new_iph_p= NULL;
  struct sk_buff             *skb_p    = NULL;
  const unsigned char *       rx_buf_p = rx_buf_pP;
  struct rtable  *rt              = NULL;
  struct flowi                fl = {
    .u = {
      .ip4 = {
        .daddr        = 0,
        .flowi4_tos   = 0,
        .flowi4_scope = RT_SCOPE_UNIVERSE,
      }
    }
  };

  msg_type = rx_buf_pP[1];

  switch(msg_type) {
  case GTP_ECHO_REQ:
    PR_INFO(MODULE_NAME": TODO GTP ECHO_REQ, SEND TO GTPV1U TASK USER SPACE\n");
    //TODO;
    return 0;
    break;

  case GTP_ERROR_INDICATION:
    PR_INFO(MODULE_NAME":TODO GTP ERROR INDICATION, SEND TO GTPV1U TASK USER SPACE\n");
    //TODO;
    return 0;
    break;

  case GTP_ECHO_RSP:
    PR_INFO(MODULE_NAME":GTP ECHO_RSP, SEND TO GTPV1U TASK USER SPACE\n");
    return 0;
    break;

  case GTP_GPDU: {
    gtpv1u_msg.version       = ((*rx_buf_p) & 0xE0) >> 5;
    gtpv1u_msg.protocol_type = ((*rx_buf_p) & 0x10) >> 4;
    gtpv1u_msg.ext_hdr_flag  = ((*rx_buf_p) & 0x04) >> 2;
    gtpv1u_msg.seq_num_flag  = ((*rx_buf_p) & 0x02) >> 1;
    gtpv1u_msg.npdu_num_flag = ((*rx_buf_p) & 0x01);
    rx_buf_p++;

    gtpv1u_msg.msg_type      = *(rx_buf_p);
    rx_buf_p++;

    rx_buf_p += 2;

    gtpv1u_msg.teid          = ntohl(*((u_int32_t *)rx_buf_p));
    rx_buf_p += 4;

    if(gtpv1u_msg.ext_hdr_flag || gtpv1u_msg.seq_num_flag || gtpv1u_msg.npdu_num_flag) {
      gtpv1u_msg.seq_num             = ntohs(*(((u_int16_t *)rx_buf_p)));
      rx_buf_p                         += 2;
      gtpv1u_msg.npdu_num            = *(rx_buf_p++);
      gtpv1u_msg.next_ext_hdr_type   = *(rx_buf_p++);
    }

    gtpv1u_msg.msg_buf_offset = (u_int32_t)(rx_buf_p - rx_buf_pP);
    gtpv1u_msg.msg_buf_len = lenP - gtpv1u_msg.msg_buf_offset;
    gtpv1u_msg.msg_len    = lenP;

    iph_p            = (struct iphdr*)(&rx_buf_pP[gtpv1u_msg.msg_buf_offset]);

    fl.u.ip4.daddr        = iph_p->daddr;
    fl.u.ip4.flowi4_tos   = RT_TOS(iph_p->tos);

    rt = ip_route_output_key(&init_net, &fl.u.ip4);

    if (IS_ERR(rt)) {
      /* ip_route_output_key() returns ERR_PTR() on failure, never NULL. */
      PR_INFO("GTPURH: Failed to route packet to dst 0x%x. Error: (%ld)\n", fl.u.ip4.daddr, PTR_ERR(rt));
      return NF_DROP;
    }

    if (rt->dst.dev == NULL) {
      pr_info("GTPURH: dst dev NULL\n");
      ip_rt_put(rt);   /* drop the reference taken by the route lookup */
      return 0;
    }

    skb_p = alloc_skb(LL_MAX_HEADER + ntohs(iph_p->tot_len), GFP_ATOMIC);
    if (skb_p == NULL) {
      ip_rt_put(rt);   /* drop the route reference on allocation failure */
      return 0;
    }
    skb_p->priority = rt_tos2priority(iph_p->tos);
    skb_p->pkt_type = PACKET_OTHERHOST;
    skb_dst_set(skb_p, dst_clone(&rt->dst));
    skb_p->dev      = skb_dst(skb_p)->dev;
    ip_rt_put(rt);   /* skb_p now holds its own reference via dst_clone() */

    skb_reserve(skb_p, LL_MAX_HEADER + ntohs(iph_p->tot_len));
    skb_p->protocol = htons(ETH_P_IP);

    new_iph_p = (void *)skb_push(skb_p, ntohs(iph_p->tot_len) - (iph_p->ihl << 2));
    skb_reset_transport_header(skb_p);
    new_iph_p = (void *)skb_push(skb_p, iph_p->ihl << 2);
    memcpy(new_iph_p, iph_p, ntohs(iph_p->tot_len));
    skb_reset_network_header(skb_p);
    skb_reset_inner_network_header(skb_p);
    skb_reset_inner_transport_header(skb_p);

    skb_p->mark = gtpv1u_msg.teid;

    new_iph_p->ttl        = ip4_dst_hoplimit(skb_dst(skb_p));
    skb_p->ip_summed = CHECKSUM_NONE;

    if (skb_p->len > dst_mtu(skb_dst(skb_p))) {
      PR_INFO("GTPURH: bad length\n");
      goto free_skb;
    }
    ip_local_out(skb_p);
    return 0;
free_skb:
    pr_info("GTPURH: Dropped skb\n");
    kfree_skb(skb_p);
    return 0;
  }
  break;

  default:
    PR_INFO(MODULE_NAME":ERROR GTPU msg type %u\n", msg_type);
    return 0;
  }
}
Example #9
static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
			 struct flowi *fl,
			 int encap_limit,
			 __u32 *pmtu)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
	u8 proto;
	int err = -1;
	int pkt_len;

	if ((dst = ip6_tnl_dst_check(t)) != NULL)
		dst_hold(dst);
	else {
		dst = ip6_route_output(net, NULL, fl);

		if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
			goto tx_err_link_failure;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		if (net_ratelimit())
			printk(KERN_WARNING
			       "%s: Local routing loop detected!\n",
			       t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
	if (skb->len > mtu) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	skb_dst_drop(skb);
	skb_dst_set(skb, dst_clone(dst));

	skb->transport_header = skb->network_header;

	proto = fl->proto;
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
	dsfield = INET_ECN_encapsulate(0, dsfield);
	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
	nf_reset(skb);
	pkt_len = skb->len;
	err = ip6_local_out(skb);

	if (net_xmit_eval(err) == 0) {
		stats->tx_bytes += pkt_len;
		stats->tx_packets++;
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}
	ip6_tnl_dst_store(t, dst);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
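The reference flow in ip6_tnl_xmit2() is a three-way pattern: one reference is held for the duration of the transmit (dst_hold() on the cached route, or the one returned already referenced by ip6_route_output()), a second is cloned onto the skb, and the transmit-time reference finally moves into the tunnel cache via ip6_tnl_dst_store() instead of being released. Condensed (a sketch; the error paths, which dst_release() instead, are omitted):

	dst_hold(dst);				/* ref held for this transmit */
	skb_dst_set(skb, dst_clone(dst));	/* the skb gets its own ref */
	ip6_tnl_dst_store(t, dst);		/* transmit ref moves into the cache */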
Example #10
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
		   struct flowi *fl, unsigned length,
		   struct ipv6_txoptions *opt, int hlimit, int flags)
{
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
	struct in6_addr *final_dst = NULL;
	struct dst_entry *dst;
	int err = 0;
	unsigned int pktlength, jumbolen, mtu;

	if (opt && opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		final_dst = fl->nl_u.ip6_u.daddr;
		fl->nl_u.ip6_u.daddr = rt0->addr;
	}

	if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
		fl->oif = np->mcast_oif;

	dst = NULL;
	if (sk->dst_cache) {
		dst = dst_check(&sk->dst_cache, np->dst_cookie);
		if (dst) {
			struct rt6_info *rt = (struct rt6_info*)dst_clone(dst);

			/* Yes, checking route validity in not connected
			   case is not very simple. Take into account,
			   that we do not support routing by source, TOS,
			   and MSG_DONTROUTE 		--ANK (980726)

			   1. If route was host route, check that
			      cached destination is current.
			      If it is network route, we still may
			      check its validity using saved pointer
			      to the last used address: daddr_cache.
			      We do not want to save whole address now,
			      (because main consumer of this service
			       is tcp, which has not this problem),
			      so that the last trick works only on connected
			      sockets.
			   2. oif also should be the same.
			 */
			if (((rt->rt6i_dst.plen != 128 ||
			      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
			     && (np->daddr_cache == NULL ||
				 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
			    || (fl->oif && fl->oif != dst->dev->ifindex)) {
				dst_release(dst);
				dst = NULL;
			}
		}
	}

	if (dst == NULL)
		dst = ip6_route_output(sk, fl);

	if (dst->error) {
		ipv6_statistics.Ip6OutNoRoutes++;
		dst_release(dst);
		return -ENETUNREACH;
	}

	if (fl->nl_u.ip6_u.saddr == NULL) {
		struct inet6_ifaddr *ifa;
		
		ifa = ipv6_get_saddr(dst, fl->nl_u.ip6_u.daddr);

		if (ifa == NULL) {
#if IP6_DEBUG >= 2
			printk(KERN_DEBUG "ip6_build_xmit: "
			       "no availiable source address\n");
#endif
			err = -ENETUNREACH;
			goto out;
		}
		fl->nl_u.ip6_u.saddr = &ifa->addr;
	}
	pktlength = length;

	if (hlimit < 0) {
		if (ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
			hlimit = np->mcast_hops;
		else
			hlimit = np->hop_limit;
		if (hlimit < 0)
			hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
	}

	jumbolen = 0;

	if (!sk->ip_hdrincl) {
		pktlength += sizeof(struct ipv6hdr);
		if (opt)
			pktlength += opt->opt_flen + opt->opt_nflen;

		if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
			/* Jumbo datagram.
			   It is assumed, that in the case of sk->ip_hdrincl
			   jumbo option is supplied by user.
			 */
			pktlength += 8;
			jumbolen = pktlength - sizeof(struct ipv6hdr);
		}
	}

	mtu = dst->pmtu;
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
		else if (np->pmtudisc == IPV6_PMTUDISC_DONT)
			mtu = IPV6_MIN_MTU;
	}

	/* Critical arithmetic overflow check.
	   FIXME: may gcc optimize it out? --ANK (980726)
	 */
	if (pktlength < length) {
		ipv6_local_error(sk, EMSGSIZE, fl, mtu);
		err = -EMSGSIZE;
		goto out;
	}

	if (pktlength <= mtu) {
		struct sk_buff *skb;
		struct ipv6hdr *hdr;
		struct device *dev = dst->dev;

		skb = sock_alloc_send_skb(sk, pktlength + 15 +
					  dev->hard_header_len, 0,
					  flags & MSG_DONTWAIT, &err);

		if (skb == NULL) {
			ipv6_statistics.Ip6OutDiscards++;
			goto out;
		}

		skb->dst = dst_clone(dst);

		skb_reserve(skb, (dev->hard_header_len + 15) & ~15);

		hdr = (struct ipv6hdr *) skb->tail;
		skb->nh.ipv6h = hdr;

		if (!sk->ip_hdrincl) {
			ip6_bld_1(sk, skb, fl, hlimit,
				  jumbolen ? sizeof(struct ipv6hdr) : pktlength);

			if (opt || jumbolen) {
				u8 *prev_hdr = &hdr->nexthdr;
				prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
				if (opt && opt->opt_flen)
					ipv6_build_frag_opts(skb, prev_hdr, opt);
			}
		}

		skb_put(skb, length);
		err = getfrag(data, &hdr->saddr,
			      ((char *) hdr) + (pktlength - length),
			      0, length);

		if (!err) {
			ipv6_statistics.Ip6OutRequests++;
			dst->output(skb);
		} else {
			err = -EFAULT;
			kfree_skb(skb);
		}
	} else {
		if (sk->ip_hdrincl || jumbolen ||
		    np->pmtudisc == IPV6_PMTUDISC_DO) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu);
			err = -EMSGSIZE;
			goto out;
		}

		err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
				    flags, length, mtu);
	}

	/*
	 *	cleanup
	 */
out:
	ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
	return err;
}
Example #11
static int ip6_frag_xmit(struct sock *sk, inet_getfrag_t getfrag,
			 const void *data, struct dst_entry *dst,
			 struct flowi *fl, struct ipv6_txoptions *opt,
			 struct in6_addr *final_dst,
			 int hlimit, int flags, unsigned length, int mtu)
{
	struct ipv6hdr *hdr;
	struct sk_buff *last_skb;
	u8 *prev_hdr;
	int unfrag_len;
	int frag_len;
	int last_len;
	int nfrags;
	int fhdr_dist;
	int frag_off;
	int data_off;
	int err;

	/*
	 *	Fragmentation
	 *
	 *	Extension header order:
	 *	Hop-by-hop -> Dest0 -> Routing -> Fragment -> Auth -> Dest1 -> rest (...)
	 *	
	 *	We must build the non-fragmented part that
	 *	will be in every packet... this also means
	 *	that other extension headers (Dest, Auth, etc)
	 *	must be considered in the data to be fragmented
	 */

	unfrag_len = sizeof(struct ipv6hdr) + sizeof(struct frag_hdr);
	last_len = length;

	if (opt) {
		unfrag_len += opt->opt_nflen;
		last_len += opt->opt_flen;
	}

	/*
	 *	The length of the fragmented part of every packet but
	 *	the last must be an
	 *	"integer multiple of 8 octets".
	 */

	frag_len = (mtu - unfrag_len) & ~0x7;

	/* Unfragmentable part exceeds mtu. */
	if (frag_len <= 0) {
		ipv6_local_error(sk, EMSGSIZE, fl, mtu);
		return -EMSGSIZE;
	}

	nfrags = last_len / frag_len;

	/*
	 *	We must send from end to start because of 
	 *	UDP/ICMP checksums. We do a funny trick:
	 *	fill the last skb first with the fixed
	 *	header (and its data) and then use it
	 *	to create the following segments and send it
	 *	in the end. If the peer is checking the M_flag
	 *	to trigger the reassembly code then this 
	 *	might be a good idea.
	 */

	frag_off = nfrags * frag_len;
	last_len -= frag_off;

	if (last_len == 0) {
		last_len = frag_len;
		frag_off -= frag_len;
		nfrags--;
	}
	data_off = frag_off;

	/* And it is implementation problem: for now we assume, that
	   all the exthdrs will fit to the first fragment.
	 */
	if (opt) {
		if (frag_len < opt->opt_flen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu);
			return -EMSGSIZE;
		}
		data_off = frag_off - opt->opt_flen;
	}

	last_skb = sock_alloc_send_skb(sk, unfrag_len + frag_len +
				       dst->dev->hard_header_len + 15,
				       0, flags & MSG_DONTWAIT, &err);

	if (last_skb == NULL)
		return err;

	last_skb->dst = dst_clone(dst);

	skb_reserve(last_skb, (dst->dev->hard_header_len + 15) & ~15);

	hdr = ip6_bld_1(sk, last_skb, fl, hlimit, frag_len+unfrag_len);
	prev_hdr = &hdr->nexthdr;

	if (opt && opt->opt_nflen)
		prev_hdr = ipv6_build_nfrag_opts(last_skb, prev_hdr, opt, final_dst, 0);

	prev_hdr = ipv6_build_fraghdr(last_skb, prev_hdr, frag_off);
	fhdr_dist = prev_hdr - last_skb->data;

	err = getfrag(data, &hdr->saddr, last_skb->tail, data_off, last_len);

	if (!err) {
		while (nfrags--) {
			struct sk_buff *skb;
			
			struct frag_hdr *fhdr2;
				
			skb = skb_copy(last_skb, sk->allocation);

			if (skb == NULL) {
				ipv6_statistics.Ip6FragFails++;
				kfree_skb(last_skb);
				return -ENOMEM;
			}
			
			frag_off -= frag_len;
			data_off -= frag_len;

			fhdr2 = (struct frag_hdr *) (skb->data + fhdr_dist);

			/* more flag on */
			fhdr2->frag_off = htons(frag_off | 1);

			/* Write fragmentable exthdrs to the first chunk */
			if (nfrags == 0 && opt && opt->opt_flen) {
				ipv6_build_frag_opts(skb, &fhdr2->nexthdr, opt);
				frag_len -= opt->opt_flen;
				data_off = 0;
			}

			err = getfrag(data, &hdr->saddr,skb_put(skb, frag_len),
				      data_off, frag_len);

			if (err) {
				kfree_skb(skb);
				break;
			}

			ipv6_statistics.Ip6FragCreates++;
			ipv6_statistics.Ip6OutRequests++;
			dst->output(skb);
		}
	}

	if (err) {
		ipv6_statistics.Ip6FragFails++;
		kfree_skb(last_skb);
		return -EFAULT;
	}

	hdr->payload_len = htons(unfrag_len + last_len - sizeof(struct ipv6hdr));

	/*
	 *	update last_skb to reflect the getfrag we did
	 *	on start.
	 */

	skb_put(last_skb, last_len);

	ipv6_statistics.Ip6FragCreates++;
	ipv6_statistics.Ip6FragOKs++;
	ipv6_statistics.Ip6OutRequests++;
	dst->output(last_skb);

	return 0;
}
Example #12
struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);

	if (!n) 
		return NULL;

#define C(x) n->x = skb->x

	n->next = n->prev = NULL;
	n->list = NULL;
	n->sk = NULL;
	C(stamp);
	C(dev);
	C(real_dev);
	C(h);
	C(nh);
	C(mac);
	C(dst);
	dst_clone(skb->dst);
	C(sp);
#ifdef CONFIG_INET
	secpath_get(skb->sp);
#endif
	memcpy(n->cb, skb->cb, sizeof(skb->cb));
	C(len);
	C(data_len);
	C(csum);
	C(local_df);
	n->cloned = 1;
	n->nohdr = 0;
	C(pkt_type);
	C(ip_summed);
	C(priority);
	C(protocol);
	C(security);
	n->destructor = NULL;
#ifdef CONFIG_NETFILTER
	C(nfmark);
	C(nfcache);
	C(nfct);
	nf_conntrack_get(skb->nfct);
	C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
	C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
	C(nf_bridge);
	nf_bridge_get(skb->nf_bridge);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
	C(private);
#endif
#ifdef CONFIG_NET_SCHED
	C(tc_index);
#ifdef CONFIG_NET_CLS_ACT
	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
	C(input_dev);
	C(tc_classid);
#endif

#endif
	C(truesize);
	atomic_set(&n->users, 1);
	C(head);
	C(data);
	C(tail);
	C(end);

	atomic_inc(&(skb_shinfo(skb)->dataref));
	skb->cloned = 1;

	return n;
}
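One detail worth calling out in skb_clone(): the return value of dst_clone() is discarded because C(dst) has already copied the pointer, so the call exists purely for its reference-count side effect. The pair is equivalent to:

	n->dst = skb->dst;	/* C(dst): share the pointer */
	dst_clone(skb->dst);	/* take a reference on behalf of the clone */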
Example #13
static int
__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
		      struct flowi *fl, struct dst_entry **dst_p)
{
	struct dst_entry *dst, *dst_prev;
	struct rtable *rt0 = (struct rtable*)(*dst_p);
	struct rtable *rt = rt0;
	struct flowi fl_tunnel = {
		.nl_u = {
			.ip4_u = {
				.saddr = fl->fl4_src,
				.daddr = fl->fl4_dst,
				.tos = fl->fl4_tos
			}
		}
	};
	int i;
	int err;
	int header_len = 0;
	int trailer_len = 0;

	dst = dst_prev = NULL;
	dst_hold(&rt->u.dst);

	for (i = 0; i < nx; i++) {
		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
		struct xfrm_dst *xdst;

		if (unlikely(dst1 == NULL)) {
			err = -ENOBUFS;
			dst_release(&rt->u.dst);
			goto error;
		}

		if (!dst)
			dst = dst1;
		else {
			dst_prev->child = dst1;
			dst1->flags |= DST_NOHASH;
			dst_clone(dst1);
		}

		xdst = (struct xfrm_dst *)dst1;
		xdst->route = &rt->u.dst;
		xdst->genid = xfrm[i]->genid;

		dst1->next = dst_prev;
		dst_prev = dst1;

		header_len += xfrm[i]->props.header_len;
		trailer_len += xfrm[i]->props.trailer_len;

		if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
			unsigned short encap_family = xfrm[i]->props.family;
			switch(encap_family) {
			case AF_INET:
				fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
				fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
				break;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
			case AF_INET6:
				ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
				ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
				break;
#endif
			default:
				BUG_ON(1);
			}
			err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
					      &fl_tunnel, encap_family);
			if (err)
				goto error;
		} else
			dst_hold(&rt->u.dst);
	}

	dst_prev->child = &rt->u.dst;
	dst->path = &rt->u.dst;

	*dst_p = dst;
	dst = dst_prev;

	dst_prev = *dst_p;
	i = 0;
	for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
		struct xfrm_state_afinfo *afinfo;
		x->u.rt.fl = *fl;

		dst_prev->xfrm = xfrm[i++];
		dst_prev->dev = rt->u.dst.dev;
		dst_prev->obsolete	= -1;
		dst_prev->flags	       |= DST_HOST;
		dst_prev->lastuse	= jiffies;
		dst_prev->header_len	= header_len;
		dst_prev->nfheader_len	= 0;
		dst_prev->trailer_len	= trailer_len;
		memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));

		/* Copy neighbour for reachability confirmation */
		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
		dst_prev->input		= rt->u.dst.input;
		/* XXX: When IPv6 module can be unloaded, we should manage reference
		 * to xfrm6_output in afinfo->output. Miyazawa
		 * */
		afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
		if (!afinfo) {
			dst = *dst_p;
			err = -EAFNOSUPPORT;
			goto error;
		}
		dst_prev->output = afinfo->output;
		xfrm_state_put_afinfo(afinfo);
		if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
			atomic_inc(&rt->peer->refcnt);
		x->u.rt.peer = rt->peer;
		/* Sheit... I remember I did this right. Apparently,
		 * it was magically lost, so this code needs audit */
		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
		x->u.rt.rt_type = rt->rt_type;
		x->u.rt.rt_src = rt0->rt_src;
		x->u.rt.rt_dst = rt0->rt_dst;
		x->u.rt.rt_gateway = rt->rt_gateway;
		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
		header_len -= x->u.dst.xfrm->props.header_len;
		trailer_len -= x->u.dst.xfrm->props.trailer_len;
	}

	xfrm_init_pmtu(dst);
	return 0;

error:
	if (dst)
		dst_free(dst);
	return err;
}