int udpv6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
	struct ipv6_pinfo      	*np = &sk->net_pinfo.af_inet6;
	struct in6_addr		*daddr;
	struct in6_addr		saddr;
	struct dst_entry	*dst;
	struct flowi		fl;
	struct ip6_flowlabel	*flowlabel = NULL;
	int			addr_type;
	int			err;

	if (usin->sin6_family == AF_INET) {
		if (__ipv6_only_sock(sk))
			return -EAFNOSUPPORT;
		err = udp_connect(sk, uaddr, addr_len);
		goto ipv4_connected;
	}

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	fl.fl6_flowlabel = 0;
	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
		}
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type == IPV6_ADDR_ANY) {
		/*
		 *	connect to self
		 */
		usin->sin6_addr.s6_addr[15] = 0x01;
	}

	daddr = &usin->sin6_addr;

	if (addr_type == IPV6_ADDR_MAPPED) {
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = daddr->s6_addr32[3];
		sin.sin_port = usin->sin6_port;

		err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin));

ipv4_connected:
		if (err < 0)
			return err;
		
		ipv6_addr_set(&np->daddr, 0, 0, 
			      htonl(0x0000ffff),
			      sk->daddr);

		if(ipv6_addr_any(&np->saddr)) {
			ipv6_addr_set(&np->saddr, 0, 0, 
				      htonl(0x0000ffff),
				      sk->saddr);
		}

		if(ipv6_addr_any(&np->rcv_saddr)) {
			ipv6_addr_set(&np->rcv_saddr, 0, 0, 
				      htonl(0x0000ffff),
				      sk->rcv_saddr);
		}
		return 0;
	}

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			if (sk->bound_dev_if && sk->bound_dev_if != usin->sin6_scope_id) {
				fl6_sock_release(flowlabel);
				return -EINVAL;
			}
			sk->bound_dev_if = usin->sin6_scope_id;
			if (!sk->bound_dev_if && (addr_type&IPV6_ADDR_MULTICAST))
				fl.oif = np->mcast_oif;
		}

#ifndef CONFIG_IPV6_LOOSE_SCOPE_ID
		/* Connect to link-local address requires an interface */
		if (sk->bound_dev_if == 0){
			fl6_sock_release(flowlabel);
			return -EINVAL;
		}
#endif
	}

	ipv6_addr_copy(&np->daddr, daddr);
	np->flow_label = fl.fl6_flowlabel;

	sk->dport = usin->sin6_port;

	/*
	 *	Check for a route to the destination and obtain the
	 *	destination cache for it.
	 */

	fl.proto = IPPROTO_UDP;
	fl.fl6_dst = &np->daddr;
	fl.fl6_src = &saddr;
	fl.oif = sk->bound_dev_if;
	fl.uli_u.ports.dport = sk->dport;
	fl.uli_u.ports.sport = sk->sport;

	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
		fl.oif = np->mcast_oif;

	if (flowlabel) {
		if (flowlabel->opt && flowlabel->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
			fl.fl6_dst = rt0->addr;
		}
	} else if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		fl.fl6_dst = rt0->addr;
	}

	dst = ip6_route_output(sk, &fl);

	if ((err = dst->error) != 0) {
		dst_release(dst);
		fl6_sock_release(flowlabel);
		return err;
	}

	ip6_dst_store(sk, dst, fl.fl6_dst);

	/* get the source address used in the appropriate device */

	err = ipv6_get_saddr(dst, daddr, &saddr, np->use_tempaddr);

	if (err == 0) {
		if(ipv6_addr_any(&np->saddr))
			ipv6_addr_copy(&np->saddr, &saddr);

		if(ipv6_addr_any(&np->rcv_saddr)) {
			ipv6_addr_copy(&np->rcv_saddr, &saddr);
			sk->rcv_saddr = LOOPBACK4_IPV6;
		}
		sk->state = TCP_ESTABLISHED;
	}
	fl6_sock_release(flowlabel);

	return err;
}
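
When userspace hands this function an AF_INET address, or a v4-mapped ::ffff:a.b.c.d destination, it falls back to udp_connect() and records the socket addresses in v4-mapped form. A minimal userspace sketch that exercises that path; the address is from the 192.0.2.0/24 documentation range and the port is a placeholder:

/* Connect an AF_INET6 UDP socket to a v4-mapped address, hitting the
 * IPV6_ADDR_MAPPED branch above (or -EAFNOSUPPORT if IPV6_V6ONLY is set). */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in6 sin6;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(53);	/* placeholder port */
	inet_pton(AF_INET6, "::ffff:192.0.2.1", &sin6.sin6_addr);

	if (connect(fd, (struct sockaddr *)&sin6, sizeof(sin6)) < 0)
		perror("connect");
	close(fd);
	return 0;
}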
Example 2
/* after ipt_filter */
static unsigned int ezp_nat_pre_hook(unsigned int hooknum, 
        struct sk_buff *skb, const struct net_device *indev,
        const struct net_device *outdev, 
        int (*okfn)(struct sk_buff *))
{
    struct nf_conn *ct;
    enum ip_conntrack_info ctinfo;
    int ret = NF_ACCEPT;
    enum ip_conntrack_dir dir;
    __u32 dnat_addr = 0, snat_addr = 0;    
    int* nat_flag;
    struct dst_entry** dst_to_use = NULL;
    struct iphdr *iph = ip_hdr(skb);
    struct icmphdr *hdr = icmp_hdr(skb);
    struct tcphdr *tcph = tcp_hdr(skb);
    /* EZP: enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); */

    if(!ezp_nat_enable_flag){
        return NF_ACCEPT;
    }
    ct = nf_ct_get(skb, &ctinfo);
    if (!ct) {
        if (iph->protocol == IPPROTO_ICMP
                && hdr->type == ICMP_REDIRECT)
            return NF_DROP;
        return NF_ACCEPT;
    }
    
    /* TCP or UDP. */
    if ((iph->protocol != IPPROTO_TCP) &&
            (iph->protocol != IPPROTO_UDP) ) {
        return NF_ACCEPT;
    }
    if ((iph->protocol == IPPROTO_TCP) && 
            ((tcp_flag_word(tcph) & (TCP_FLAG_RST | TCP_FLAG_SYN)) == 
             TCP_FLAG_SYN)) {
        return NF_ACCEPT;
    }
    /* Make sure it is confirmed. */
    if (!nf_ct_is_confirmed(ct)) {
        return NF_ACCEPT;
    } 
    /* We comment out this part since
     * 1. conntrack establishment is a 2-way process, but after routing we
     * already have the routing entry and the address resolution table, so
     * we don't need to check the ESTABLISHED state.
     * 2. With the established-state check, we would need to go through the
     * forward and routing steps several times, and the entry we hold might
     * be replaced in the meantime. */
    /*
    if ((ctinfo != IP_CT_ESTABLISHED) &&
        (ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY)) {
        return NF_ACCEPT;
    }
    */
    dir = CTINFO2DIR(ctinfo);
    if (dir == IP_CT_DIR_ORIGINAL) {
        if (!ct->orgdir_dst) {
            return NF_ACCEPT;
        } else {
            nat_flag = &ct->orgdir_rid;
            if (!(*nat_flag & ((1 << IP_NAT_MANIP_DST) | 
			(1 << IP_NAT_MANIP_SRC) |
			(1 << EZP_IP_LOCAL_IN)))) {
                return NF_ACCEPT;
            }
            /* Check only in forward case and ignore input case */
            if (!(*nat_flag & (1 << EZP_IP_LOCAL_IN))) {
                if ((!ct->orgdir_dst->hh) && (!ct->orgdir_dst->neighbour)) {
                    printk("%s:orig dst and neighbour null dir\n",__FUNCTION__);
                    return NF_ACCEPT;
                }
            }
            if (skb->dst) {
                /* skb might have its own dst already,
                 * e.g. output to local input */
                dst_release(skb->dst);
            }
            skb->protocol = htons(ETH_P_IP);
            skb->dst = ct->orgdir_dst;
            /* XXX: */
            skb->dev = ct->orgdir_dst->dev;
            /* skb uses this dst_entry */
            dst_use(skb->dst, jiffies);
            dst_to_use = &ct->orgdir_dst;
        }
    } else {
        /* IP_CT_DIR_REPLY */
        if (!ct->replydir_dst) {
            return NF_ACCEPT;
        } else {
            nat_flag = &ct->replydir_rid;
            if (!(*nat_flag & ((1 << IP_NAT_MANIP_DST) | 
			(1 << IP_NAT_MANIP_SRC) |
			(1 << EZP_IP_LOCAL_IN)))) {
                return NF_ACCEPT;
            }
            /* Check only in forward case and ignore input case */
            if (!(*nat_flag & (1 << EZP_IP_LOCAL_IN))) {
                if ((!ct->replydir_dst->hh) && (!ct->replydir_dst->neighbour)) {
                    printk("%s:reply dst and neighbour null\n",__FUNCTION__);
                    return NF_ACCEPT;
                }
            } 
            if (skb->dst) {
                /* skb might have its own dst already,
                 * e.g. output to local input */
                dst_release(skb->dst);
            }
            skb->protocol = htons(ETH_P_IP);
            skb->dst = ct->replydir_dst;
            /* XXX: */
            skb->dev = ct->replydir_dst->dev;
            /* skb uses this dst_entry */
            dst_use(skb->dst, jiffies);
            dst_to_use = &ct->replydir_dst;
        }
    }

    /* After this point, every "return NF_ACCEPT" action needs to release the
     * held dst entry, so we use "goto release_dst_and_return" to handle that
     * in one place. */
    /* EZP:
    if (!nf_nat_initialized(ct, maniptype)) {
        goto release_dst_and_return;
    }
    */
    /* If we have helper, we need to go original path until conntrack
     * confirmed */
    if(nfct_help(ct)){
        goto release_dst_and_return;
    }

    if (dir == IP_CT_DIR_ORIGINAL) {
        (skb)->imq_flags = ct->ct_orig_imq_flags;
    }
    else{
        (skb)->imq_flags = ct->ct_repl_imq_flags;
    }

    /* PRE_ROUTING NAT */
    /* Assume DNAT conntrack is ready. */
    if ((*nat_flag & (1 << IP_NAT_MANIP_DST))){
        dnat_addr = iph->daddr;
        ret = nf_nat_packet(ct, ctinfo, NF_INET_PRE_ROUTING, skb);
        if (ret != NF_ACCEPT) {
            goto release_dst_and_return;
        }
        if (dnat_addr == iph->daddr) {
            *nat_flag &= ~(1 << IP_NAT_MANIP_DST);
        }
    } 
    /* INPUT */
    if ((*nat_flag & (1 << EZP_IP_LOCAL_IN))){
        /* TODO: use ip_local_deliver_finish() and add ip_defrag(). */
        /* XXX: Not sure this will hit or not. */
        /*
         *	Reassemble IP fragments.
         */

        if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
            if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) {
                /* If return value is not 0, defrag error */
                /* return 0; */
                /* XXX: return NF_STOLEN? */
                goto release_dst_and_return;
            }
        }
        /* For INPUT path, there is no need to check dst_mtu but defrag.
        if (skb->len > dst_mtu(&((struct rtable*)skb->dst)->u.dst)) {
            goto release_dst_and_return;
        }*/
        if (ezp_nat_queue_enable_flag) {
            if ((skb)->imq_flags & IMQ_F_ENQUEUE) {
                struct nf_hook_ops *elem = nf_get_imq_ops();
                /* To apply IMQ, we check the IMQ flag; if it is set, we
                 * enqueue this skb and leave it to IMQ. */
                if (elem != NULL) {
                    nf_queue(skb, (struct list_head*)elem, AF_INET,
                            NF_INET_POST_ROUTING,
                            (struct net_device*)indev,
                            (struct net_device*)
                                ((struct rtable*)skb->dst)->u.dst.dev,
                            ip_local_deliver_finish, NF_ACCEPT >> NF_VERDICT_BITS);
                    return NF_STOLEN;
                }
            }
        }
        ret = ip_local_deliver_finish(skb);
        return NF_STOLEN;
    }
Example 3
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	__be32 frag_id = 0;
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
	struct net *net = dev_net(skb_dst(skb)->dev);

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb was not generated by a local socket.
	 */
	if (!skb->local_df) {
		skb->dev = skb_dst(skb)->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);

	if (skb_has_frags(skb)) {
		int first_len = skb_pagelen(skb);
		int truesizes = 0;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
				goto slow_path;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
				truesizes += frag->truesize;
			}
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		ipv6_select_ident(fh);
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->truesize -= truesizes;
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->u.dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(skb);
			if(!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
				      IPSTATS_MIB_FRAGOKS);
			dst_release(&rt->u.dst);
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

		IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
			      IPSTATS_MIB_FRAGFAILS);
		dst_release(&rt->u.dst);
		return err;
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_FRAGFAILS);
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		if (!frag_id) {
			ipv6_select_ident(fh);
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	kfree_skb(skb);
	return err;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
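
In the slow path above, every fragment except the last is truncated to a multiple of 8 bytes (the "len &= ~7" step), because the fragment header expresses offsets in 8-byte units. A standalone sketch of that arithmetic with illustrative sizes:

/* Slow-path fragment carving: align all but the last fragment to 8 bytes. */
#include <stdio.h>

int main(void)
{
	unsigned int left = 3000;		/* payload bytes left to send */
	unsigned int mtu = 1280 - 40 - 8;	/* MTU minus IPv6 and fragment headers */
	unsigned int offset = 0, len;

	while (left > 0) {
		len = left > mtu ? mtu : left;
		if (len < left)
			len &= ~7U;	/* not the last fragment: align to 8 */
		printf("fragment: offset=%u len=%u MF=%u\n",
		       offset, len, len < left ? 1 : 0);
		offset += len;
		left -= len;
	}
	return 0;
}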
Example 4
/* Send RST reply */
static void send_reset(struct sk_buff *oldskb, int hook)
{
	struct sk_buff *nskb;
	struct tcphdr otcph, *tcph;
	struct rtable *rt;
	u_int16_t tmp_port;
	u_int32_t tmp_addr;
	int needs_ack;
	int hh_len;

	/* IP header checks: fragment. */
	if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
		return;

	if (skb_copy_bits(oldskb, oldskb->nh.iph->ihl*4,
			  &otcph, sizeof(otcph)) < 0)
 		return;

	/* No RST for RST. */
	if (otcph.rst)
		return;

	/* FIXME: Check checksum --RR */
	if ((rt = route_reverse(oldskb, hook)) == NULL)
		return;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* We need a linear, writeable skb.  We also need to expand
	   headroom in case hh_len of incoming interface < hh_len of
	   outgoing interface */
	nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
			       GFP_ATOMIC);
	if (!nskb) {
		dst_release(&rt->u.dst);
		return;
	}

	dst_release(nskb->dst);
	nskb->dst = &rt->u.dst;

	/* This packet will not be the same as the other: clear nf fields */
	nf_conntrack_put(nskb->nfct);
	nskb->nfct = NULL;
	nskb->nfcache = 0;
#ifdef CONFIG_NETFILTER_DEBUG
	nskb->nf_debug = 0;
#endif
	nskb->nfmark = 0;
#ifdef CONFIG_BRIDGE_NETFILTER
	nf_bridge_put(nskb->nf_bridge);
	nskb->nf_bridge = NULL;
#endif

	tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);

	/* Swap source and dest */
	tmp_addr = nskb->nh.iph->saddr;
	nskb->nh.iph->saddr = nskb->nh.iph->daddr;
	nskb->nh.iph->daddr = tmp_addr;
	tmp_port = tcph->source;
	tcph->source = tcph->dest;
	tcph->dest = tmp_port;

	/* Truncate to length (no data) */
	tcph->doff = sizeof(struct tcphdr)/4;
	skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
	nskb->nh.iph->tot_len = htons(nskb->len);

	if (tcph->ack) {
		needs_ack = 0;
		tcph->seq = otcph.ack_seq;
		tcph->ack_seq = 0;
	} else {
		needs_ack = 1;
		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
				      + oldskb->len - oldskb->nh.iph->ihl*4
				      - (otcph.doff<<2));
		tcph->seq = 0;
	}

	/* Reset flags */
	((u_int8_t *)tcph)[13] = 0;
	tcph->rst = 1;
	tcph->ack = needs_ack;

	tcph->window = 0;
	tcph->urg_ptr = 0;

	/* Adjust TCP checksum */
	tcph->check = 0;
	tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
				   nskb->nh.iph->saddr,
				   nskb->nh.iph->daddr,
				   csum_partial((char *)tcph,
						sizeof(struct tcphdr), 0));

	/* Adjust IP TTL, DF */
	nskb->nh.iph->ttl = MAXTTL;
	/* Set DF, id = 0 */
	nskb->nh.iph->frag_off = htons(IP_DF);
	nskb->nh.iph->id = 0;

	/* Adjust IP checksum */
	nskb->nh.iph->check = 0;
	nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, 
					   nskb->nh.iph->ihl);

	/* "Never happens" */
	if (nskb->len > dst_pmtu(nskb->dst))
		goto free_nskb;

	connection_attach(nskb, oldskb->nfct);

	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
		ip_finish_output);
	return;

 free_nskb:
	kfree_skb(nskb);
}
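
The ack_seq computation above follows the RFC 793 rule for resetting a segment that carried no ACK: the RST must acknowledge everything the offending segment occupied in sequence space, with SYN and FIN each counting as one octet on top of the payload. A self-contained sketch of the same arithmetic:

/* RST sequence numbers per RFC 793; inputs are illustrative. */
#include <stdint.h>
#include <stdio.h>

static uint32_t rst_ack_seq(uint32_t seq, int syn, int fin, uint32_t payload_len)
{
	return seq + syn + fin + payload_len;
}

int main(void)
{
	/* A data-less SYN at seq 1000 is answered with ack_seq 1001. */
	printf("%u\n", rst_ack_seq(1000, 1, 0, 0));
	return 0;
}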
Example 5
/*
 *	device anycast group inc (add if not found)
 */
int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
{
	struct ifacaddr6 *aca;
	struct inet6_dev *idev;
	struct rt6_info *rt;
	int err;

	idev = in6_dev_get(dev);

	if (idev == NULL)
		return -EINVAL;

	write_lock_bh(&idev->lock);
	if (idev->dead) {
		err = -ENODEV;
		goto out;
	}

	for (aca = idev->ac_list; aca; aca = aca->aca_next) {
		if (ipv6_addr_equal(&aca->aca_addr, addr)) {
			aca->aca_users++;
			err = 0;
			goto out;
		}
	}

	/*
	 *	not found: create a new one.
	 */

	aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC);

	if (aca == NULL) {
		err = -ENOMEM;
		goto out;
	}

	rt = addrconf_dst_alloc(idev, addr, 1);
	if (IS_ERR(rt)) {
		kfree(aca);
		err = PTR_ERR(rt);
		goto out;
	}

	ipv6_addr_copy(&aca->aca_addr, addr);
	aca->aca_idev = idev;
	aca->aca_rt = rt;
	aca->aca_users = 1;
	/* aca_tstamp should be updated upon changes */
	aca->aca_cstamp = aca->aca_tstamp = jiffies;
	atomic_set(&aca->aca_refcnt, 2);
	spin_lock_init(&aca->aca_lock);

	aca->aca_next = idev->ac_list;
	idev->ac_list = aca;
	write_unlock_bh(&idev->lock);

	dst_hold(&rt->u.dst);
	if (ip6_ins_rt(rt))
		dst_release(&rt->u.dst);

	addrconf_join_solict(dev, &aca->aca_addr);

	aca_put(aca);
	return 0;
out:
	write_unlock_bh(&idev->lock);
	in6_dev_put(idev);
	return err;
}
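
Note the initial reference count of 2 on the new anycast entry: one reference is owned by the idev->ac_list linkage, the other by this function, which drops it through aca_put() just before returning. A generic, self-contained sketch of that create-with-two-references pattern (the names are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refcnt;
};

static void obj_put(struct obj *o)
{
	/* Free when the last reference is dropped. */
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {
		free(o);
		puts("freed");
	}
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	if (o == NULL)
		return 1;
	atomic_init(&o->refcnt, 2);	/* list reference + local reference */
	obj_put(o);			/* creator drops its reference */
	obj_put(o);			/* list removal drops the last one */
	return 0;
}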
Example 6
static int ipip6_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

    /* It is not :-( All the routers (except for Linux) return only
       8 bytes of packet payload. It means that precise relaying of
       ICMP in the real Internet is absolutely infeasible.
     */
    struct iphdr *iph = (struct iphdr*)skb->data;
    int type = skb->h.icmph->type;
    int code = skb->h.icmph->code;
    struct ip_tunnel *t;
    int err;

    switch (type) {
    default:
    case ICMP_PARAMETERPROB:
        return 0;

    case ICMP_DEST_UNREACH:
        switch (code) {
        case ICMP_SR_FAILED:
        case ICMP_PORT_UNREACH:
            /* Impossible event. */
            return 0;
        case ICMP_FRAG_NEEDED:
            /* Soft state for pmtu is maintained by IP core. */
            return 0;
        default:
            /* All others are translated to HOST_UNREACH.
               rfc2003 contains "deep thoughts" about NET_UNREACH,
               I believe they are just ether pollution. --ANK
             */
            break;
        }
        break;
    case ICMP_TIME_EXCEEDED:
        if (code != ICMP_EXC_TTL)
            return 0;
        break;
    }

    err = -ENOENT;

    read_lock(&ipip6_lock);
    t = ipip6_tunnel_lookup(iph->daddr, iph->saddr);
    if (t == NULL || t->parms.iph.daddr == 0)
        goto out;

    err = 0;
    if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
        goto out;

    if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
        t->err_count++;
    else
        t->err_count = 1;
    t->err_time = jiffies;
out:
    read_unlock(&ipip6_lock);
    return err;
#else
    struct iphdr *iph = (struct iphdr*)dp;
    int hlen = iph->ihl<<2;
    struct ipv6hdr *iph6;
    int type = skb->h.icmph->type;
    int code = skb->h.icmph->code;
    int rel_type = 0;
    int rel_code = 0;
    int rel_info = 0;
    struct sk_buff *skb2;
    struct rt6_info *rt6i;

    if (len < hlen + sizeof(struct ipv6hdr))
        return 0;
    iph6 = (struct ipv6hdr*)(dp + hlen);

    switch (type) {
    default:
        return 0;
    case ICMP_PARAMETERPROB:
        if (skb->h.icmph->un.gateway < hlen)
            return 0;

        /* So... This guy found something strange INSIDE the encapsulated
           packet. Well, he is a fool, but what can we do?
         */
        rel_type = ICMPV6_PARAMPROB;
        rel_info = skb->h.icmph->un.gateway - hlen;
        break;

    case ICMP_DEST_UNREACH:
        switch (code) {
        case ICMP_SR_FAILED:
        case ICMP_PORT_UNREACH:
            /* Impossible event. */
            return 0;
        case ICMP_FRAG_NEEDED:
            /* Too complicated case ... */
            return 0;
        default:
            /* All others are translated to HOST_UNREACH.
               rfc2003 contains "deep thoughts" about NET_UNREACH,
               I believe it is just ether pollution. --ANK
             */
            rel_type = ICMPV6_DEST_UNREACH;
            rel_code = ICMPV6_ADDR_UNREACH;
            break;
        }
        break;
    case ICMP_TIME_EXCEEDED:
        if (code != ICMP_EXC_TTL)
            return 0;
        rel_type = ICMPV6_TIME_EXCEED;
        rel_code = ICMPV6_EXC_HOPLIMIT;
        break;
    }

    /* Prepare fake skb to feed it to icmpv6_send */
    skb2 = skb_clone(skb, GFP_ATOMIC);
    if (skb2 == NULL)
        return 0;
    dst_release(skb2->dst);
    skb2->dst = NULL;
    skb_pull(skb2, skb->data - (u8*)iph6);
    skb2->nh.raw = skb2->data;

    /* Try to guess incoming interface */
    rt6i = rt6_lookup(&iph6->saddr, NULL, NULL, 0);
    if (rt6i && rt6i->rt6i_dev) {
        skb2->dev = rt6i->rt6i_dev;

        rt6i = rt6_lookup(&iph6->daddr, &iph6->saddr, NULL, 0);

        if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) {
            struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev);
            if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) {
                rel_type = ICMPV6_DEST_UNREACH;
                rel_code = ICMPV6_ADDR_UNREACH;
            }
            icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
        }
    }
    kfree_skb(skb2);
    return 0;
#endif
}
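
The err_count/err_time bookkeeping at the end rate-limits tunnel error handling: errors arriving within IPTUNNEL_ERR_TIMEO of the previous one accumulate, otherwise the counter restarts at 1; the transmit path later replays that many link failures. A standalone sketch with a plain counter standing in for jiffies:

#include <stdio.h>

#define IPTUNNEL_ERR_TIMEO 100	/* illustrative, in fake jiffies */

struct tun { unsigned long err_time; int err_count; };

static void record_err(struct tun *t, unsigned long jiffies)
{
	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;		/* burst continues */
	else
		t->err_count = 1;	/* window expired: restart */
	t->err_time = jiffies;
}

int main(void)
{
	struct tun t = { 0, 0 };

	record_err(&t, 10);	/* count -> 1 */
	record_err(&t, 50);	/* within window: count -> 2 */
	record_err(&t, 400);	/* window expired: count -> 1 */
	printf("err_count = %d\n", t.err_count);
	return 0;
}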
Example 7
int ipv6_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	int len;
	int val;

	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
	if (level != SOL_IPV6)
		return -ENOPROTOOPT;
	if (get_user(len, optlen))
		return -EFAULT;
	switch (optname) {
	case IPV6_ADDRFORM:
		if (sk->sk_protocol != IPPROTO_UDP &&
		    sk->sk_protocol != IPPROTO_TCP)
			return -EINVAL;
		if (sk->sk_state != TCP_ESTABLISHED)
			return -ENOTCONN;
		val = sk->sk_family;
		break;
	case MCAST_MSFILTER:
	{
		struct group_filter gsf;
		int err;

		if (len < GROUP_FILTER_SIZE(0))
			return -EINVAL;
		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
			return -EFAULT;
		lock_sock(sk);
		err = ip6_mc_msfget(sk, &gsf,
			(struct group_filter __user *)optval, optlen);
		release_sock(sk);
		return err;
	}

	case IPV6_PKTOPTIONS:
	{
		struct msghdr msg;
		struct sk_buff *skb;

		if (sk->sk_type != SOCK_STREAM)
			return -ENOPROTOOPT;

		msg.msg_control = optval;
		msg.msg_controllen = len;
		msg.msg_flags = 0;

		lock_sock(sk);
		skb = np->pktoptions;
		if (skb)
			atomic_inc(&skb->users);
		release_sock(sk);

		if (skb) {
			int err = datagram_recv_ctl(sk, &msg, skb);
			kfree_skb(skb);
			if (err)
				return err;
		} else {
			if (np->rxopt.bits.rxinfo) {
				struct in6_pktinfo src_info;
				src_info.ipi6_ifindex = np->mcast_oif;
				ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
			}
			if (np->rxopt.bits.rxhlim) {
				int hlim = np->mcast_hops;
				put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
			}
		}
		len -= msg.msg_controllen;
		return put_user(len, optlen);
	}
	case IPV6_MTU:
	{
		struct dst_entry *dst;
		val = 0;	
		lock_sock(sk);
		dst = sk_dst_get(sk);
		if (dst) {
			val = dst_mtu(dst);
			dst_release(dst);
		}
		release_sock(sk);
		if (!val)
			return -ENOTCONN;
		break;
	}

	case IPV6_V6ONLY:
		val = np->ipv6only;
		break;

	case IPV6_PKTINFO:
		val = np->rxopt.bits.rxinfo;
		break;

	case IPV6_HOPLIMIT:
		val = np->rxopt.bits.rxhlim;
		break;

	case IPV6_RTHDR:
		val = np->rxopt.bits.srcrt;
		break;

	case IPV6_HOPOPTS:
		val = np->rxopt.bits.hopopts;
		break;

	case IPV6_DSTOPTS:
		val = np->rxopt.bits.dstopts;
		break;

	case IPV6_FLOWINFO:
		val = np->rxopt.bits.rxflow;
		break;

	case IPV6_UNICAST_HOPS:
		val = np->hop_limit;
		break;

	case IPV6_MULTICAST_HOPS:
		val = np->mcast_hops;
		break;

	case IPV6_MULTICAST_LOOP:
		val = np->mc_loop;
		break;

	case IPV6_MULTICAST_IF:
		val = np->mcast_oif;
		break;

	case IPV6_MTU_DISCOVER:
		val = np->pmtudisc;
		break;

	case IPV6_RECVERR:
		val = np->recverr;
		break;

	case IPV6_FLOWINFO_SEND:
		val = np->sndflow;
		break;

	default:
#ifdef CONFIG_NETFILTER
		lock_sock(sk);
		val = nf_getsockopt(sk, PF_INET6, optname, optval, 
				    &len);
		release_sock(sk);
		if (val >= 0)
			val = put_user(len, optlen);
		return val;
#else
		return -EINVAL;
#endif
	}
	len = min_t(unsigned int, sizeof(int), len);
	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;
	return 0;
}
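
From userspace, the IPV6_MTU branch above is reached through an ordinary getsockopt() call: on a connected socket it reports dst_mtu() of the cached route, on an unconnected one it fails with ENOTCONN. A minimal sketch using a documentation-range address:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sockaddr_in6 sin6;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);
	int mtu;
	socklen_t len = sizeof(mtu);

	if (fd < 0)
		return 1;
	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_port = htons(9);	/* placeholder port */
	inet_pton(AF_INET6, "2001:db8::1", &sin6.sin6_addr);

	/* connect() installs a cached route, so IPV6_MTU has a dst to read. */
	if (connect(fd, (struct sockaddr *)&sin6, sizeof(sin6)) == 0 &&
	    getsockopt(fd, IPPROTO_IPV6, IPV6_MTU, &mtu, &len) == 0)
		printf("path mtu: %d\n", mtu);
	close(fd);
	return 0;
}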
Example 8
int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi)
{
	int err;
	__be32 seq;
	struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH];
	struct xfrm_state *x;
	int xfrm_nr = 0;
	int decaps = 0;
	int nexthdr;
	unsigned int nhoff;

	nhoff = IP6CB(skb)->nhoff;
	nexthdr = skb_network_header(skb)[nhoff];

	seq = 0;
	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
		goto drop;

	do {
		struct ipv6hdr *iph = ipv6_hdr(skb);

		if (xfrm_nr == XFRM_MAX_DEPTH)
			goto drop;

		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
				nexthdr != IPPROTO_IPIP ? nexthdr : IPPROTO_IPV6, AF_INET6);
		if (x == NULL)
			goto drop;
		spin_lock(&x->lock);
		if (unlikely(x->km.state != XFRM_STATE_VALID))
			goto drop_unlock;

		if (x->props.replay_window && xfrm_replay_check(x, seq))
			goto drop_unlock;

		if (xfrm_state_check_expire(x))
			goto drop_unlock;

		nexthdr = x->type->input(x, skb);
		if (nexthdr <= 0)
			goto drop_unlock;

		skb_network_header(skb)[nhoff] = nexthdr;

		if (x->props.replay_window)
			xfrm_replay_advance(x, seq);

		x->curlft.bytes += skb->len;
		x->curlft.packets++;

		spin_unlock(&x->lock);

		xfrm_vec[xfrm_nr++] = x;

		if (x->mode->input(x, skb))
			goto drop;

		if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */
			decaps = 1;
			break;
		}

		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
			goto drop;
	} while (!err);

	/* Allocate new secpath or COW existing one. */
	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
		struct sec_path *sp;
		sp = secpath_dup(skb->sp);
		if (!sp)
			goto drop;
		if (skb->sp)
			secpath_put(skb->sp);
		skb->sp = sp;
	}

	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
		goto drop;

	memcpy(skb->sp->xvec + skb->sp->len, xfrm_vec,
	       xfrm_nr * sizeof(xfrm_vec[0]));
	skb->sp->len += xfrm_nr;
	skb->ip_summed = CHECKSUM_NONE;

	nf_reset(skb);

	if (decaps) {
		dst_release(skb->dst);
		skb->dst = NULL;
		netif_rx(skb);
		return -1;
	} else {
#ifdef CONFIG_NETFILTER
		ipv6_hdr(skb)->payload_len = htons(skb->len);
		__skb_push(skb, skb->data - skb_network_header(skb));

		NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
			ip6_rcv_finish);
		return -1;
#else
		return 1;
#endif
	}

drop_unlock:
	spin_unlock(&x->lock);
	xfrm_state_put(x);
drop:
	while (--xfrm_nr >= 0)
		xfrm_state_put(xfrm_vec[xfrm_nr]);
	kfree_skb(skb);
	return -1;
}
Example 9
void ipip_err(struct sk_buff *skb, u32 info)
{
#ifndef I_WISH_WORLD_WERE_PERFECT

/* It is not :-( All the routers (except for Linux) return only
   8 bytes of packet payload. It means that precise relaying of
   ICMP in the real Internet is absolutely infeasible.
 */
	struct iphdr *iph = (struct iphdr*)skb->data;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	struct ip_tunnel *t;

	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* Soft state for pmtu is maintained by IP core. */
			return;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	read_lock(&ipip_lock);
	t = ipip_tunnel_lookup(iph->daddr, iph->saddr);
	if (t == NULL || t->parms.iph.daddr == 0)
		goto out;
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	read_unlock(&ipip_lock);
	return;
#else
	struct iphdr *iph = (struct iphdr*)dp;
	int hlen = iph->ihl<<2;
	struct iphdr *eiph;
	int type = skb->h.icmph->type;
	int code = skb->h.icmph->code;
	int rel_type = 0;
	int rel_code = 0;
	int rel_info = 0;
	struct sk_buff *skb2;
	struct rtable *rt;

	if (len < hlen + sizeof(struct iphdr))
		return;
	eiph = (struct iphdr*)(dp + hlen);

	switch (type) {
	default:
		return;
	case ICMP_PARAMETERPROB:
		if (skb->h.icmph->un.gateway < hlen)
			return;

		/* So... This guy found something strange INSIDE the encapsulated
		   packet. Well, he is a fool, but what can we do?
		 */
		rel_type = ICMP_PARAMETERPROB;
		rel_info = skb->h.icmph->un.gateway - hlen;
		break;

	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return;
		case ICMP_FRAG_NEEDED:
			/* And it is the only really necessary thing :-) */
			rel_info = ntohs(skb->h.icmph->un.frag.mtu);
			if (rel_info < hlen+68)
				return;
			rel_info -= hlen;
			/* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */
			if (rel_info > ntohs(eiph->tot_len))
				return;
			break;
		default:
			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe it is just ether pollution. --ANK
			 */
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return;
		break;
	}

	/* Prepare fake skb to feed it to icmp_send */
	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (skb2 == NULL)
		return;
	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, skb->data - (u8*)eiph);
	skb2->nh.raw = skb2->data;

	/* Try to guess incoming interface */
	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
		kfree_skb(skb2);
		return;
	}
	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags&RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
			ip_rt_put(rt);
			kfree_skb(skb2);
			return;
		}
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_IPGRE) {
			kfree_skb(skb2);
			return;
		}
	}

	/* change mtu on this route */
	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		if (rel_info > skb2->dst->pmtu) {
			kfree_skb(skb2);
			return;
		}
		skb2->dst->pmtu = rel_info;
		rel_info = htonl(rel_info);
	} else if (type == ICMP_TIME_EXCEEDED) {
		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
		if (t->parms.iph.ttl) {
			rel_type = ICMP_DEST_UNREACH;
			rel_code = ICMP_HOST_UNREACH;
		}
	}

	icmp_send(skb2, rel_type, rel_code, rel_info);
	kfree_skb(skb2);
	return;
#endif
}
Example 10
static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
	dst_release(t->dst_cache);
	t->dst_cache = NULL;
}
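
This reset pairs with the ip6_tnl_dst_check()/ip6_tnl_dst_store() helpers used by ip6_tnl_xmit2() in the next example. A plausible sketch of those companions, assuming the cached route is validated against a serial-number cookie; the exact fields may differ between kernel versions:

/* Sketch only: assumes t->dst_cookie is checked via dst->ops->check(). */
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
	struct dst_entry *dst = t->dst_cache;

	if (dst && dst->obsolete &&
	    dst->ops->check(dst, t->dst_cookie) == NULL) {
		t->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}
	return dst;
}

static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	/* Remember the routing-tree serial number to detect stale routes. */
	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
	dst_release(t->dst_cache);
	t->dst_cache = dst;
}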
Example 11
static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
			 struct flowi *fl,
			 int encap_limit,
			 __u32 *pmtu)
{
	struct net *net = dev_net(dev);
	struct ip6_tnl *t = netdev_priv(dev);
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct ipv6_tel_txoption opt;
	struct dst_entry *dst;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
	u8 proto;
	int err = -1;
	int pkt_len;

	if ((dst = ip6_tnl_dst_check(t)) != NULL)
		dst_hold(dst);
	else {
		dst = ip6_route_output(net, NULL, fl);

		if (dst->error || xfrm_lookup(net, &dst, fl, NULL, 0) < 0)
			goto tx_err_link_failure;
	}

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		if (net_ratelimit())
			printk(KERN_WARNING
			       "%s: Local routing loop detected!\n",
			       t->parms.name);
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
	if (encap_limit >= 0) {
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
	if (skb->len > mtu) {
		*pmtu = mtu;
		err = -EMSGSIZE;
		goto tx_err_dst_release;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);

	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		struct sk_buff *new_skb;

		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	skb_dst_drop(skb);
	skb_dst_set(skb, dst_clone(dst));

	skb->transport_header = skb->network_header;

	proto = fl->proto;
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	ipv6h = ipv6_hdr(skb);
	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
	dsfield = INET_ECN_encapsulate(0, dsfield);
	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
	nf_reset(skb);
	pkt_len = skb->len;
	err = ip6_local_out(skb);

	if (net_xmit_eval(err) == 0) {
		stats->tx_bytes += pkt_len;
		stats->tx_packets++;
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}
	ip6_tnl_dst_store(t, dst);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
	return err;
}
Example 12
int ip6_build_xmit(struct sock *sk, inet_getfrag_t getfrag, const void *data,
		   struct flowi *fl, unsigned length,
		   struct ipv6_txoptions *opt, int hlimit, int flags)
{
	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
	struct in6_addr *final_dst = NULL;
	struct dst_entry *dst;
	int err = 0;
	unsigned int pktlength, jumbolen, mtu;

	if (opt && opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
		final_dst = fl->nl_u.ip6_u.daddr;
		fl->nl_u.ip6_u.daddr = rt0->addr;
	}

	if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
		fl->oif = np->mcast_oif;

	dst = NULL;
	if (sk->dst_cache) {
		dst = dst_check(&sk->dst_cache, np->dst_cookie);
		if (dst) {
			struct rt6_info *rt = (struct rt6_info*)dst_clone(dst);

			/* Yes, checking route validity in the not-connected
			   case is not very simple. Take into account
			   that we do not support routing by source, TOS,
			   and MSG_DONTROUTE 		--ANK (980726)

			   1. If the route was a host route, check that the
			      cached destination is current.
			      If it is a network route, we still may
			      check its validity using a saved pointer
			      to the last used address: daddr_cache.
			      We do not want to save the whole address now,
			      (because the main consumer of this service
			       is tcp, which does not have this problem),
			      so the last trick works only on connected
			      sockets.
			   2. oif also should be the same.
			 */
			if (((rt->rt6i_dst.plen != 128 ||
			      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
			     && (np->daddr_cache == NULL ||
				 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
			    || (fl->oif && fl->oif != dst->dev->ifindex)) {
				dst_release(dst);
				dst = NULL;
			}
		}
	}

	if (dst == NULL)
		dst = ip6_route_output(sk, fl);

	if (dst->error) {
		ipv6_statistics.Ip6OutNoRoutes++;
		dst_release(dst);
		return -ENETUNREACH;
	}

	if (fl->nl_u.ip6_u.saddr == NULL) {
		struct inet6_ifaddr *ifa;
		
		ifa = ipv6_get_saddr(dst, fl->nl_u.ip6_u.daddr);

		if (ifa == NULL) {
#if IP6_DEBUG >= 2
			printk(KERN_DEBUG "ip6_build_xmit: "
			       "no availiable source address\n");
#endif
			err = -ENETUNREACH;
			goto out;
		}
		fl->nl_u.ip6_u.saddr = &ifa->addr;
	}
	pktlength = length;

	if (hlimit < 0) {
		if (ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
			hlimit = np->mcast_hops;
		else
			hlimit = np->hop_limit;
		if (hlimit < 0)
			hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
	}

	jumbolen = 0;

	if (!sk->ip_hdrincl) {
		pktlength += sizeof(struct ipv6hdr);
		if (opt)
			pktlength += opt->opt_flen + opt->opt_nflen;

		if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
			/* Jumbo datagram.
			   It is assumed that, in the sk->ip_hdrincl case,
			   the jumbo option is supplied by the user.
			 */
			pktlength += 8;
			jumbolen = pktlength - sizeof(struct ipv6hdr);
		}
	}

	mtu = dst->pmtu;
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
		else if (np->pmtudisc == IPV6_PMTUDISC_DONT)
			mtu = IPV6_MIN_MTU;
	}

	/* Critical arithmetic overflow check.
	   FIXME: may gcc optimize it out? --ANK (980726)
	 */
	if (pktlength < length) {
		ipv6_local_error(sk, EMSGSIZE, fl, mtu);
		err = -EMSGSIZE;
		goto out;
	}

	if (pktlength <= mtu) {
		struct sk_buff *skb;
		struct ipv6hdr *hdr;
		struct device *dev = dst->dev;

		skb = sock_alloc_send_skb(sk, pktlength + 15 +
					  dev->hard_header_len, 0,
					  flags & MSG_DONTWAIT, &err);

		if (skb == NULL) {
			ipv6_statistics.Ip6OutDiscards++;
			goto out;
		}

		skb->dst = dst_clone(dst);

		skb_reserve(skb, (dev->hard_header_len + 15) & ~15);

		hdr = (struct ipv6hdr *) skb->tail;
		skb->nh.ipv6h = hdr;

		if (!sk->ip_hdrincl) {
			ip6_bld_1(sk, skb, fl, hlimit,
				  jumbolen ? sizeof(struct ipv6hdr) : pktlength);

			if (opt || jumbolen) {
				u8 *prev_hdr = &hdr->nexthdr;
				prev_hdr = ipv6_build_nfrag_opts(skb, prev_hdr, opt, final_dst, jumbolen);
				if (opt && opt->opt_flen)
					ipv6_build_frag_opts(skb, prev_hdr, opt);
			}
		}

		skb_put(skb, length);
		err = getfrag(data, &hdr->saddr,
			      ((char *) hdr) + (pktlength - length),
			      0, length);

		if (!err) {
			ipv6_statistics.Ip6OutRequests++;
			dst->output(skb);
		} else {
			err = -EFAULT;
			kfree_skb(skb);
		}
	} else {
		if (sk->ip_hdrincl || jumbolen ||
		    np->pmtudisc == IPV6_PMTUDISC_DO) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu);
			err = -EMSGSIZE;
			goto out;
		}

		err = ip6_frag_xmit(sk, getfrag, data, dst, fl, opt, final_dst, hlimit,
				    flags, length, mtu);
	}

	/*
	 *	cleanup
	 */
out:
	ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
	return err;
}
Example 13
enum ipsec_xmit_value
ipsec_mast_send(struct ipsec_xmit_state*ixs)
{
	/* new route/dst cache code from James Morris */
	ixs->skb->dev = ixs->physdev;
	/*skb_orphan(ixs->skb);*/
	if((ixs->error = ip_route_output(&ixs->route,
				    ixs->skb->nh.iph->daddr,
				    ixs->pass ? 0 : ixs->skb->nh.iph->saddr,
				    RT_TOS(ixs->skb->nh.iph->tos),
				    ixs->physdev->iflink /* rgb: should this be 0? */))) {
		ixs->stats->tx_errors++;
		KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
			    "klips_debug:ipsec_xmit_send: "
			    "ip_route_output failed with error code %d, rt->u.dst.dev=%s, dropped\n",
			    ixs->error,
			    ixs->route->u.dst.dev->name);
		return IPSEC_XMIT_ROUTEERR;
	}
	if(ixs->dev == ixs->route->u.dst.dev) {
		ip_rt_put(ixs->route);
		/* This is recursion, drop it. */
		ixs->stats->tx_errors++;
		KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
			    "klips_debug:ipsec_xmit_send: "
			    "suspect recursion, dev=rt->u.dst.dev=%s, dropped\n",
			    ixs->dev->name);
		return IPSEC_XMIT_RECURSDETECT;
	}
	dst_release(ixs->skb->dst);
	ixs->skb->dst = &ixs->route->u.dst;
	ixs->stats->tx_bytes += ixs->skb->len;
	if(ixs->skb->len < ixs->skb->nh.raw - ixs->skb->data) {
		ixs->stats->tx_errors++;
		printk(KERN_WARNING
		       "klips_error:ipsec_xmit_send: "
		       "tried to __skb_pull nh-data=%ld, %d available.  This should never happen, please report.\n",
		       (unsigned long)(ixs->skb->nh.raw - ixs->skb->data),
		       ixs->skb->len);
		return IPSEC_XMIT_PUSHPULLERR;
	}
	__skb_pull(ixs->skb, ixs->skb->nh.raw - ixs->skb->data);
#ifdef SKB_RESET_NFCT
	nf_conntrack_put(ixs->skb->nfct);
	ixs->skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	ixs->skb->nf_debug = 0;
#endif /* CONFIG_NETFILTER_DEBUG */
#endif /* SKB_RESET_NFCT */
	KLIPS_PRINT(debug_mast & DB_MAST_XMIT,
		    "klips_debug:ipsec_xmit_send: "
		    "...done, calling ip_send() on device:%s\n",
		    ixs->skb->dev ? ixs->skb->dev->name : "NULL");
	KLIPS_IP_PRINT(debug_mast & DB_MAST_XMIT, ixs->skb->nh.iph);
	{
		int err;

		err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, ixs->skb, NULL, ixs->route->u.dst.dev,
			      ipsec_mast_xmit2);
		if(err != NET_XMIT_SUCCESS && err != NET_XMIT_CN) {
			if(net_ratelimit())
				printk(KERN_ERR
				       "klips_error:ipsec_xmit_send: "
				       "ip_send() failed, err=%d\n", 
				       -err);
			ixs->stats->tx_errors++;
			ixs->stats->tx_aborted_errors++;
			ixs->skb = NULL;
			return IPSEC_XMIT_IPSENDFAILURE;
		}
	}
	ixs->stats->tx_packets++;

	ixs->skb = NULL;
	
	return IPSEC_XMIT_OK;
}
Example 14
/* Send RST reply */
static void send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
	struct sk_buff *nskb;
	struct tcphdr otcph, *tcph;
	unsigned int otcplen, hh_len;
	int tcphoff, needs_ack;
	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb);
	struct ipv6hdr *ip6h;
#define DEFAULT_TOS_VALUE	0x0U
	const __u8 tclass = DEFAULT_TOS_VALUE;
	struct dst_entry *dst = NULL;
	u8 proto;
	struct flowi6 fl6;

	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
		pr_debug("addr is not unicast.\n");
		return;
	}

	proto = oip6h->nexthdr;
	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);

	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
		pr_debug("Cannot get TCP header.\n");
		return;
	}

	otcplen = oldskb->len - tcphoff;

	/* IP header checks: fragment, too short. */
	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
		pr_debug("proto(%d) != IPPROTO_TCP, "
			 "or too short. otcplen = %d\n",
			 proto, otcplen);
		return;
	}

	if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr)))
		BUG();

	/* No RST for RST. */
	if (otcph.rst) {
		pr_debug("RST is set\n");
		return;
	}

	/* Check checksum. */
	if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) {
		pr_debug("TCP checksum is invalid\n");
		return;
	}

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
	ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
	fl6.fl6_sport = otcph.dest;
	fl6.fl6_dport = otcph.source;
	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
	dst = ip6_route_output(net, NULL, &fl6);
	if (dst == NULL || dst->error) {
		dst_release(dst);
		return;
	}
	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
	if (IS_ERR(dst))
		return;

	hh_len = (dst->dev->hard_header_len + 15)&~15;
	nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr)
			 + sizeof(struct tcphdr) + dst->trailer_len,
			 GFP_ATOMIC);

	if (!nskb) {
		if (net_ratelimit())
			pr_debug("cannot alloc skb\n");
		dst_release(dst);
		return;
	}

	skb_dst_set(nskb, dst);

	skb_reserve(nskb, hh_len + dst->header_len);

	skb_put(nskb, sizeof(struct ipv6hdr));
	skb_reset_network_header(nskb);
	ip6h = ipv6_hdr(nskb);
	*(__be32 *)ip6h =  htonl(0x60000000 | (tclass << 20));
	ip6h->hop_limit = ip6_dst_hoplimit(dst);
	ip6h->nexthdr = IPPROTO_TCP;
	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);

	skb_reset_transport_header(nskb);
	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
	/* Truncate to length (no data) */
	tcph->doff = sizeof(struct tcphdr)/4;
	tcph->source = otcph.dest;
	tcph->dest = otcph.source;

	if (otcph.ack) {
		needs_ack = 0;
		tcph->seq = otcph.ack_seq;
		tcph->ack_seq = 0;
	} else {
		needs_ack = 1;
		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin
				      + otcplen - (otcph.doff<<2));
		tcph->seq = 0;
	}

	/* Reset flags */
	((u_int8_t *)tcph)[13] = 0;
	tcph->rst = 1;
	tcph->ack = needs_ack;
	tcph->window = 0;
	tcph->urg_ptr = 0;
	tcph->check = 0;

	/* Adjust TCP checksum */
	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr,
				      &ipv6_hdr(nskb)->daddr,
				      sizeof(struct tcphdr), IPPROTO_TCP,
				      csum_partial(tcph,
						   sizeof(struct tcphdr), 0));

	nf_ct_attach(nskb, oldskb);

	ip6_local_out(nskb);
}
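
csum_ipv6_magic() above folds the IPv6 pseudo-header (source and destination addresses, upper-layer length, next-header value) into the checksum over the TCP header. The underlying one's-complement fold can be sketched standalone; the input words are illustrative and would be in network byte order in real use:

#include <stdint.h>
#include <stdio.h>

/* Sum 16-bit words into a 32-bit accumulator, fold the carries back in,
 * and complement. */
static uint16_t csum_fold(const uint16_t *words, unsigned int n)
{
	uint32_t sum = 0;
	unsigned int i;

	for (i = 0; i < n; i++)
		sum += words[i];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t words[] = { 0x2001, 0x0db8, 0x0014, 0x0006 };

	printf("checksum: 0x%04x\n", csum_fold(words, 4));
	return 0;
}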
Example 15
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct in6_addr saddr;		/* Source for tunnel */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;
	int ret;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
					 &saddr, 1, (IP_VS_RT_MODE_LOCAL |
						     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	tdev = rt->dst.dev;

	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	if (mtu < IPV6_MIN_MTU) {
		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
			     IPV6_MIN_MTU);
		goto tx_error_put;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
	    !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
	ipv6_addr_copy(&iph->saddr, &saddr);
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = IP_VS_XMIT_TUNNEL(skb, cp);
	if (ret == NF_ACCEPT)
		ip6_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
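
The be16_add_cpu() step above sets the outer header's payload_len to the inner packet's payload_len plus the size of the inner IPv6 header, operating directly on a big-endian field. A standalone sketch of that endian-aware addition:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Add a host-order delta to a big-endian 16-bit field. */
static void be16_add(uint16_t *be_field, uint16_t delta)
{
	*be_field = htons((uint16_t)(ntohs(*be_field) + delta));
}

int main(void)
{
	uint16_t payload_len = htons(1000);	/* inner payload_len, illustrative */

	be16_add(&payload_len, 40);		/* add sizeof(struct ipv6hdr) */
	printf("outer payload_len: %u\n", ntohs(payload_len));	/* 1040 */
	return 0;
}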
Example 16
static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
	struct net_device_stats *stats = &tunnel->stat;
	struct iphdr  *tiph = &tunnel->parms.iph;
	u8     tos = tunnel->parms.iph.tos;
	u16    df = tiph->frag_off;
	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *old_iph = skb->nh.iph;
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */
	u32    dst = tiph->daddr;
	int    mtu;

	if (tunnel->recursion++) {
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (skb->protocol != htons(ETH_P_IP))
		goto tx_error;

	if (tos&1)
		tos = old_iph->tos;

	if (!dst) {
		/* NBMA tunnel */
		if ((rt = (struct rtable*)skb->dst) == NULL) {
			tunnel->stat.tx_fifo_errors++;
			goto tx_error;
		}
		if ((dst = rt->rt_gateway) == 0)
			goto tx_error_icmp;
	}

	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
		tunnel->stat.tx_carrier_errors++;
		goto tx_error_icmp;
	}
	tdev = rt->u.dst.dev;

	if (tdev == dev) {
		ip_rt_put(rt);
		tunnel->stat.collisions++;
		goto tx_error;
	}

	if (tiph->frag_off)
		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
	else
		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;

	if (mtu < 68) {
		tunnel->stat.collisions++;
		ip_rt_put(rt);
		goto tx_error;
	}
	if (skb->dst && mtu < skb->dst->pmtu)
		skb->dst->pmtu = mtu;

	df |= (old_iph->frag_off&htons(IP_DF));

	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
			tunnel->err_count--;
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	skb->h.raw = skb->nh.raw;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
  			stats->tx_dropped++;
			dev_kfree_skb(skb);
			tunnel->recursion--;
			return 0;
		}
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb(skb);
		skb = new_skb;
	}

	skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/*
	 *	Push down and install the IPIP header.
	 */

	iph 			=	skb->nh.iph;
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos);
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;

	if ((iph->ttl = tiph->ttl) == 0)
		iph->ttl	=	old_iph->ttl;

#ifdef CONFIG_NETFILTER
	nf_conntrack_put(skb->nfct);
	skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 0;
#endif
#endif

	IPTUNNEL_XMIT();
	tunnel->recursion--;
	return 0;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	stats->tx_errors++;
	dev_kfree_skb(skb);
	tunnel->recursion--;
	return 0;
}
Example 17
static unsigned int
echo_tg6(struct sk_buff **poldskb, const struct xt_action_param *par)
{
	const struct sk_buff *oldskb = *poldskb;
	const struct udphdr *oldudp;
	const struct ipv6hdr *oldip;
	struct udphdr *newudp, oldudp_buf;
	struct ipv6hdr *newip;
	struct sk_buff *newskb;
	unsigned int data_len;
	void *payload;
	struct flowi6 fl;
	struct dst_entry *dst = NULL;
	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);

	/* This allows us to do the copy operation in fewer lines of code. */
	if (skb_linearize(*poldskb) < 0)
		return NF_DROP;

	oldip  = ipv6_hdr(oldskb);
	oldudp = skb_header_pointer(oldskb, par->thoff,
	         sizeof(*oldudp), &oldudp_buf);
	if (oldudp == NULL)
		return NF_DROP;
	if (ntohs(oldudp->len) <= sizeof(*oldudp))
		return NF_DROP;

	newskb = alloc_skb(LL_MAX_HEADER + sizeof(*newip) +
	         ntohs(oldudp->len), GFP_ATOMIC);
	if (newskb == NULL)
		return NF_DROP;

	skb_reserve(newskb, LL_MAX_HEADER);
	newskb->protocol = oldskb->protocol;

	skb_reset_network_header(newskb);
	newip = (void *)skb_put(newskb, sizeof(*newip));
	newip->version  = oldip->version;
	newip->priority = oldip->priority;
	memcpy(newip->flow_lbl, oldip->flow_lbl, sizeof(newip->flow_lbl));
	newip->nexthdr  = par->target->proto;
	newip->saddr    = oldip->daddr;
	newip->daddr    = oldip->saddr;

	skb_reset_transport_header(newskb);
	newudp = (void *)skb_put(newskb, sizeof(*newudp));
	newudp->source = oldudp->dest;
	newudp->dest   = oldudp->source;
	newudp->len    = oldudp->len;

	data_len = ntohs(oldudp->len) - sizeof(*oldudp);
	payload  = skb_header_pointer(oldskb, par->thoff +
	           sizeof(*oldudp), data_len, NULL);
	if (payload == NULL)
		goto free_nskb;
	memcpy(skb_put(newskb, data_len), payload, data_len);

#if 0
	/*
	 * Since no fields are modified (we just swapped things around),
	 * this works too in our specific echo case.
	 */
	newudp->check = oldudp->check;
#else
	newudp->check = 0;
	newudp->check = csum_ipv6_magic(&newip->saddr, &newip->daddr,
	                ntohs(newudp->len), IPPROTO_UDP,
	                csum_partial(newudp, ntohs(newudp->len), 0));
#endif

	memset(&fl, 0, sizeof(fl));
	fl.flowi6_proto = newip->nexthdr;
	memcpy(&fl.saddr, &newip->saddr, sizeof(fl.saddr));
	memcpy(&fl.daddr, &newip->daddr, sizeof(fl.daddr));
	fl.fl6_sport = newudp->source;
	fl.fl6_dport = newudp->dest;
	security_skb_classify_flow((struct sk_buff *)oldskb, flowi6_to_flowi(&fl));
	dst = ip6_route_output(net, NULL, &fl);
	if (dst == NULL || dst->error != 0) {
		dst_release(dst);
		goto free_nskb;
	}

	skb_dst_set(newskb, dst);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 38)
	newip->hop_limit = ip6_dst_hoplimit(skb_dst(newskb));
#else
	newip->hop_limit = dst_metric(skb_dst(newskb), RTAX_HOPLIMIT);
#endif
	newskb->ip_summed = CHECKSUM_NONE;

	/* "Never happens" (?) */
	if (newskb->len > dst_mtu(skb_dst(newskb)))
		goto free_nskb;

	nf_ct_attach(newskb, *poldskb);
	ip6_local_out(newskb);
	return NF_DROP;

 free_nskb:
	kfree_skb(newskb);
	return NF_DROP;
}
Example no. 18
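/*
 * A getsockopt() handler for SOL_IP options: most options reduce to an
 * integer copied back to user space at the end; the structured options
 * (IP_OPTIONS, IP_MSFILTER, MCAST_MSFILTER, IP_PKTOPTIONS, ...) release
 * the socket lock early and copy their data out themselves.
 */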
static int ax8netfilter_do_ip_getsockopt(struct sock *sk, int level, int optname,
			    char __user *optval, int __user *optlen)
{
	struct inet_sock *inet = inet_sk(sk);
	int val;
	int len;

	if (level != SOL_IP)
		return -EOPNOTSUPP;

	if (ip_mroute_opt(optname))
		return ip_mroute_getsockopt(sk, optname, optval, optlen);

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	lock_sock(sk);

	switch (optname) {
	case IP_OPTIONS:
	{
		unsigned char optbuf[sizeof(struct ip_options)+40];
		struct ip_options * opt = (struct ip_options *)optbuf;
		opt->optlen = 0;
		if (inet->opt)
			memcpy(optbuf, inet->opt,
			       sizeof(struct ip_options)+
			       inet->opt->optlen);
		release_sock(sk);

		if (opt->optlen == 0)
			return put_user(0, optlen);

		ax8netfilter_ip_options_undo(opt);

		len = min_t(unsigned int, len, opt->optlen);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, opt->__data, len))
			return -EFAULT;
		return 0;
	}
	case IP_PKTINFO:
		val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
		break;
	case IP_RECVTTL:
		val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
		break;
	case IP_RECVTOS:
		val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
		break;
	case IP_RECVOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
		break;
	case IP_RETOPTS:
		val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
		break;
	case IP_PASSSEC:
		val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
		break;
	case IP_RECVORIGDSTADDR:
		val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
		break;
	case IP_TOS:
		val = inet->tos;
		break;
	case IP_TTL:
		val = (inet->uc_ttl == -1 ?
		       *ax8netfilter_sysctl_ip_default_ttl :
		       inet->uc_ttl);
		break;
	case IP_HDRINCL:
		val = inet->hdrincl;
		break;
	case IP_MTU_DISCOVER:
		val = inet->pmtudisc;
		break;
	case IP_MTU:
	{
		struct dst_entry *dst;
		val = 0;
		dst = sk_dst_get(sk);
		if (dst) {
			val = dst_mtu(dst);
			dst_release(dst);
		}
		if (!val) {
			release_sock(sk);
			return -ENOTCONN;
		}
		break;
	}
	case IP_RECVERR:
		val = inet->recverr;
		break;
	case IP_MULTICAST_TTL:
		val = inet->mc_ttl;
		break;
	case IP_MULTICAST_LOOP:
		val = inet->mc_loop;
		break;
	case IP_MULTICAST_IF:
	{
		struct in_addr addr;
		len = min_t(unsigned int, len, sizeof(struct in_addr));
		addr.s_addr = inet->mc_addr;
		release_sock(sk);

		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &addr, len))
			return -EFAULT;
		return 0;
	}
	case IP_MSFILTER:
	{
		struct ip_msfilter msf;
		int err;

		if (len < IP_MSFILTER_SIZE(0)) {
			release_sock(sk);
			return -EINVAL;
		}
		if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
			release_sock(sk);
			return -EFAULT;
		}
		err = ax8netfilter_ip_mc_msfget(sk, &msf,
				   (struct ip_msfilter __user *)optval, optlen);
		release_sock(sk);
		return err;
	}
	case MCAST_MSFILTER:
	{
		struct group_filter gsf;
		int err;

		if (len < GROUP_FILTER_SIZE(0)) {
			release_sock(sk);
			return -EINVAL;
		}
		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
			release_sock(sk);
			return -EFAULT;
		}
		err = ax8netfilter_ip_mc_gsfget(sk, &gsf,
				   (struct group_filter __user *)optval, optlen);
		release_sock(sk);
		return err;
	}
	case IP_PKTOPTIONS:
	{
		struct msghdr msg;

		release_sock(sk);

		if (sk->sk_type != SOCK_STREAM)
			return -ENOPROTOOPT;

		msg.msg_control = optval;
		msg.msg_controllen = len;
		msg.msg_flags = 0;

		if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
			struct in_pktinfo info;

			info.ipi_addr.s_addr = inet->rcv_saddr;
			info.ipi_spec_dst.s_addr = inet->rcv_saddr;
			info.ipi_ifindex = inet->mc_index;
			put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
		}
		if (inet->cmsg_flags & IP_CMSG_TTL) {
			int hlim = inet->mc_ttl;
			put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
		}
		len -= msg.msg_controllen;
		return put_user(len, optlen);
	}
	case IP_FREEBIND:
		val = inet->freebind;
		break;
	case IP_TRANSPARENT:
		val = inet->transparent;
		break;
	default:
		release_sock(sk);
		return -ENOPROTOOPT;
	}
	release_sock(sk);

	if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
		unsigned char ucval = (unsigned char)val;
		len = 1;
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &ucval, 1))
			return -EFAULT;
	} else {
		len = min_t(unsigned int, sizeof(int), len);
		if (put_user(len, optlen))
			return -EFAULT;
		if (copy_to_user(optval, &val, len))
			return -EFAULT;
	}
	return 0;
}
Example no. 19
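/*
 * SIT (IPv6-in-IPv4) transmit path: picks an IPv4 tunnel endpoint,
 * routes it, enforces the tunnel MTU, then prepends an outer IPv4
 * header with protocol IPPROTO_IPV6 and hands off to IPTUNNEL_XMIT().
 */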
static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct ip_tunnel *tunnel = netdev_priv(dev);
    struct net_device_stats *stats = &tunnel->stat;
    struct iphdr  *tiph = &tunnel->parms.iph;
    struct ipv6hdr *iph6 = skb->nh.ipv6h;
    u8     tos = tunnel->parms.iph.tos;
    struct rtable *rt;     			/* Route to the other host */
    struct net_device *tdev;			/* Device to other host */
    struct iphdr  *iph;			/* Our new IP header */
    int    max_headroom;			/* The extra header space needed */
    __be32 dst = tiph->daddr;
    int    mtu;
    struct in6_addr *addr6;
    int addr_type;

    if (tunnel->recursion++) {
        tunnel->stat.collisions++;
        goto tx_error;
    }

    if (skb->protocol != htons(ETH_P_IPV6))
        goto tx_error;

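    /* No fixed tunnel endpoint configured: derive an IPv4 destination
     * from the 6to4 mapping or the neighbour's v4-compatible address. */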
    if (!dst)
        dst = try_6to4(&iph6->daddr);

    if (!dst) {
        struct neighbour *neigh = NULL;

        if (skb->dst)
            neigh = skb->dst->neighbour;

        if (neigh == NULL) {
            if (net_ratelimit())
                printk(KERN_DEBUG "sit: nexthop == NULL\n");
            goto tx_error;
        }

        addr6 = (struct in6_addr*)&neigh->primary_key;
        addr_type = ipv6_addr_type(addr6);

        if (addr_type == IPV6_ADDR_ANY) {
            addr6 = &skb->nh.ipv6h->daddr;
            addr_type = ipv6_addr_type(addr6);
        }

        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
            goto tx_error_icmp;

        dst = addr6->s6_addr32[3];
    }

    {
        struct flowi fl = { .nl_u = { .ip4_u =
                {   .daddr = dst,
                    .saddr = tiph->saddr,
                    .tos = RT_TOS(tos)
                }
            },
            .oif = tunnel->parms.link,
            .proto = IPPROTO_IPV6
        };
        if (ip_route_output_key(&rt, &fl)) {
            tunnel->stat.tx_carrier_errors++;
            goto tx_error_icmp;
        }
    }
    if (rt->rt_type != RTN_UNICAST) {
        ip_rt_put(rt);
        tunnel->stat.tx_carrier_errors++;
        goto tx_error_icmp;
    }
    tdev = rt->u.dst.dev;

    if (tdev == dev) {
        ip_rt_put(rt);
        tunnel->stat.collisions++;
        goto tx_error;
    }

    if (tiph->frag_off)
        mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
    else
        mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;

    if (mtu < 68) {
        tunnel->stat.collisions++;
        ip_rt_put(rt);
        goto tx_error;
    }
    if (mtu < IPV6_MIN_MTU)
        mtu = IPV6_MIN_MTU;
    if (tunnel->parms.iph.daddr && skb->dst)
        skb->dst->ops->update_pmtu(skb->dst, mtu);

    if (skb->len > mtu) {
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
        ip_rt_put(rt);
        goto tx_error;
    }

    if (tunnel->err_count > 0) {
        if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
            tunnel->err_count--;
            dst_link_failure(skb);
        } else
            tunnel->err_count = 0;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);

    if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            stats->tx_dropped++;
            dev_kfree_skb(skb);
            tunnel->recursion--;
            return 0;
        }
        if (skb->sk)
            skb_set_owner_w(new_skb, skb->sk);
        dev_kfree_skb(skb);
        skb = new_skb;
        iph6 = skb->nh.ipv6h;
    }

    skb->h.raw = skb->nh.raw;
    skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
    IPCB(skb)->flags = 0;
    dst_release(skb->dst);
    skb->dst = &rt->u.dst;

    /*
     *	Push down and install the IPIP header.
     */

    iph 			=	skb->nh.iph;
    iph->version		=	4;
    iph->ihl		=	sizeof(struct iphdr)>>2;
    if (mtu > IPV6_MIN_MTU)
        iph->frag_off	=	htons(IP_DF);
    else
        iph->frag_off	=	0;

    iph->protocol		=	IPPROTO_IPV6;
    iph->tos		=	INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6));
    iph->daddr		=	rt->rt_dst;
    iph->saddr		=	rt->rt_src;

    if ((iph->ttl = tiph->ttl) == 0)
        iph->ttl	=	iph6->hop_limit;

    nf_reset(skb);

    IPTUNNEL_XMIT();
    tunnel->recursion--;
    return 0;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    stats->tx_errors++;
    dev_kfree_skb(skb);
    tunnel->recursion--;
    return 0;
}
Example no. 20
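/*
 * ICMPv6 error handler for TCP: looks up the socket for the offending
 * segment, handles PKT_TOOBIG by re-syncing the path MTU, and converts
 * other ICMPv6 errors into socket errors (including request_sock
 * cleanup on listeners).
 */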
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp; 
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq); 
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;
			security_skb_classify_flow(skb, &fl);

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, e.g. if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
Example no. 21
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
	struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl6);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl6->saddr)) {
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
Example no. 22
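/*
 * IPVS direct-routing transmitter for IPv6: the packet is forwarded to
 * the real server unchanged, so only a route lookup and an MTU check
 * are needed before the skb is re-injected at LOCAL_OUT.
 */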
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, (IP_VS_RT_MODE_LOCAL |
					     IP_VS_RT_MODE_NON_LOCAL))))
		goto tx_error_icmp;
	if (__ip_vs_is_local_route6(rt)) {
		dst_release(&rt->dst);
		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
Example no. 23
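/*
 * Re-routes an IPv6 packet after netfilter may have rewritten its
 * addresses or mark: performs a fresh route (and, with XFRM, policy)
 * lookup and re-expands headroom if the new output device needs more.
 */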
int ip6_route_me_harder(struct sk_buff *skb)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	unsigned int hh_len;
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
		.flowi6_mark = skb->mark,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
	};
	int err;

	dst = ip6_route_output(net, skb->sk, &fl6);
	err = dst->error;
	if (err) {
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
		net_dbg_ratelimited("ip6_route_me_harder: No more route\n");
		dst_release(dst);
		return err;
	}

	/* Drop old route. */
	skb_dst_drop(skb);

	skb_dst_set(skb, dst);

#ifdef CONFIG_XFRM
	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
	    xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
		skb_dst_set(skb, NULL);
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), skb->sk, 0);
		if (IS_ERR(dst))
			return PTR_ERR(dst);
		skb_dst_set(skb, dst);
	}
#endif

	/* Change in oif may mean change in hh_len. */
	hh_len = skb_dst(skb)->dev->hard_header_len;
	if (skb_headroom(skb) < hh_len &&
	    pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)),
			     0, GFP_ATOMIC))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL(ip6_route_me_harder);

/*
 * Extra routing may be needed on local out, as the QUEUE target never
 * returns control to the table.
 */

struct ip6_rt_info {
	struct in6_addr daddr;
	struct in6_addr saddr;
	u_int32_t mark;
};

static void nf_ip6_saveroute(const struct sk_buff *skb,
			     struct nf_queue_entry *entry)
{
	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);

	if (entry->state.hook == NF_INET_LOCAL_OUT) {
		const struct ipv6hdr *iph = ipv6_hdr(skb);

		rt_info->daddr = iph->daddr;
		rt_info->saddr = iph->saddr;
		rt_info->mark = skb->mark;
	}
}

static int nf_ip6_reroute(struct sk_buff *skb,
			  const struct nf_queue_entry *entry)
{
	struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);

	if (entry->state.hook == NF_INET_LOCAL_OUT) {
		const struct ipv6hdr *iph = ipv6_hdr(skb);
		if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
		    !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
		    skb->mark != rt_info->mark)
			return ip6_route_me_harder(skb);
	}
	return 0;
}

static int nf_ip6_route(struct net *net, struct dst_entry **dst,
			struct flowi *fl, bool strict)
{
	static const struct ipv6_pinfo fake_pinfo;
	static const struct inet_sock fake_sk = {
		/* makes ip6_route_output set RT6_LOOKUP_F_IFACE: */
		.sk.sk_bound_dev_if = 1,
		.pinet6 = (struct ipv6_pinfo *) &fake_pinfo,
	};
	const void *sk = strict ? &fake_sk : NULL;
	struct dst_entry *result;
	int err;

	result = ip6_route_output(net, sk, &fl->u.ip6);
	err = result->error;
	if (err)
		dst_release(result);
	else
		*dst = result;
	return err;
}

__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook,
			     unsigned int dataoff, u_int8_t protocol)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN)
			break;
		if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
				     skb->len - dataoff, protocol,
				     csum_sub(skb->csum,
					      skb_checksum(skb, 0,
							   dataoff, 0)))) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}
		/* fall through */
	case CHECKSUM_NONE:
		skb->csum = ~csum_unfold(
				csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
					     skb->len - dataoff,
					     protocol,
					     csum_sub(0,
						      skb_checksum(skb, 0,
								   dataoff, 0))));
		csum = __skb_checksum_complete(skb);
	}
	return csum;
}
EXPORT_SYMBOL(nf_ip6_checksum);

static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
				       unsigned int dataoff, unsigned int len,
				       u_int8_t protocol)
{
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	__wsum hsum;
	__sum16 csum = 0;

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (len == skb->len - dataoff)
			return nf_ip6_checksum(skb, hook, dataoff, protocol);
		/* fall through */
	case CHECKSUM_NONE:
		hsum = skb_checksum(skb, 0, dataoff, 0);
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr,
							 &ip6h->daddr,
							 skb->len - dataoff,
							 protocol,
							 csum_sub(0, hsum)));
		skb->ip_summed = CHECKSUM_NONE;
		return __skb_checksum_complete_head(skb, dataoff + len);
	}
	return csum;
}

static const struct nf_ipv6_ops ipv6ops = {
	.chk_addr	= ipv6_chk_addr,
	.route_input    = ip6_route_input,
	.fragment	= ip6_fragment
};

static const struct nf_afinfo nf_ip6_afinfo = {
	.family			= AF_INET6,
	.checksum		= nf_ip6_checksum,
	.checksum_partial	= nf_ip6_checksum_partial,
	.route			= nf_ip6_route,
	.saveroute		= nf_ip6_saveroute,
	.reroute		= nf_ip6_reroute,
	.route_key_size		= sizeof(struct ip6_rt_info),
};

int __init ipv6_netfilter_init(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
	return nf_register_afinfo(&nf_ip6_afinfo);
}

/* This can be called from inet6_init() on errors, so it cannot
 * be marked __exit. -DaveM
 */
void ipv6_netfilter_fini(void)
{
	RCU_INIT_POINTER(nf_ipv6_ops, NULL);
	nf_unregister_afinfo(&nf_ip6_afinfo);
}
Example no. 24
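/*
 * Transmits an ICMPv6 packet that belongs to an IPVS connection:
 * non-NAT forwarding methods pass it through via packet_xmit(), while
 * VS/NAT mangles the embedded headers first and then re-injects it.
 */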
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, rt_mode)))
		goto tx_error_icmp;

	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
Example no. 25
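/*
 * Joins a socket to an IPv6 anycast address: resolves the interface
 * (explicit ifindex, route lookup, or any UP device on routers),
 * enforces on-link restrictions for hosts, and links the new entry
 * into the socket's anycast list.
 */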
int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net_device *dev = NULL;
	struct inet6_dev *idev;
	struct ipv6_ac_socklist *pac;
	int	ishost = !ipv6_devconf.forwarding;
	int	err = 0;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;
	if (ipv6_addr_is_multicast(addr))
		return -EINVAL;
	if (ipv6_chk_addr(addr, NULL, 0))
		return -EINVAL;

	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
	if (pac == NULL)
		return -ENOMEM;
	pac->acl_next = NULL;
	ipv6_addr_copy(&pac->acl_addr, addr);

	if (ifindex == 0) {
		struct rt6_info *rt;

		rt = rt6_lookup(addr, NULL, 0, 0);
		if (rt) {
			dev = rt->rt6i_dev;
			dev_hold(dev);
			dst_release(&rt->u.dst);
		} else if (ishost) {
			err = -EADDRNOTAVAIL;
			goto out_free_pac;
		} else {
			/* router, no matching interface: just pick one */

			dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
		}
	} else
		dev = dev_get_by_index(ifindex);

	if (dev == NULL) {
		err = -ENODEV;
		goto out_free_pac;
	}

	idev = in6_dev_get(dev);
	if (!idev) {
		if (ifindex)
			err = -ENODEV;
		else
			err = -EADDRNOTAVAIL;
		goto out_dev_put;
	}
	/* reset ishost, now that we have a specific device */
	ishost = !idev->cnf.forwarding;
	in6_dev_put(idev);

	pac->acl_ifindex = dev->ifindex;

	/* XXX
	 * For hosts, allow link-local or matching prefix anycasts.
	 * This obviates the need for propagating anycast routes while
	 * still allowing some non-router anycast participation.
	 */
	if (!ip6_onlink(addr, dev)) {
		if (ishost)
			err = -EADDRNOTAVAIL;
		if (err)
			goto out_dev_put;
	}

	err = ipv6_dev_ac_inc(dev, addr);
	if (err)
		goto out_dev_put;

	write_lock_bh(&ipv6_sk_ac_lock);
	pac->acl_next = np->ipv6_ac_list;
	np->ipv6_ac_list = pac;
	write_unlock_bh(&ipv6_sk_ac_lock);

	dev_put(dev);

	return 0;

out_dev_put:
	dev_put(dev);
out_free_pac:
	sock_kfree_s(sk, pac, sizeof(*pac));
	return err;
}
Example no. 26
/* Reroute packet to local IPv4 stack after DNAT */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct net_device *dev = rt->dst.dev;
	struct net *net = dev_net(dev);
	struct iphdr *iph = ip_hdr(skb);

	if (rt_is_input_route(rt)) {
		unsigned long orefdst = skb->_skb_refdst;

		if (ip_route_input(skb, iph->daddr, iph->saddr,
				   iph->tos, skb->dev))
			return 0;
		refdst_drop(orefdst);
	} else {
		struct flowi4 fl4 = {
			.daddr = iph->daddr,
			.saddr = iph->saddr,
			.flowi4_tos = RT_TOS(iph->tos),
			.flowi4_mark = skb->mark,
		};

		rt = ip_route_output_key(net, &fl4);
		if (IS_ERR(rt))
			return 0;
		if (!(rt->rt_flags & RTCF_LOCAL)) {
			ip_rt_put(rt);
			return 0;
		}
		/* Drop old route. */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	}
	return 1;
}

#ifdef CONFIG_IP_VS_IPV6

static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
	return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
}

static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			dst = NULL;
			goto out_err;
		}
	}
	ipv6_addr_copy(ret_saddr, &fl6.saddr);
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}

/*
 * Get route to destination or remote server
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
		if (!rt) {
			u32 cookie;

			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
		spin_unlock(&dest->dst_lock);
	} else {
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			return NULL;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
			     local ? "local":"non-local", daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
	      __ip_vs_is_local_route6(ort))) {
		IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
			     "requires NAT method, dest: %pI6\n",
			     &ipv6_hdr(skb)->daddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}
	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
				    IPV6_ADDR_LOOPBACK)) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
			     "to non-local address, dest: %pI6\n",
			     &ipv6_hdr(skb)->saddr, daddr);
		dst_release(&rt->dst);
		return NULL;
	}

	return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = NULL;
	dst_release(old_dst);
}

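/*
 * Transmit wrappers.  IP_VS_XMIT_TUNNEL confirms conntrack and yields a
 * verdict to its caller; IP_VS_XMIT_NAT and IP_VS_XMIT mark the skb as
 * IPVS property and either accept it locally (local != 0) or re-inject
 * it at the LOCAL_OUT hook toward dst_output().
 */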
#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb);		\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})

#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)

#define IP_VS_XMIT(pf, skb, cp, local)			\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
Example no. 27
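/*
 * Bridge-netfilter PREROUTING finish: when DNAT changed the destination,
 * decide whether the frame must now be routed by the IP stack or can
 * still be bridged, fixing up the destination MAC and packet type.
 */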
static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
    struct net_device *dev = skb->dev;
    struct iphdr *iph = ip_hdr(skb);
    struct nf_bridge_info *nf_bridge = skb->nf_bridge;
    int err;

    if (nf_bridge->mask & BRNF_PKT_TYPE) {
        skb->pkt_type = PACKET_OTHERHOST;
        nf_bridge->mask ^= BRNF_PKT_TYPE;
    }
    nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
    if (dnat_took_place(skb)) {
        if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
            struct rtable *rt;
            struct flowi fl = {
                .nl_u = {
                    .ip4_u = {
                         .daddr = iph->daddr,
                         .saddr = 0,
                         .tos = RT_TOS(iph->tos) },
                },
                .proto = 0,
            };
            struct in_device *in_dev = in_dev_get(dev);

            /* If err equals -EHOSTUNREACH the error is due to a
             * martian destination or due to the fact that
             * forwarding is disabled. For most martian packets,
             * ip_route_output_key() will fail. It won't fail for 2 types of
             * martian destinations: loopback destinations and destination
             * 0.0.0.0. In both cases the packet will be dropped because the
             * destination is the loopback device and not the bridge. */
            if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
                goto free_skb;

            if (!ip_route_output_key(&init_net, &rt, &fl)) {
                /* - Bridged-and-DNAT'ed traffic doesn't
                 *   require ip_forwarding. */
                if (((struct dst_entry *)rt)->dev == dev) {
                    skb->dst = (struct dst_entry *)rt;
                    goto bridged_dnat;
                }
                /* we are sure that forwarding is disabled, so printing
                 * this message is no problem. Note that the packet could
                 * still have a martian destination address, in which case
                 * the packet could be dropped even if forwarding were enabled */
                __br_dnat_complain();
                dst_release((struct dst_entry *)rt);
            }
free_skb:
            kfree_skb(skb);
            return 0;
        } else {
            if (skb->dst->dev == dev) {
bridged_dnat:
                /* Tell br_nf_local_out this is a
                 * bridged frame */
                nf_bridge->mask |= BRNF_BRIDGED_DNAT;
                skb->dev = nf_bridge->physindev;
                nf_bridge_push_encap_header(skb);
                NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
                           skb, skb->dev, NULL,
                           br_nf_pre_routing_finish_bridge,
                           1);
                return 0;
            }
            memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
            skb->pkt_type = PACKET_HOST;
        }
    } else {
Example no. 28
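/*
 * IPVS NAT transmitter for IPv6: rewrites the destination address (the
 * protocol's dnat_handler fixes ports and checksums), with guards
 * against loopback misdirection and oversized packets.
 */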
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;
	int local;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, (IP_VS_RT_MODE_LOCAL |
					     IP_VS_RT_MODE_NON_LOCAL |
					     IP_VS_RT_MODE_RDR))))
		goto tx_error_icmp;
	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
Example no. 29
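/*
 * An older (struct flowi era) ip6_dst_lookup_tail: resolves the route
 * and source address for a flow; under OPTIMISTIC_DAD it falls back to
 * the default router's dst while the next hop's neighbour entry is not
 * yet valid.
 */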
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
Example no. 30
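/*
 * The ipt_ROUTE target: forcibly re-routes (or, with IPT_ROUTE_TEE,
 * routes a copy of) a packet via a configured interface or gateway.
 * The fake route_tee_track conntrack entry marks packets already
 * handled, so a teed copy cannot loop through the target forever.
 */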
/* Legacy Xtables target prototype, kept disabled for reference: */
#if 0
static unsigned int
target(struct sk_buff **pskb,
       unsigned int hooknum,
       const struct net_device *in,
       const struct net_device *out,
       const void *targinfo,
       void *userinfo)
{
}
#endif

#if 0
static unsigned int ipt_route_target(struct sk_buff **pskb,
				     unsigned int hooknum,	
				     const struct net_device *in,
				     const struct net_device *out,
//#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
//				     const struct xt_target *target,
//#endif
//#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
				     const void *targinfo,
				     void *userinfo)
//#else
//				     const void *targinfo)
//#endif
#endif 

static unsigned int ipt_route_target(struct sk_buff *skb2,
				     const struct xt_target_param *par)
{
	const struct ipt_route_target_info *route_info = par->targinfo;
	struct sk_buff **pskb = &skb2;
	struct sk_buff *skb = *pskb;

	unsigned int res;

#if 1
	if (skb->nfct == &route_tee_track.ct_general) {
		/* Loopback - a packet we already routed, is to be
		 * routed another time. Avoid that, now.
		 */
		if (net_ratelimit()) 
			DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n");
		return NF_DROP;
	}
#endif

	/* If we are at PREROUTING or INPUT hook
	 * the TTL isn't decreased by the IP stack
	 */
	if (par->hooknum == NF_INET_PRE_ROUTING ||
	    par->hooknum == NF_INET_LOCAL_IN) {

		struct iphdr *iph = ip_hdr(skb);

		if (iph->ttl <= 1) {
			struct rtable *rt;
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = iph->daddr,
						.saddr = iph->saddr,
						.tos = RT_TOS(iph->tos),
						.scope = ((iph->tos & RTO_ONLINK) ?
							  RT_SCOPE_LINK :
							  RT_SCOPE_UNIVERSE)
					}
				} 
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				return NF_DROP;
			}

			if (skb->dev == rt->u.dst.dev) {
				/* Drop old route. */
				dst_release(skb->dst);
				skb->dst = &rt->u.dst;

				/* this will traverse normal stack, and 
				 * thus call conntrack on the icmp packet */
				icmp_send(skb, ICMP_TIME_EXCEEDED, 
					  ICMP_EXC_TTL, 0);
			}

			return NF_DROP;
		}

		/*
		 * If we are at INPUT the checksum must be recalculated since
		 * the length could change as the result of a defragmentation.
		 */
		if(par->hooknum == NF_INET_LOCAL_IN) {
			iph->ttl = iph->ttl - 1;
			iph->check = 0;
			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
		} else {
			ip_decrease_ttl(iph);
		}
	}

	if ((route_info->flags & IPT_ROUTE_TEE)) {
		/*
		 * Copy the *pskb, and route the copy. Will later return
		 * IPT_CONTINUE for the original skb, which should continue
		 * on its way as if nothing happened. The copy should be
		 * independently delivered to the ROUTE --gw.
		 */
		skb = skb_copy(*pskb, GFP_ATOMIC);
		if (!skb) {
			if (net_ratelimit()) 
				DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n");
			return IPT_CONTINUE;
		}
	}

	/* Tell conntrack to forget this packet since it may get confused 
	 * when a packet is leaving with dst address == our address.
	 * Good idea ? Dunno. Need advice.
	 *
	 * NEW: mark the skb with our &route_tee_track, so we avoid looping
	 * on any already routed packet.
	 */
	if (!(route_info->flags & IPT_ROUTE_CONTINUE)) {
		nf_conntrack_put(skb->nfct);
		skb->nfct = &route_tee_track.ct_general;
		skb->nfctinfo = IP_CT_NEW;
		nf_conntrack_get(skb->nfct);
	}

	if (route_info->oif[0] != '\0') {
		res = route_oif(route_info, skb);
	} else if (route_info->iif[0] != '\0') {
		res = route_iif(route_info, skb);
	} else if (route_info->gw) {
		res = route_gw(route_info, skb);
	} else {
		if (net_ratelimit()) 
			DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n");
		res = IPT_CONTINUE;
	}

	if ((route_info->flags & IPT_ROUTE_TEE))
		res = IPT_CONTINUE;

	return res;
}

#if 0
static int ipt_route_checkentry(const char *tablename,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
				const void *e,
#else
//				const struct ipt_ip *ip,
#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
				const struct xt_target *target,
#endif
				void *targinfo,
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
				unsigned int targinfosize,
#endif
				unsigned int hook_mask)
#endif

/*
struct xt_tgchk_param {
	const char *table;
	const void *entryinfo;
	const struct xt_target *target;
	void *targinfo;
	unsigned int hook_mask;
	u_int8_t family;
};
*/

static bool ipt_route_checkentry(const struct xt_tgchk_param *par)
{
	if (strcmp(par->table, "mangle") != 0) {
		printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n",
		       par->table);
		return 0;
	}

	if (par->hook_mask & ~(  (1 << NF_INET_PRE_ROUTING)
			    | (1 << NF_INET_LOCAL_IN)
			    | (1 << NF_INET_FORWARD)
			    | (1 << NF_INET_LOCAL_OUT)
			    | (1 << NF_INET_POST_ROUTING))) {
		printk("ipt_ROUTE: bad hook\n");
		return 0;
	}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
	if (targinfosize != IPT_ALIGN(sizeof(struct ipt_route_target_info))) {
		printk(KERN_WARNING "ipt_ROUTE: targinfosize %u != %Zu\n",
		       targinfosize,
		       IPT_ALIGN(sizeof(struct ipt_route_target_info)));
		return 0;
	}
#endif

	return 1;
}


static struct xt_target ipt_route_reg = {
	.name		= "ROUTE",
	.target		= ipt_route_target,
	.family		= AF_INET,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
	.targetsize	= sizeof(struct ipt_route_target_info),
#endif
	.hooks		= (1 << NF_INET_PRE_ROUTING) |
			  (1 << NF_INET_LOCAL_IN) |
			  (1 << NF_INET_FORWARD) |
			  (1 << NF_INET_LOCAL_OUT) |
			  (1 << NF_INET_POST_ROUTING),
	.checkentry	= ipt_route_checkentry,
	.me		= THIS_MODULE,
};

static int __init init(void)
{
	/* Set up fake conntrack (stolen from raw.patch):
	    - to never be deleted, not in any hashes */
	atomic_set(&route_tee_track.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status);
	/* Initialize fake conntrack so that NAT will skip it */
	route_tee_track.status |= IPS_NAT_DONE_MASK;

	return xt_register_target(&ipt_route_reg);
}


static void __exit fini(void)
{
	xt_unregister_target(&ipt_route_reg);
}