Exemple #1
0
/* save tcp sequense for fullnat/nat, INside to OUTside */
static void
tcp_save_out_seq(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct tcphdr *th, int ihl)
{
	if (unlikely(th == NULL) || unlikely(cp == NULL) ||
	    unlikely(skb == NULL))
		return;

	if (sysctl_ip_vs_conn_expire_tcp_rst && !th->rst) {

		/* seq out of order. just skip */
		if (before(ntohl(th->ack_seq), ntohl(cp->rs_ack_seq)) &&
							(cp->rs_ack_seq != 0))
			return;

		if (th->syn && th->ack)
			cp->rs_end_seq = htonl(ntohl(th->seq) + 1);
		else
			cp->rs_end_seq = htonl(ntohl(th->seq) + skb->len
					       - ihl - (th->doff << 2));
		cp->rs_ack_seq = th->ack_seq;
		IP_VS_DBG_RL("packet from RS, seq:%u ack_seq:%u.",
			     ntohl(th->seq), ntohl(th->ack_seq));
		IP_VS_DBG_RL("port:%u->%u", ntohs(th->source), ntohs(th->dest));
	}
}
Exemple #2
0
static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
			struct in6_addr *ret_saddr, int do_xfrm)
{
	struct dst_entry *dst;
	struct flowi6 fl6 = {
		.daddr = *daddr,
	};

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error)
		goto out_err;
	if (!ret_saddr)
		return dst;
	if (ipv6_addr_any(&fl6.saddr) &&
	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
			       &fl6.daddr, 0, &fl6.saddr) < 0)
		goto out_err;
	if (do_xfrm) {
		dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
		if (IS_ERR(dst)) {
			dst = NULL;
			goto out_err;
		}
	}
	*ret_saddr = fl6.saddr;
	return dst;

out_err:
	dst_release(dst);
	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
	return NULL;
}
static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos, 0))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->u.dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
Exemple #4
0
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                 struct ip_vs_protocol *pp)
{
    struct rt6_info *rt;			/* Route to the other host */
    int    mtu;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
                                     0, 1|2)))
        goto tx_error_icmp;
    if (__ip_vs_is_local_route6(rt)) {
        dst_release(&rt->dst);
        IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if (skb->len > mtu) {
        if (!skb->dev) {
            struct net *net = dev_net(skb_dst(skb)->dev);

            skb->dev = net->loopback_dev;
        }
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        dst_release(&rt->dst);
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error;
    }

    /*
     * Call ip_send_check because we are not sure it is called
     * after ip_defrag. Is copy-on-write needed?
     */
    skb = skb_share_check(skb, GFP_ATOMIC);
    if (unlikely(skb == NULL)) {
        dst_release(&rt->dst);
        return NF_STOLEN;
    }

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);

    LeaveFunction(10);
    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
}
Exemple #5
0
/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
				       int rt_mode, __be32 *saddr)
{
	struct flowi4 fl4;
	struct rtable *rt;
	int loop = 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = daddr;
	fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
	fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
			   FLOWI_FLAG_KNOWN_NH : 0;

retry:
	rt = ip_route_output_key(net, &fl4);
	if (IS_ERR(rt)) {
		/* Invalid saddr ? */
		if (PTR_ERR(rt) == -EINVAL && *saddr &&
		    rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
			*saddr = 0;
			flowi4_update_output(&fl4, 0, 0, daddr, 0);
			goto retry;
		}
		IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
		return NULL;
	} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
		ip_rt_put(rt);
		*saddr = fl4.saddr;
		flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
		loop++;
		goto retry;
	}
	*saddr = fl4.saddr;
	return rt;
}
Exemple #6
0
/*
 * Get route to destination or remote server
 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
 *	    &4=Allow redirect from remote daddr to local
 */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
Exemple #7
0
/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
              struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct iphdr  *iph = ip_hdr(skb);
    int    mtu;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(iph->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        ip_rt_put(rt);
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error;
    }

    /*
     * Call ip_send_check because we are not sure it is called
     * after ip_defrag. Is copy-on-write needed?
     */
    if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
        ip_rt_put(rt);
        return NF_STOLEN;
    }
    ip_send_check(ip_hdr(skb));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);

    LeaveFunction(10);
    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
}
Exemple #8
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct net_device *tdev;		/* Device to other host */
    struct iphdr  *old_iph = ip_hdr(skb);
    u8     tos = old_iph->tos;
    __be16 df = old_iph->frag_off;
    struct iphdr  *iph;			/* Our new IP header */
    unsigned int max_headroom;		/* The extra header space needed */
    int    mtu;
    int ret;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    tdev = rt->dst.dev;

    mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
    if (mtu < 68) {
        IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
        goto tx_error_put;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

    df |= (old_iph->frag_off & htons(IP_DF));

    if ((old_iph->frag_off & htons(IP_DF) &&
            mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

    if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb =
            skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            kfree_skb(skb);
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NF_STOLEN;
        }
        kfree_skb(skb);
        skb = new_skb;
        old_iph = ip_hdr(skb);
    }

    skb->transport_header = skb->network_header;

    /* fix old IP header checksum */
    ip_send_check(old_iph);

    skb_push(skb, sizeof(struct iphdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /*
     *	Push down and install the IPIP header.
     */
    iph			=	ip_hdr(skb);
    iph->version		=	4;
    iph->ihl		=	sizeof(struct iphdr)>>2;
    iph->frag_off		=	df;
    iph->protocol		=	IPPROTO_IPIP;
    iph->tos		=	tos;
    iph->daddr		=	rt->rt_dst;
    iph->saddr		=	rt->rt_src;
    iph->ttl		=	old_iph->ttl;
    ip_select_ident(iph, &rt->dst, NULL);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    ret = IP_VS_XMIT_TUNNEL(skb, cp);
    if (ret == NF_ACCEPT)
        ip_local_out(skb);
    else if (ret == NF_DROP)
        kfree_skb(skb);

    LeaveFunction(10);

    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Exemple #9
0
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                     struct ip_vs_protocol *pp)
{
    struct rt6_info *rt;		/* Route to the other host */
    struct in6_addr saddr;		/* Source for tunnel */
    struct net_device *tdev;	/* Device to other host */
    struct ipv6hdr  *old_iph = ipv6_hdr(skb);
    struct ipv6hdr  *iph;		/* Our new IP header */
    unsigned int max_headroom;	/* The extra header space needed */
    int    mtu;
    int ret;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
                                     &saddr, 1, 1|2)))
        goto tx_error_icmp;
    if (__ip_vs_is_local_route6(rt)) {
        dst_release(&rt->dst);
        IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
    }

    tdev = rt->dst.dev;

    mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
    if (mtu < IPV6_MIN_MTU) {
        IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
                     IPV6_MIN_MTU);
        goto tx_error_put;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

    if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) &&
            !skb_is_gso(skb)) {
        if (!skb->dev) {
            struct net *net = dev_net(skb_dst(skb)->dev);

            skb->dev = net->loopback_dev;
        }
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

    if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb =
            skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            dst_release(&rt->dst);
            kfree_skb(skb);
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NF_STOLEN;
        }
        kfree_skb(skb);
        skb = new_skb;
        old_iph = ipv6_hdr(skb);
    }

    skb->transport_header = skb->network_header;

    skb_push(skb, sizeof(struct ipv6hdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /*
     *	Push down and install the IPIP header.
     */
    iph			=	ipv6_hdr(skb);
    iph->version		=	6;
    iph->nexthdr		=	IPPROTO_IPV6;
    iph->payload_len	=	old_iph->payload_len;
    be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
    iph->priority		=	old_iph->priority;
    memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
    ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
    ipv6_addr_copy(&iph->saddr, &saddr);
    iph->hop_limit		=	old_iph->hop_limit;

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    ret = IP_VS_XMIT_TUNNEL(skb, cp);
    if (ret == NF_ACCEPT)
        ip6_local_out(skb);
    else if (ret == NF_DROP)
        kfree_skb(skb);

    LeaveFunction(10);

    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    dst_release(&rt->dst);
    goto tx_error;
}
Exemple #10
0
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
                   struct ip_vs_protocol *pp, int offset)
{
    struct rt6_info	*rt;	/* Route to the other host */
    int mtu;
    int rc;
    int local;

    EnterFunction(10);

    /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
       forwarded directly here, because there is no need to
       translate address/port back */
    if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
        if (cp->packet_xmit)
            rc = cp->packet_xmit(skb, cp, pp);
        else
            rc = NF_ACCEPT;
        /* do not touch skb anymore */
        atomic_inc_unchecked(&cp->in_pkts);
        goto out;
    }

    /*
     * mangle and send the packet here (only for VS/NAT)
     */

    if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
                                     0, 1|2|4)))
        goto tx_error_icmp;

    local = __ip_vs_is_local_route6(rt);
    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG(10, "%s(): "
                      "stopping DNAT to local address %pI6\n",
                      __func__, &cp->daddr.in6);
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
            ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
        IP_VS_DBG(1, "%s(): "
                  "stopping DNAT to loopback %pI6\n",
                  __func__, &cp->daddr.in6);
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if (skb->len > mtu && !skb_is_gso(skb)) {
        if (!skb->dev) {
            struct net *net = dev_net(skb_dst(skb)->dev);

            skb->dev = net->loopback_dev;
        }
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, offset))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    ip_vs_nat_icmp_v6(skb, pp, cp, 0);

    if (!local || !skb->dev) {
        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        /* destined to loopback, do we need to change route? */
        dst_release(&rt->dst);
    }

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

    rc = NF_STOLEN;
    goto out;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    dev_kfree_skb(skb);
    rc = NF_STOLEN;
out:
    LeaveFunction(10);
    return rc;
tx_error_put:
    dst_release(&rt->dst);
    goto tx_error;
}
Exemple #11
0
/* Reroute packet to local IPv4 stack after DNAT */
static int
__ip_vs_reroute_locally(struct sk_buff *skb)
{
    struct rtable *rt = skb_rtable(skb);
    struct net_device *dev = rt->dst.dev;
    struct net *net = dev_net(dev);
    struct iphdr *iph = ip_hdr(skb);

    if (rt_is_input_route(rt)) {
        unsigned long orefdst = skb->_skb_refdst;

        if (ip_route_input(skb, iph->daddr, iph->saddr,
                           iph->tos, skb->dev))
            return 0;
        refdst_drop(orefdst);
    } else {
        struct flowi4 fl4 = {
            .daddr = iph->daddr,
            .saddr = iph->saddr,
            .flowi4_tos = RT_TOS(iph->tos),
            .flowi4_mark = skb->mark,
        };

        rt = ip_route_output_key(net, &fl4);
        if (IS_ERR(rt))
            return 0;
        if (!(rt->rt_flags & RTCF_LOCAL)) {
            ip_rt_put(rt);
            return 0;
        }
        /* Drop old route. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    }
    return 1;
}

#ifdef CONFIG_IP_VS_IPV6

static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
{
    return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
}

static struct dst_entry *
__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
                        struct in6_addr *ret_saddr, int do_xfrm)
{
    struct dst_entry *dst;
    struct flowi6 fl6 = {
        .daddr = *daddr,
    };

    dst = ip6_route_output(net, NULL, &fl6);
    if (dst->error)
        goto out_err;
    if (!ret_saddr)
        return dst;
    if (ipv6_addr_any(&fl6.saddr) &&
            ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
                               &fl6.daddr, 0, &fl6.saddr) < 0)
        goto out_err;
    if (do_xfrm) {
        dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0);
        if (IS_ERR(dst)) {
            dst = NULL;
            goto out_err;
        }
    }
    ipv6_addr_copy(ret_saddr, &fl6.saddr);
    return dst;

out_err:
    dst_release(dst);
    IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
    return NULL;
}

/*
 * Get route to destination or remote server
 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
 *	    &4=Allow redirect from remote daddr to local
 */
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
                      struct in6_addr *daddr, struct in6_addr *ret_saddr,
                      int do_xfrm, int rt_mode)
{
    struct net *net = dev_net(skb_dst(skb)->dev);
    struct rt6_info *rt;			/* Route to the other host */
    struct rt6_info *ort;			/* Original route */
    struct dst_entry *dst;
    int local;

    if (dest) {
        spin_lock(&dest->dst_lock);
        rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
        if (!rt) {
            u32 cookie;

            dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
                                          &dest->dst_saddr,
                                          do_xfrm);
            if (!dst) {
                spin_unlock(&dest->dst_lock);
                return NULL;
            }
            rt = (struct rt6_info *) dst;
            cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
            __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
            IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
                      &dest->addr.in6, &dest->dst_saddr,
                      atomic_read(&rt->dst.__refcnt));
        }
        if (ret_saddr)
            ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
        spin_unlock(&dest->dst_lock);
    } else {
        dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
        if (!dst)
            return NULL;
        rt = (struct rt6_info *) dst;
    }

    local = __ip_vs_is_local_route6(rt);
    if (!((local ? 1 : 2) & rt_mode)) {
        IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
                     local ? "local":"non-local", daddr);
        dst_release(&rt->dst);
        return NULL;
    }
    if (local && !(rt_mode & 4) &&
            !((ort = (struct rt6_info *) skb_dst(skb)) &&
              __ip_vs_is_local_route6(ort))) {
        IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
                     "requires NAT method, dest: %pI6\n",
                     &ipv6_hdr(skb)->daddr, daddr);
        dst_release(&rt->dst);
        return NULL;
    }
    if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
                 ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
                 IPV6_ADDR_LOOPBACK)) {
        IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
                     "to non-local address, dest: %pI6\n",
                     &ipv6_hdr(skb)->saddr, daddr);
        dst_release(&rt->dst);
        return NULL;
    }

    return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
    struct dst_entry *old_dst;

    old_dst = dest->dst_cache;
    dest->dst_cache = NULL;
    dst_release(old_dst);
}

#define IP_VS_XMIT_TUNNEL(skb, cp)				\
({								\
	int __ret = NF_ACCEPT;					\
								\
	(skb)->ipvs_property = 1;				\
	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
		__ret = ip_vs_confirm_conntrack(skb, cp);	\
	if (__ret == NF_ACCEPT) {				\
		nf_reset(skb);					\
		skb_forward_csum(skb);				\
	}							\
	__ret;							\
})

#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	else						\
		ip_vs_update_conntrack(skb, cp, 1);	\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)

#define IP_VS_XMIT(pf, skb, cp, local)			\
do {							\
	(skb)->ipvs_property = 1;			\
	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
		ip_vs_notrack(skb);			\
	if (local)					\
		return NF_ACCEPT;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		skb_dst(skb)->dev, dst_output);		\
} while (0)


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp)
{
    /* we do not touch skb and do not need pskb ptr */
    IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
}
Exemple #12
0
/* Get route to destination or remote server */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			rt = do_output_route4(net, dest->addr.ip, rtos,
					      rt_mode, &dest->dst_saddr.ip);
			if (!rt) {
				spin_unlock(&dest->dst_lock);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
				  "rtos=%X\n",
				  &dest->addr.ip, &dest->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest->dst_saddr.ip;
		spin_unlock(&dest->dst_lock);
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
		if (!rt)
			return NULL;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = rt->rt_flags & RTCF_LOCAL;
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
	    !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
			     "requires NAT method, dest: %pI4\n",
			     &ip_hdr(skb)->daddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}
	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
			     "to non-local address, dest: %pI4\n",
			     &ip_hdr(skb)->saddr, &daddr);
		ip_rt_put(rt);
		return NULL;
	}

	return rt;
}
Exemple #13
0
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, int offset)
{
    struct rtable	*rt;	/* Route to the other host */
    int mtu;
    int rc;
    int local;

    EnterFunction(10);

    /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
       forwarded directly here, because there is no need to
       translate address/port back */
    if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
        if (cp->packet_xmit)
            rc = cp->packet_xmit(skb, cp, pp);
        else
            rc = NF_ACCEPT;
        /* do not touch skb anymore */
        atomic_inc_unchecked(&cp->in_pkts);
        goto out;
    }

    /*
     * mangle and send the packet here (only for VS/NAT)
     */

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(ip_hdr(skb)->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL |
                                  IP_VS_RT_MODE_RDR)))
        goto tx_error_icmp;
    local = rt->rt_flags & RTCF_LOCAL;

    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG(10, "%s(): "
                      "stopping DNAT to local address %pI4\n",
                      __func__, &cp->daddr.ip);
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && ipv4_is_loopback(rt->rt_dst) &&
            rt_is_input_route(skb_rtable(skb))) {
        IP_VS_DBG(1, "%s(): "
                  "stopping DNAT to loopback %pI4\n",
                  __func__, &cp->daddr.ip);
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, offset))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    ip_vs_nat_icmp(skb, pp, cp, 0);

    if (!local) {
        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        ip_rt_put(rt);
        /*
         * Some IPv4 replies get local address from routes,
         * not from iph, so while we DNAT after routing
         * we need this second input/output route.
         */
        if (!__ip_vs_reroute_locally(skb))
            goto tx_error;
    }

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

    rc = NF_STOLEN;
    goto out;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    dev_kfree_skb(skb);
    rc = NF_STOLEN;
out:
    LeaveFunction(10);
    return rc;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Exemple #14
0
/*
 * Create syn packet and send it to rs.
 * ATTENTION: we also store syn skb in cp if syn retransimition
 * is tured on.
 */
static int
syn_proxy_send_rs_syn(int af, const struct tcphdr *th,
		      struct ip_vs_conn *cp, struct sk_buff *skb,
		      struct ip_vs_protocol *pp, struct ip_vs_synproxy_opt *opt)
{
	struct sk_buff *syn_skb;
	int tcp_hdr_size;
	__u8 tcp_flags = TCPCB_FLAG_SYN;
	unsigned int tcphoff;
	struct tcphdr *new_th;

	if (!cp->packet_xmit) {
		IP_VS_ERR_RL("warning: packet_xmit is null");
		return 0;
	}

	syn_skb = alloc_skb(MAX_TCP_HEADER + 15, GFP_ATOMIC);
	if (unlikely(syn_skb == NULL)) {
		IP_VS_ERR_RL("alloc skb failed when send rs syn packet\n");
		return 0;
	}

	/* Reserve space for headers */
	skb_reserve(syn_skb, MAX_TCP_HEADER);
	tcp_hdr_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			(opt->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			(opt->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			/* SACK_PERM is in the place of NOP NOP of TS */
			((opt->sack_ok
			  && !opt->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));

	new_th = (struct tcphdr *)skb_push(syn_skb, tcp_hdr_size);
	/* Compose tcp header */
	skb_reset_transport_header(syn_skb);
	syn_skb->csum = 0;

	/* Set tcp hdr */
	new_th->source = th->source;
	new_th->dest = th->dest;
	new_th->seq = htonl(ntohl(th->seq) - 1);
	new_th->ack_seq = 0;
	*(((__u16 *) new_th) + 6) =
	    htons(((tcp_hdr_size >> 2) << 12) | tcp_flags);
	/* FIX_ME: what window should we use */
	new_th->window = htons(5000);
	new_th->check = 0;
	new_th->urg_ptr = 0;
	new_th->urg = 0;
	new_th->ece = 0;
	new_th->cwr = 0;

	syn_proxy_syn_build_options((__be32 *) (new_th + 1), opt);

	/*
	 * Set ip hdr
	 * Attention: set source and dest addr to ack skb's.
	 * we rely on packet_xmit func to do NATs thing.
	 */
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		struct ipv6hdr *ack_iph = ipv6_hdr(skb);
		struct ipv6hdr *iph =
		    (struct ipv6hdr *)skb_push(syn_skb, sizeof(struct ipv6hdr));

		tcphoff = sizeof(struct ipv6hdr);
		skb_reset_network_header(syn_skb);
		memcpy(&iph->saddr, &ack_iph->saddr, sizeof(struct in6_addr));
		memcpy(&iph->daddr, &ack_iph->daddr, sizeof(struct in6_addr));

		iph->version = 6;
		iph->nexthdr = NEXTHDR_TCP;
		iph->payload_len = htons(tcp_hdr_size);
		iph->hop_limit = IPV6_DEFAULT_HOPLIMIT;

		new_th->check = 0;
		syn_skb->csum =
		    skb_checksum(syn_skb, tcphoff, syn_skb->len - tcphoff, 0);
		new_th->check =
		    csum_ipv6_magic(&iph->saddr, &iph->daddr,
				    syn_skb->len - tcphoff, IPPROTO_TCP,
				    syn_skb->csum);
	} else
#endif
	{
		struct iphdr *ack_iph = ip_hdr(skb);
		u32 rtos = RT_TOS(ack_iph->tos);
		struct iphdr *iph =
		    (struct iphdr *)skb_push(syn_skb, sizeof(struct iphdr));

		tcphoff = sizeof(struct iphdr);
		skb_reset_network_header(syn_skb);
		*((__u16 *) iph) = htons((4 << 12) | (5 << 8) | (rtos & 0xff));
		iph->tot_len = htons(syn_skb->len);
		iph->frag_off = htons(IP_DF);
		/* FIX_ME: what ttl shoule we use */
		iph->ttl = IPDEFTTL;
		iph->protocol = IPPROTO_TCP;
		iph->saddr = ack_iph->saddr;
		iph->daddr = ack_iph->daddr;

		ip_send_check(iph);

		new_th->check = 0;
		syn_skb->csum =
		    skb_checksum(syn_skb, tcphoff, syn_skb->len - tcphoff, 0);
		new_th->check =
		    csum_tcpudp_magic(iph->saddr, iph->daddr,
				      syn_skb->len - tcphoff, IPPROTO_TCP,
				      syn_skb->csum);
	}

	/* Save syn_skb if syn retransmission is on  */
	if (sysctl_ip_vs_synproxy_syn_retry > 0) {
		cp->syn_skb = skb_copy(syn_skb, GFP_ATOMIC);
		atomic_set(&cp->syn_retry_max, sysctl_ip_vs_synproxy_syn_retry);
	}

	/* Save info for fast_response_xmit */
	if(sysctl_ip_vs_fast_xmit && skb->dev &&
				likely(skb->dev->type == ARPHRD_ETHER) &&
				skb_mac_header_was_set(skb)) {
		struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb);

		if(likely(cp->indev == NULL)) {
			cp->indev = skb->dev;
			dev_hold(cp->indev);
		}

		if (unlikely(cp->indev != skb->dev)) {
			dev_put(cp->indev);
			cp->indev = skb->dev;
			dev_hold(cp->indev);
		}

		memcpy(cp->src_hwaddr, eth->h_source, ETH_ALEN);
		memcpy(cp->dst_hwaddr, eth->h_dest, ETH_ALEN);
		IP_VS_INC_ESTATS(ip_vs_esmib, FAST_XMIT_SYNPROXY_SAVE);
		IP_VS_DBG_RL("syn_proxy_send_rs_syn netdevice:%s\n",
						netdev_name(skb->dev));
	}

	/* count in the syn packet */
	ip_vs_in_stats(cp, skb);

	/* If xmit failed, syn_skb will be freed correctly. */
	cp->packet_xmit(syn_skb, cp, pp);

	return 1;
}
Exemple #15
0
/*
 *	Check if it's for virtual services, look it up,
 *	and send it on its way...
 */
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
	 const struct net_device *in, const struct net_device *out,
	 int (*okfn)(struct sk_buff *))
{
	struct sk_buff	*skb = *pskb;
	struct iphdr	*iph;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn *cp;
	int ret, restart;
	int ihl;

	/*
	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
	 *	... don't know why 1st test DOES NOT include 2nd (?)
	 */
	if (unlikely(skb->pkt_type != PACKET_HOST
		     || skb->dev == &loopback_dev || skb->sk)) {
		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
			  skb->pkt_type,
			  ip_hdr(skb)->protocol,
			  NIPQUAD(ip_hdr(skb)->daddr));
		return NF_ACCEPT;
	}

	iph = ip_hdr(skb);
	if (unlikely(iph->protocol == IPPROTO_ICMP)) {
		int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);

		if (related)
			return verdict;
		skb = *pskb;
		iph = ip_hdr(skb);
	}

	/* Protocol supported? */
	pp = ip_vs_proto_get(iph->protocol);
	if (unlikely(!pp))
		return NF_ACCEPT;

	ihl = iph->ihl << 2;

	/*
	 * Check if the packet belongs to an existing connection entry
	 */
	cp = pp->conn_in_get(skb, pp, iph, ihl, 0);

	if (unlikely(!cp)) {
		int v;

		if (!pp->conn_schedule(skb, pp, &v, &cp))
			return v;
	}

	if (unlikely(!cp)) {
		/* sorry, all this trouble for a no-hit :) */
		IP_VS_DBG_PKT(12, pp, skb, 0,
			      "packet continues traversal as normal");
		return NF_ACCEPT;
	}

	IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet");

	/* Check the server status */
	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
		/* the destination server is not available */

		if (sysctl_ip_vs_expire_nodest_conn) {
			/* try to expire the connection immediately */
			ip_vs_conn_expire_now(cp);
		}
		/* don't restart its timer, and silently
		   drop the packet. */
		__ip_vs_conn_put(cp);
		return NF_DROP;
	}

	ip_vs_in_stats(cp, skb);
	restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
	if (cp->packet_xmit)
		ret = cp->packet_xmit(skb, cp, pp);
		/* do not touch skb anymore */
	else {
		IP_VS_DBG_RL("warning: packet_xmit is null");
		ret = NF_ACCEPT;
	}

	/* increase its packet counter and check if it is needed
	   to be synchronized */
	atomic_inc(&cp->in_pkts);
	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
	    (cp->protocol != IPPROTO_TCP ||
	     cp->state == IP_VS_TCP_S_ESTABLISHED) &&
	    (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
	     == sysctl_ip_vs_sync_threshold[0]))
		ip_vs_sync_conn(cp);

	ip_vs_conn_put(cp);
	return ret;
}
Exemple #16
0
/*
 *	It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
 *	Check if outgoing packet belongs to the established ip_vs_conn,
 *      rewrite addresses of the packet and send it on its way...
 */
static unsigned int ip_vs_out(unsigned int hooknum,
			      struct sk_buff **skb_p,
			      const struct net_device *in,
			      const struct net_device *out,
			      int (*okfn)(struct sk_buff *))
{
	struct sk_buff  *skb = *skb_p;
	struct iphdr	*iph;
	union ip_vs_tphdr h;
	struct ip_vs_conn *cp;
	int size;
	int ihl;

	EnterFunction(11);

	if (skb->nfcache & NFC_IPVS_PROPERTY)
		return NF_ACCEPT;

	iph = skb->nh.iph;
	if (iph->protocol == IPPROTO_ICMP)
		return ip_vs_out_icmp(skb_p);

	/* let it go if other IP protocols */
	if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
		return NF_ACCEPT;

	/* reassemble IP fragments */
	if (iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
		skb = ip_defrag(skb, IP_DEFRAG_VS_OUT);
		if (!skb)
			return NF_STOLEN;
		iph = skb->nh.iph;
		*skb_p = skb;
	}

	/* make sure that protocol header available in skb data area,
	   note that skb data area may be reallocated. */
	ihl = iph->ihl << 2;
	if (ip_vs_header_check(skb, iph->protocol, ihl) == -1)
		return NF_DROP;

	iph = skb->nh.iph;
	h.raw = (char*) iph + ihl;

	/*
	 *	Check if the packet belongs to an old entry
	 */
	cp = ip_vs_conn_out_get(iph->protocol, iph->saddr, h.portp[0],
				iph->daddr, h.portp[1]);
	if (!cp) {
		if (sysctl_ip_vs_nat_icmp_send &&
		    ip_vs_lookup_real_service(iph->protocol,
					      iph->saddr, h.portp[0])) {
			/*
			 * Notify the real server: there is no existing
			 * entry if it is not RST packet or not TCP packet.
			 */
			if (!h.th->rst || iph->protocol != IPPROTO_TCP) {
				icmp_send(skb, ICMP_DEST_UNREACH,
					  ICMP_PORT_UNREACH, 0);
				kfree_skb(skb);
				return NF_STOLEN;
			}
		}
		IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d "
			  "continue traversal as normal.\n",
			  ip_vs_proto_name(iph->protocol),
			  NIPQUAD(iph->daddr),
			  ntohs(h.portp[1]));
		if (skb_is_nonlinear(skb))
			ip_send_check(iph);
		return NF_ACCEPT;
	}

	/*
	 * If it has ip_vs_app helper, the helper may change the payload,
	 * so it needs full checksum checking and checksum calculation.
	 * If not, only the header (addr/port) is changed, so it is fast
	 * to do incremental checksum update, and let the destination host
	 * do final checksum checking.
	 */

	if (cp->app && skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			ip_vs_conn_put(cp);
			return NF_DROP;
		}
		iph = skb->nh.iph;
		h.raw = (char*) iph + ihl;
	}

	size = skb->len - ihl;
	IP_VS_DBG(11, "O-pkt: %s size=%d\n",
		  ip_vs_proto_name(iph->protocol), size);

	/* do TCP/UDP checksum checking if it has application helper */
	if (cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
		switch (skb->ip_summed) {
		case CHECKSUM_NONE:
			skb->csum = csum_partial(h.raw, size, 0);
		case CHECKSUM_HW:
			if (csum_tcpudp_magic(iph->saddr, iph->daddr, size,
					      iph->protocol, skb->csum)) {
				ip_vs_conn_put(cp);
				IP_VS_DBG_RL("Outgoing failed %s checksum "
					     "from %d.%d.%d.%d (size=%d)!\n",
					     ip_vs_proto_name(iph->protocol),
					     NIPQUAD(iph->saddr),
					     size);
				return NF_DROP;
			}
			break;
		default:
			/* CHECKSUM_UNNECESSARY */
			break;
		}
	}

	IP_VS_DBG(11, "Outgoing %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
		  ip_vs_proto_name(iph->protocol),
		  NIPQUAD(iph->saddr), ntohs(h.portp[0]),
		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));

	/* mangle the packet */
	iph->saddr = cp->vaddr;
	h.portp[0] = cp->vport;

	/*
	 *	Call application helper if needed
	 */
	if (ip_vs_app_pkt_out(cp, skb) != 0) {
		/* skb data has probably changed, update pointers */
		iph = skb->nh.iph;
		h.raw = (char*)iph + ihl;
		size = skb->len - ihl;
	}

	/*
	 *	Adjust TCP/UDP checksums
	 */
	if (!cp->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) {
		/* Only port and addr are changed, do fast csum update */
		ip_vs_fast_check_update(&h, cp->daddr, cp->vaddr,
					cp->dport, cp->vport, iph->protocol);
		if (skb->ip_summed == CHECKSUM_HW)
			skb->ip_summed = CHECKSUM_NONE;
	} else {
		/* full checksum calculation */
		switch (iph->protocol) {
		case IPPROTO_TCP:
			h.th->check = 0;
			skb->csum = csum_partial(h.raw, size, 0);
			h.th->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
							size, iph->protocol,
							skb->csum);
			IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
				  ip_vs_proto_name(iph->protocol), h.th->check,
				  (char*)&(h.th->check) - (char*)h.raw);
			break;
		case IPPROTO_UDP:
			h.uh->check = 0;
			skb->csum = csum_partial(h.raw, size, 0);
			h.uh->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
							size, iph->protocol,
							skb->csum);
			if (h.uh->check == 0)
				h.uh->check = 0xFFFF;
			IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%d)\n",
				  ip_vs_proto_name(iph->protocol), h.uh->check,
				  (char*)&(h.uh->check) - (char*)h.raw);
			break;
		}
	}
	ip_send_check(iph);

	ip_vs_out_stats(cp, skb);
	ip_vs_set_state(cp, VS_STATE_OUTPUT, iph, h.portp);
	ip_vs_conn_put(cp);

	skb->nfcache |= NFC_IPVS_PROPERTY;

	LeaveFunction(11);
	return NF_ACCEPT;
}
Exemple #17
0
/*
 *	Check if it's for virtual services, look it up,
 *	and send it on its way...
 */
static unsigned int ip_vs_in(unsigned int hooknum,
			     struct sk_buff **skb_p,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct sk_buff	*skb = *skb_p;
	struct iphdr	*iph = skb->nh.iph;
	union ip_vs_tphdr h;
	struct ip_vs_conn *cp;
	struct ip_vs_service *svc;
	int ihl;
	int ret;

	/*
	 *	Big tappo: only PACKET_HOST (nor loopback neither mcasts)
	 *	... don't know why 1st test DOES NOT include 2nd (?)
	 */
	if (skb->pkt_type != PACKET_HOST || skb->dev == &loopback_dev) {
		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
			  skb->pkt_type,
			  iph->protocol,
			  NIPQUAD(iph->daddr));
		return NF_ACCEPT;
	}

	if (iph->protocol == IPPROTO_ICMP)
		return ip_vs_in_icmp(skb_p);

	/* let it go if other IP protocols */
	if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
		return NF_ACCEPT;

	/* make sure that protocol header available in skb data area,
	   note that skb data area may be reallocated. */
	ihl = iph->ihl << 2;
	if (ip_vs_header_check(skb, iph->protocol, ihl) == -1)
		return NF_DROP;
	iph = skb->nh.iph;
	h.raw = (char*) iph + ihl;

	/*
	 * Check if the packet belongs to an existing connection entry
	 */
	cp = ip_vs_conn_in_get(iph->protocol, iph->saddr, h.portp[0],
			       iph->daddr, h.portp[1]);

	if (!cp &&
	    (h.th->syn || (iph->protocol!=IPPROTO_TCP)) &&
	    (svc = ip_vs_service_get(skb->nfmark, iph->protocol,
				     iph->daddr, h.portp[1]))) {
		if (ip_vs_todrop()) {
			/*
			 * It seems that we are very loaded.
			 * We have to drop this packet :(
			 */
			ip_vs_service_put(svc);
			return NF_DROP;
		}

		/*
		 * Let the virtual server select a real server for the
		 * incoming connection, and create a connection entry.
		 */
		cp = ip_vs_schedule(svc, iph);
		if (!cp)
			return ip_vs_leave(svc, skb);
		ip_vs_conn_stats(cp, svc);
		ip_vs_service_put(svc);
	}

	if (!cp) {
		/* sorry, all this trouble for a no-hit :) */
		IP_VS_DBG(12, "packet for %s %d.%d.%d.%d:%d continue "
			  "traversal as normal.\n",
			  ip_vs_proto_name(iph->protocol),
			  NIPQUAD(iph->daddr),
			  ntohs(h.portp[1]));
		return NF_ACCEPT;
	}

	IP_VS_DBG(11, "Incoming %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d\n",
		  ip_vs_proto_name(iph->protocol),
		  NIPQUAD(iph->saddr), ntohs(h.portp[0]),
		  NIPQUAD(iph->daddr), ntohs(h.portp[1]));

	/* Check the server status */
	if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
		/* the destination server is not available */

		if (sysctl_ip_vs_expire_nodest_conn) {
			/* try to expire the connection immediately */
			ip_vs_conn_expire_now(cp);
		} else {
			/* don't restart its timer, and silently
			   drop the packet. */
			__ip_vs_conn_put(cp);
		}
		return NF_DROP;
	}

	ip_vs_in_stats(cp, skb);
	ip_vs_set_state(cp, VS_STATE_INPUT, iph, h.portp);
	if (cp->packet_xmit)
		ret = cp->packet_xmit(skb, cp);
	else {
		IP_VS_DBG_RL("warning: packet_xmit is null");
		ret = NF_ACCEPT;
	}

	/* increase its packet counter and check if it is needed
	   to be synchronized */
	atomic_inc(&cp->in_pkts);
	if (ip_vs_sync_state & IP_VS_STATE_MASTER &&
	    (cp->protocol != IPPROTO_TCP ||
	     cp->state == IP_VS_S_ESTABLISHED) &&
	    (atomic_read(&cp->in_pkts) % 50 == sysctl_ip_vs_sync_threshold))
		ip_vs_sync_conn(cp);

	ip_vs_conn_put(cp);
	return ret;
}
Exemple #18
0
/*
 *	Handle ICMP messages in the outside-to-inside direction (incoming)
 *	and sometimes in outgoing direction from ip_vs_forward_icmp.
 *	Find any that might be relevant, check against existing connections,
 *	forward to the right destination host if relevant.
 *	Currently handles error types - unreachable, quench, ttl exceeded.
 */
static int ip_vs_in_icmp(struct sk_buff **skb_p)
{
	struct sk_buff	*skb   = *skb_p;
	struct iphdr    *iph;
	struct icmphdr  *icmph;
	struct iphdr    *ciph;	/* The ip header contained within the ICMP */
	__u16	        *pptr;	/* port numbers from TCP/UDP contained header */
	unsigned short   len;
	unsigned short	clen, csize;
	struct ip_vs_conn *cp;
	struct rtable *rt;			/* Route to the other host */
	int    mtu;

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0)
			return NF_DROP;
	}

	iph = skb->nh.iph;
	ip_send_check(iph);
	icmph = (struct icmphdr *)((char *)iph + (iph->ihl << 2));
	len = ntohs(iph->tot_len) - (iph->ihl<<2);
	if (len < sizeof(struct icmphdr))
		return NF_DROP;

	IP_VS_DBG(12, "icmp in (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n",
		  icmph->type, ntohs(icmp_id(icmph)),
		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));

	if ((icmph->type != ICMP_DEST_UNREACH) &&
	    (icmph->type != ICMP_SOURCE_QUENCH) &&
	    (icmph->type != ICMP_TIME_EXCEEDED))
		return NF_ACCEPT;

	/*
	 * If we get here we have an ICMP error of one of the above 3 types
	 * Now find the contained IP header
	 */
	clen = len - sizeof(struct icmphdr);
	if (clen < sizeof(struct iphdr))
		return NF_DROP;
	ciph = (struct iphdr *) (icmph + 1);
	csize = ciph->ihl << 2;
	if (clen < csize)
		return NF_DROP;

	/* We are only interested ICMPs generated from TCP or UDP packets */
	if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	/* Skip non-first embedded TCP/UDP fragments */
	if (ciph->frag_off & __constant_htons(IP_OFFSET))
		return NF_ACCEPT;

	/* We need at least TCP/UDP ports here */
	if (clen < csize + sizeof(struct udphdr))
		return NF_DROP;

	/* Ensure the checksum is correct */
	if (ip_compute_csum((unsigned char *) icmph, len)) {
		/* Failed checksum! */
		IP_VS_ERR_RL("incoming ICMP: failed checksum from "
			     "%d.%d.%d.%d!\n", NIPQUAD(iph->saddr));
		return NF_DROP;
	}

	pptr = (__u16 *)&(((char *)ciph)[csize]);

	IP_VS_DBG(11, "Handling incoming ICMP for "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

	/* This is pretty much what ip_vs_conn_in_get() does,
	   except parameters are in the reverse order */
	cp = ip_vs_conn_in_get(ciph->protocol,
			       ciph->daddr, pptr[1],
			       ciph->saddr, pptr[0]);
	if (cp == NULL)
		return NF_ACCEPT;

	ip_vs_in_stats(cp, skb);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		int ret;
		if (cp->packet_xmit)
			ret = cp->packet_xmit(skb, cp);
		else
			ret = NF_ACCEPT;
		atomic_inc(&cp->in_pkts);
		ip_vs_conn_put(cp);
		return ret;
	}

	/*
	 * mangle and send the packet here
	 */
	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = rt->u.dst.pmtu;
	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* copy-on-write the packet before mangling it */
	if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len,
			  &iph, (unsigned char**)&icmph)) {
		ip_vs_conn_put(cp);
		return NF_DROP;
	}
	ciph = (struct iphdr *) (icmph + 1);
	pptr = (__u16 *)&(((char *)ciph)[csize]);

	/* The ICMP packet for VS/NAT must be written to correct addresses
	   before being forwarded to the right server */

	/* First change the dest IP address, and recalc checksum */
	iph->daddr = cp->daddr;
	ip_send_check(iph);

	/* Now change the *source* address in the contained IP */
	ciph->saddr = cp->daddr;
	ip_send_check(ciph);

	/* the TCP/UDP source port - cannot redo check */
	pptr[0] = cp->dport;

	/* And finally the ICMP checksum */
	icmph->checksum = 0;
	icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	IP_VS_DBG(11, "Forwarding incoming ICMP to "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	ip_send(skb);
	ip_vs_conn_put(cp);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	ip_vs_conn_put(cp);
	return NF_STOLEN;
}
Exemple #19
0
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, int rt_mode, __be32 *ret_saddr)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct ip_vs_dest_dst *dest_dst;
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	struct iphdr *iph;
	__be16 df;
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rtable *) dest_dst->dst_cache;
		else {
			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			rt = do_output_route4(net, dest->addr.ip, rt_mode,
					      &dest_dst->dst_saddr.ip);
			if (!rt) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
				  &dest->addr.ip, &dest_dst->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt));
		}
		daddr = dest->addr.ip;
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.ip;
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		noref = 0;

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rt_mode, &saddr);
		if (!rt)
			goto err_unreach;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
			     (rt->rt_flags & RTCF_LOCAL) ?
			     "local":"non-local", &daddr);
		goto err_put;
	}
	iph = ip_hdr(skb);
	if (likely(!local)) {
		if (unlikely(ipv4_is_loopback(iph->saddr))) {
			IP_VS_DBG_RL("Stopping traffic from loopback address "
				     "%pI4 to non-local address, dest: %pI4\n",
				     &iph->saddr, &daddr);
			goto err_put;
		}
	} else {
		ort = skb_rtable(skb);
		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
		    !(ort->rt_flags & RTCF_LOCAL)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI4 to "
				     "local requires NAT method, dest: %pI4\n",
				     &iph->daddr, &daddr);
			goto err_put;
		}
		/* skb to local stack, preserve old route */
		if (!noref)
			ip_rt_put(rt);
		return local;
	}

	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
		mtu = dst_mtu(&rt->dst);
		df = iph->frag_off & htons(IP_DF);
	} else {
		struct sock *sk = skb->sk;

		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
		if (mtu < 68) {
			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
			goto err_put;
		}
		ort = skb_rtable(skb);
		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
		/* MTU check allowed? */
		df = sysctl_pmtu_disc(ipvs) ? iph->frag_off & htons(IP_DF) : 0;
	}

	/* MTU checking */
	if (unlikely(df && skb->len > mtu && !skb_is_gso(skb))) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG(1, "frag needed for %pI4\n", &iph->saddr);
		goto err_put;
	}

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref_force(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		ip_rt_put(rt);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}
Exemple #20
0
/* Get route to destination or remote server */
static int
__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, int rt_mode, __be32 *ret_saddr,
		   struct ip_vs_iphdr *ipvsh)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct ip_vs_dest_dst *dest_dst;
	struct rtable *rt;			/* Route to the other host */
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rtable *) dest_dst->dst_cache;
		else {
			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			rt = do_output_route4(net, dest->addr.ip, rt_mode,
					      &dest_dst->dst_saddr.ip);
			if (!rt) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, 0);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d\n",
				  &dest->addr.ip, &dest_dst->dst_saddr.ip,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.ip;
	} else {
		__be32 saddr = htonl(INADDR_ANY);

		noref = 0;

		/* For such unconfigured boxes avoid many route lookups
		 * for performance reasons because we do not remember saddr
		 */
		rt_mode &= ~IP_VS_RT_MODE_CONNECT;
		rt = do_output_route4(net, daddr, rt_mode, &saddr);
		if (!rt)
			goto err_unreach;
		if (ret_saddr)
			*ret_saddr = saddr;
	}

	local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0;
	if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
						  local))) {
		IP_VS_DBG_RL("We are crossing local and non-local addresses"
			     " daddr=%pI4\n", &daddr);
		goto err_put;
	}

	if (unlikely(local)) {
		/* skb to local stack, preserve old route */
		if (!noref)
			ip_rt_put(rt);
		return local;
	}

	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL))) {
		mtu = dst_mtu(&rt->dst);
	} else {
		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
		if (mtu < 68) {
			IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
			goto err_put;
		}
		maybe_update_pmtu(skb_af, skb, mtu);
	}

	if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
		goto err_put;

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		ip_rt_put(rt);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}
Exemple #21
0
/* Get route to destination or remote server */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
                   __be32 daddr, u32 rtos, int rt_mode)
{
    struct net *net = dev_net(skb_dst(skb)->dev);
    struct rtable *rt;			/* Route to the other host */
    struct rtable *ort;			/* Original route */
    int local;

    if (dest) {
        spin_lock(&dest->dst_lock);
        if (!(rt = (struct rtable *)
                   __ip_vs_dst_check(dest, rtos))) {
            rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0);
            if (IS_ERR(rt)) {
                spin_unlock(&dest->dst_lock);
                IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
                             &dest->addr.ip);
                return NULL;
            }
            __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
            IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
                      &dest->addr.ip,
                      atomic_read(&rt->dst.__refcnt), rtos);
        }
        spin_unlock(&dest->dst_lock);
    } else {
        rt = ip_route_output(net, daddr, 0, rtos, 0);
        if (IS_ERR(rt)) {
            IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
                         &daddr);
            return NULL;
        }
    }

    local = rt->rt_flags & RTCF_LOCAL;
    if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
            rt_mode)) {
        IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
                     (rt->rt_flags & RTCF_LOCAL) ?
                     "local":"non-local", &rt->rt_dst);
        ip_rt_put(rt);
        return NULL;
    }
    if (local && !(rt_mode & IP_VS_RT_MODE_RDR) &&
            !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) {
        IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
                     "requires NAT method, dest: %pI4\n",
                     &ip_hdr(skb)->daddr, &rt->rt_dst);
        ip_rt_put(rt);
        return NULL;
    }
    if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
        IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
                     "to non-local address, dest: %pI4\n",
                     &ip_hdr(skb)->saddr, &rt->rt_dst);
        ip_rt_put(rt);
        return NULL;
    }

    return rt;
}
Exemple #22
0
/*
 * Syn-proxy step 3 logic: receive syn-ack from rs
 * Update syn_proxy_seq.delta and send stored ack skbs
 * to rs.
 */
int
ip_vs_synproxy_synack_rcv(struct sk_buff *skb, struct ip_vs_conn *cp,
			  struct ip_vs_protocol *pp, int ihl, int *verdict)
{
	struct tcphdr _tcph, *th;
	struct sk_buff_head save_skb;
	struct sk_buff *tmp_skb = NULL;
	struct ip_vs_dest *dest = cp->dest;

	th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph);
	if (th == NULL) {
		*verdict = NF_DROP;
		return 0;
	}

	IP_VS_DBG(6, "in syn_proxy_synack_rcv, "
		  "seq = %u ack_seq = %u %c%c%c cp->is_synproxy = %u cp->state = %u\n",
		  ntohl(th->seq),
		  ntohl(th->ack_seq),
		  (th->syn) ? 'S' : '-',
		  (th->ack) ? 'A' : '-',
		  (th->rst) ? 'R' : '-',
		  cp->flags & IP_VS_CONN_F_SYNPROXY, cp->state);

	skb_queue_head_init(&save_skb);
	spin_lock(&cp->lock);
	if ((th->syn) && (th->ack) && (!th->rst) &&
	    (cp->flags & IP_VS_CONN_F_SYNPROXY) &&
	    cp->state == IP_VS_TCP_S_SYN_SENT) {
		cp->syn_proxy_seq.delta =
		    htonl(cp->syn_proxy_seq.init_seq) - htonl(th->seq);
		cp->timeout = pp->timeout_table[cp->state =
						IP_VS_TCP_S_ESTABLISHED];
		if (dest) {
			atomic_inc(&dest->activeconns);
			atomic_dec(&dest->inactconns);
			cp->flags &= ~IP_VS_CONN_F_INACTIVE;
		}

		/* save tcp sequense for fullnat/nat, INside to OUTside */
		if (sysctl_ip_vs_conn_expire_tcp_rst == 1) {
			cp->rs_end_seq = htonl(ntohl(th->seq) + 1);
			cp->rs_ack_seq = th->ack_seq;
			IP_VS_DBG_RL("packet from RS, seq:%u ack_seq:%u.",
				     ntohl(th->seq), ntohl(th->ack_seq));
			IP_VS_DBG_RL("port:%u->%u", ntohs(th->source),
				     ntohs(th->dest));
		}

		/* First: free stored syn skb */
		if ((tmp_skb = xchg(&cp->syn_skb, NULL)) != NULL) {
			kfree_skb(tmp_skb);
			tmp_skb = NULL;
		}

		if (skb_queue_len(&cp->ack_skb) <= 0) {
			/*
			 * FIXME: maybe a bug here, print err msg and go.
			 * Attention: cp->state has been changed and we
			 * should still DROP the Syn/Ack skb.
			 */
			IP_VS_ERR_RL
			    ("Got ack_skb NULL pointer in syn_proxy_synack_rcv\n");
			spin_unlock(&cp->lock);
			*verdict = NF_DROP;
			return 0;
		}

		while ((tmp_skb = skb_dequeue(&cp->ack_skb)) != NULL) {
			skb_queue_tail(&save_skb, tmp_skb);
		}

		/*
		 * Release the lock, because we don't
		 * touch session any more.
		 */
		spin_unlock(&cp->lock);

		while ((tmp_skb = skb_dequeue(&save_skb)) != NULL) {
			/* If xmit failed, syn_skb will be freed correctly. */
			cp->packet_xmit(tmp_skb, cp, pp);
		}

		*verdict = NF_DROP;
		return 0;
	} else if ((th->rst) &&
		   (cp->flags & IP_VS_CONN_F_SYNPROXY) &&
		   cp->state == IP_VS_TCP_S_SYN_SENT) {
		__u32 temp_seq;
		temp_seq = ntohl(th->seq);
		IP_VS_DBG(6, "get rst from rs, seq = %u ack_seq= %u\n",
			  ntohl(th->seq), ntohl(th->ack_seq));
		/* coute the delta of seq */
		cp->syn_proxy_seq.delta =
		    ntohl(cp->syn_proxy_seq.init_seq) - ntohl(th->seq);
		cp->timeout = pp->timeout_table[cp->state = IP_VS_TCP_S_CLOSE];
		spin_unlock(&cp->lock);
		th->seq = htonl(ntohl(th->seq) + 1);
		syn_proxy_seq_csum_update(th, htonl(temp_seq), th->seq);

		return 1;
	}
	spin_unlock(&cp->lock);

	return 1;
}
Exemple #23
0
/*
 * Get route to destination or remote server
 */
static int
__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct ip_vs_dest_dst *dest_dst;
	struct rt6_info *rt;			/* Route to the other host */
	struct rt6_info *ort;			/* Original route */
	struct dst_entry *dst;
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rt6_info *) dest_dst->dst_cache;
		else {
			u32 cookie;

			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest_dst->dst_saddr.in6,
						      do_xfrm);
			if (!dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest_dst->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.in6;
	} else {
		noref = 0;
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
		if (!dst)
			goto err_unreach;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);
	if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) &
	      rt_mode)) {
		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6c\n",
			     local ? "local":"non-local", daddr);
		goto err_put;
	}
	if (likely(!local)) {
		if (unlikely((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
			     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
					    IPV6_ADDR_LOOPBACK)) {
			IP_VS_DBG_RL("Stopping traffic from loopback address "
				     "%pI6c to non-local address, "
				     "dest: %pI6c\n",
				     &ipv6_hdr(skb)->saddr, daddr);
			goto err_put;
		}
	} else {
		ort = (struct rt6_info *) skb_dst(skb);
		if (!(rt_mode & IP_VS_RT_MODE_RDR) &&
		    !__ip_vs_is_local_route6(ort)) {
			IP_VS_DBG_RL("Redirect from non-local address %pI6c "
				     "to local requires NAT method, "
				     "dest: %pI6c\n",
				     &ipv6_hdr(skb)->daddr, daddr);
			goto err_put;
		}
		/* skb to local stack, preserve old route */
		if (!noref)
			dst_release(&rt->dst);
		return local;
	}

	/* MTU checking */
	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
		mtu = dst_mtu(&rt->dst);
	else {
		struct sock *sk = skb->sk;

		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
		if (mtu < IPV6_MIN_MTU) {
			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
				     IPV6_MIN_MTU);
			goto err_put;
		}
		ort = (struct rt6_info *) skb_dst(skb);
		if (!skb->dev && sk && sk->sk_state != TCP_TIME_WAIT)
			ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
	}

	if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
		if (!skb->dev)
			skb->dev = net->loopback_dev;
		/* only send ICMP too big on first fragment */
		if (!ipvsh->fragoffs)
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG(1, "frag needed for %pI6c\n", &ipv6_hdr(skb)->saddr);
		goto err_put;
	}

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref_force(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		dst_release(&rt->dst);
	return -1;

err_unreach:
	dst_link_failure(skb);
	return -1;
}
Exemple #24
0
/*
 * Get route to destination or remote server
 */
static int
__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
		      struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct ip_vs_dest_dst *dest_dst;
	struct rt6_info *rt;			/* Route to the other host */
	struct dst_entry *dst;
	int mtu;
	int local, noref = 1;

	if (dest) {
		dest_dst = __ip_vs_dst_check(dest);
		if (likely(dest_dst))
			rt = (struct rt6_info *) dest_dst->dst_cache;
		else {
			u32 cookie;

			dest_dst = ip_vs_dest_dst_alloc();
			spin_lock_bh(&dest->dst_lock);
			if (!dest_dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				goto err_unreach;
			}
			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
						      &dest_dst->dst_saddr.in6,
						      do_xfrm, rt_mode);
			if (!dst) {
				__ip_vs_dst_set(dest, NULL, NULL, 0);
				spin_unlock_bh(&dest->dst_lock);
				ip_vs_dest_dst_free(dest_dst);
				goto err_unreach;
			}
			rt = (struct rt6_info *) dst;
			cookie = rt6_get_cookie(rt);
			__ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie);
			spin_unlock_bh(&dest->dst_lock);
			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
				  &dest->addr.in6, &dest_dst->dst_saddr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		if (ret_saddr)
			*ret_saddr = dest_dst->dst_saddr.in6;
	} else {
		noref = 0;
		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm,
					      rt_mode);
		if (!dst)
			goto err_unreach;
		rt = (struct rt6_info *) dst;
	}

	local = __ip_vs_is_local_route6(rt);

	if (unlikely(crosses_local_route_boundary(skb_af, skb, rt_mode,
						  local))) {
		IP_VS_DBG_RL("We are crossing local and non-local addresses"
			     " daddr=%pI6\n", daddr);
		goto err_put;
	}

	if (unlikely(local)) {
		/* skb to local stack, preserve old route */
		if (!noref)
			dst_release(&rt->dst);
		return local;
	}

	/* MTU checking */
	if (likely(!(rt_mode & IP_VS_RT_MODE_TUNNEL)))
		mtu = dst_mtu(&rt->dst);
	else {
		mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
		if (mtu < IPV6_MIN_MTU) {
			IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
				     IPV6_MIN_MTU);
			goto err_put;
		}
		maybe_update_pmtu(skb_af, skb, mtu);
	}

	if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
		goto err_put;

	skb_dst_drop(skb);
	if (noref) {
		if (!local)
			skb_dst_set_noref(skb, &rt->dst);
		else
			skb_dst_set(skb, dst_clone(&rt->dst));
	} else
		skb_dst_set(skb, &rt->dst);

	return local;

err_put:
	if (!noref)
		dst_release(&rt->dst);
	return -1;

err_unreach:
	/* The ip6_link_failure function requires the dev field to be set
	 * in order to get the net (further for the sake of fwmark
	 * reflection).
	 */
	if (!skb->dev)
		skb->dev = skb_dst(skb)->dev;

	dst_link_failure(skb);
	return -1;
}