示例#1
0
/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available, it may be only used in transparent cache cluster.
 */
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct iphdr  *iph = ip_hdr(skb);

	EnterFunction(10);

	rcu_read_lock();
	if (__ip_vs_get_out_rt(skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL,
			       NULL) < 0)
		goto tx_error;

	ip_send_check(iph);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
示例#2
0
/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
              struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct iphdr  *iph = ip_hdr(skb);
    int    mtu;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(iph->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        ip_rt_put(rt);
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error;
    }

    /*
     * Call ip_send_check because we are not sure it is called
     * after ip_defrag. Is copy-on-write needed?
     */
    if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
        ip_rt_put(rt);
        return NF_STOLEN;
    }
    ip_send_check(ip_hdr(skb));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);

    LeaveFunction(10);
    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
}
示例#3
0
/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	int local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_KNOWN_NH, NULL);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	ip_send_check(ip_hdr(skb));

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
	rcu_read_unlock();

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
示例#4
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct net_device *tdev;		/* Device to other host */
    struct iphdr  *old_iph = ip_hdr(skb);
    u8     tos = old_iph->tos;
    __be16 df = old_iph->frag_off;
    struct iphdr  *iph;			/* Our new IP header */
    unsigned int max_headroom;		/* The extra header space needed */
    int    mtu;
    int ret;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    tdev = rt->dst.dev;

    mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
    if (mtu < 68) {
        IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
        goto tx_error_put;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

    df |= (old_iph->frag_off & htons(IP_DF));

    if ((old_iph->frag_off & htons(IP_DF) &&
            mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

    if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb =
            skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            kfree_skb(skb);
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NF_STOLEN;
        }
        kfree_skb(skb);
        skb = new_skb;
        old_iph = ip_hdr(skb);
    }

    skb->transport_header = skb->network_header;

    /* fix old IP header checksum */
    ip_send_check(old_iph);

    skb_push(skb, sizeof(struct iphdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /*
     *	Push down and install the IPIP header.
     */
    iph			=	ip_hdr(skb);
    iph->version		=	4;
    iph->ihl		=	sizeof(struct iphdr)>>2;
    iph->frag_off		=	df;
    iph->protocol		=	IPPROTO_IPIP;
    iph->tos		=	tos;
    iph->daddr		=	rt->rt_dst;
    iph->saddr		=	rt->rt_src;
    iph->ttl		=	old_iph->ttl;
    ip_select_ident(iph, &rt->dst, NULL);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    ret = IP_VS_XMIT_TUNNEL(skb, cp);
    if (ret == NF_ACCEPT)
        ip_local_out(skb);
    else if (ret == NF_DROP)
        kfree_skb(skb);

    LeaveFunction(10);

    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
示例#5
0
/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
               struct ip_vs_protocol *pp)
{
    struct rtable *rt;		/* Route to the other host */
    int mtu;
    struct iphdr *iph = ip_hdr(skb);
    int local;

    EnterFunction(10);

    /* check if it is a connection of no-client-port */
    if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
        __be16 _pt, *p;
        p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
        if (p == NULL)
            goto tx_error;
        ip_vs_conn_fill_cport(cp, *p);
        IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
    }

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(iph->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL |
                                  IP_VS_RT_MODE_RDR)))
        goto tx_error_icmp;
    local = rt->rt_flags & RTCF_LOCAL;
    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
                             "ip_vs_nat_xmit(): "
                             "stopping DNAT to local address");
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && ipv4_is_loopback(rt->rt_dst) &&
            rt_is_input_route(skb_rtable(skb))) {
        IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
                         "stopping DNAT to loopback address");
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
                         "ip_vs_nat_xmit(): frag needed for");
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, sizeof(struct iphdr)))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    /* mangle the packet */
    if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
        goto tx_error_put;
    ip_hdr(skb)->daddr = cp->daddr.ip;
    ip_send_check(ip_hdr(skb));

    if (!local) {
        /* drop old route */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        ip_rt_put(rt);
        /*
         * Some IPv4 replies get local address from routes,
         * not from iph, so while we DNAT after routing
         * we need this second input/output route.
         */
        if (!__ip_vs_reroute_locally(skb))
            goto tx_error;
    }

    IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

    /* FIXME: when application helper enlarges the packet and the length
       is larger than the MTU of outgoing device, there will be still
       MTU problem. */

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

    LeaveFunction(10);
    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
示例#6
0
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                struct ip_vs_protocol *pp, int offset)
{
    struct rtable	*rt;	/* Route to the other host */
    int mtu;
    int rc;
    int local;

    EnterFunction(10);

    /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
       forwarded directly here, because there is no need to
       translate address/port back */
    if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
        if (cp->packet_xmit)
            rc = cp->packet_xmit(skb, cp, pp);
        else
            rc = NF_ACCEPT;
        /* do not touch skb anymore */
        atomic_inc_unchecked(&cp->in_pkts);
        goto out;
    }

    /*
     * mangle and send the packet here (only for VS/NAT)
     */

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(ip_hdr(skb)->tos),
                                  IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL |
                                  IP_VS_RT_MODE_RDR)))
        goto tx_error_icmp;
    local = rt->rt_flags & RTCF_LOCAL;

    /*
     * Avoid duplicate tuple in reply direction for NAT traffic
     * to local address when connection is sync-ed
     */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
    if (cp->flags & IP_VS_CONN_F_SYNC && local) {
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);

        if (ct && !nf_ct_is_untracked(ct)) {
            IP_VS_DBG(10, "%s(): "
                      "stopping DNAT to local address %pI4\n",
                      __func__, &cp->daddr.ip);
            goto tx_error_put;
        }
    }
#endif

    /* From world but DNAT to loopback address? */
    if (local && ipv4_is_loopback(rt->rt_dst) &&
            rt_is_input_route(skb_rtable(skb))) {
        IP_VS_DBG(1, "%s(): "
                  "stopping DNAT to loopback %pI4\n",
                  __func__, &cp->daddr.ip);
        goto tx_error_put;
    }

    /* MTU checking */
    mtu = dst_mtu(&rt->dst);
    if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
            !skb_is_gso(skb)) {
        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /* copy-on-write the packet before mangling it */
    if (!skb_make_writable(skb, offset))
        goto tx_error_put;

    if (skb_cow(skb, rt->dst.dev->hard_header_len))
        goto tx_error_put;

    ip_vs_nat_icmp(skb, pp, cp, 0);

    if (!local) {
        /* drop the old route when skb is not shared */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
    } else {
        ip_rt_put(rt);
        /*
         * Some IPv4 replies get local address from routes,
         * not from iph, so while we DNAT after routing
         * we need this second input/output route.
         */
        if (!__ip_vs_reroute_locally(skb))
            goto tx_error;
    }

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

    rc = NF_STOLEN;
    goto out;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    dev_kfree_skb(skb);
    rc = NF_STOLEN;
out:
    LeaveFunction(10);
    return rc;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
示例#7
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/* Copy DF, reset fragment offset and MF */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);

		if (!new_skb)
			goto tx_error;
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(skb, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
示例#8
0
/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rtable *rt;		/* Route to the other host */
	int local, rc, was_input;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;

		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	was_input = rt_is_input_route(skb_rtable(skb));
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_RDR, NULL);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
				 "stopping DNAT to loopback address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
示例#9
0
/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		struct ip_vs_iphdr *iph)
{
	struct rtable	*rt;	/* Route to the other host */
	int rc;
	int local;
	int rt_mode, was_input;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, iph);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc_unchecked(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */
	was_input = rt_is_input_route(skb_rtable(skb));

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, rt_mode, NULL);
	if (local < 0)
		goto tx_error;
	rt = skb_rtable(skb);

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI4\n",
			  __func__, &cp->daddr.ip);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
	rcu_read_unlock();
	goto out;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
}
示例#10
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct net *net = skb_net(skb);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	__u8 next_protocol = 0;
	__u8 dsfield = 0;
	__u8 ttl = 0;
	__be16 df = 0;
	__be16 *dfp = NULL;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
	dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
					 &next_protocol, NULL, &dsfield,
					 &ttl, dfp);
	if (IS_ERR(skb))
		goto tx_error;

	skb = iptunnel_handle_offloads(
		skb, false, __tun_gso_type_mask(AF_INET, cp->af));
	if (IS_ERR(skb))
		goto tx_error;

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	next_protocol;
	iph->tos		=	dsfield;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	ttl;
	ip_select_ident(net, skb, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	if (!IS_ERR(skb))
		kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
示例#11
0
/*
 *	Handle ICMP messages in the outside-to-inside direction (incoming)
 *	and sometimes in outgoing direction from ip_vs_forward_icmp.
 *	Find any that might be relevant, check against existing connections,
 *	forward to the right destination host if relevant.
 *	Currently handles error types - unreachable, quench, ttl exceeded.
 */
static int ip_vs_in_icmp(struct sk_buff **skb_p)
{
	struct sk_buff	*skb   = *skb_p;
	struct iphdr    *iph;
	struct icmphdr  *icmph;
	struct iphdr    *ciph;	/* The ip header contained within the ICMP */
	__u16	        *pptr;	/* port numbers from TCP/UDP contained header */
	unsigned short   len;
	unsigned short	clen, csize;
	struct ip_vs_conn *cp;
	struct rtable *rt;			/* Route to the other host */
	int    mtu;

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0)
			return NF_DROP;
	}

	iph = skb->nh.iph;
	ip_send_check(iph);
	icmph = (struct icmphdr *)((char *)iph + (iph->ihl << 2));
	len = ntohs(iph->tot_len) - (iph->ihl<<2);
	if (len < sizeof(struct icmphdr))
		return NF_DROP;

	IP_VS_DBG(12, "icmp in (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n",
		  icmph->type, ntohs(icmp_id(icmph)),
		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));

	if ((icmph->type != ICMP_DEST_UNREACH) &&
	    (icmph->type != ICMP_SOURCE_QUENCH) &&
	    (icmph->type != ICMP_TIME_EXCEEDED))
		return NF_ACCEPT;

	/*
	 * If we get here we have an ICMP error of one of the above 3 types
	 * Now find the contained IP header
	 */
	clen = len - sizeof(struct icmphdr);
	if (clen < sizeof(struct iphdr))
		return NF_DROP;
	ciph = (struct iphdr *) (icmph + 1);
	csize = ciph->ihl << 2;
	if (clen < csize)
		return NF_DROP;

	/* We are only interested ICMPs generated from TCP or UDP packets */
	if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	/* Skip non-first embedded TCP/UDP fragments */
	if (ciph->frag_off & __constant_htons(IP_OFFSET))
		return NF_ACCEPT;

	/* We need at least TCP/UDP ports here */
	if (clen < csize + sizeof(struct udphdr))
		return NF_DROP;

	/* Ensure the checksum is correct */
	if (ip_compute_csum((unsigned char *) icmph, len)) {
		/* Failed checksum! */
		IP_VS_ERR_RL("incoming ICMP: failed checksum from "
			     "%d.%d.%d.%d!\n", NIPQUAD(iph->saddr));
		return NF_DROP;
	}

	pptr = (__u16 *)&(((char *)ciph)[csize]);

	IP_VS_DBG(11, "Handling incoming ICMP for "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

	/* This is pretty much what ip_vs_conn_in_get() does,
	   except parameters are in the reverse order */
	cp = ip_vs_conn_in_get(ciph->protocol,
			       ciph->daddr, pptr[1],
			       ciph->saddr, pptr[0]);
	if (cp == NULL)
		return NF_ACCEPT;

	ip_vs_in_stats(cp, skb);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		int ret;
		if (cp->packet_xmit)
			ret = cp->packet_xmit(skb, cp);
		else
			ret = NF_ACCEPT;
		atomic_inc(&cp->in_pkts);
		ip_vs_conn_put(cp);
		return ret;
	}

	/*
	 * mangle and send the packet here
	 */
	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = rt->u.dst.pmtu;
	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
		goto tx_error;
	}

	/* drop old route */
	dst_release(skb->dst);
	skb->dst = &rt->u.dst;

	/* copy-on-write the packet before mangling it */
	if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len,
			  &iph, (unsigned char**)&icmph)) {
		ip_vs_conn_put(cp);
		return NF_DROP;
	}
	ciph = (struct iphdr *) (icmph + 1);
	pptr = (__u16 *)&(((char *)ciph)[csize]);

	/* The ICMP packet for VS/NAT must be written to correct addresses
	   before being forwarded to the right server */

	/* First change the dest IP address, and recalc checksum */
	iph->daddr = cp->daddr;
	ip_send_check(iph);

	/* Now change the *source* address in the contained IP */
	ciph->saddr = cp->daddr;
	ip_send_check(ciph);

	/* the TCP/UDP source port - cannot redo check */
	pptr[0] = cp->dport;

	/* And finally the ICMP checksum */
	icmph->checksum = 0;
	icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	IP_VS_DBG(11, "Forwarding incoming ICMP to "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

#ifdef CONFIG_NETFILTER_DEBUG
	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
#endif /* CONFIG_NETFILTER_DEBUG */
	ip_send(skb);
	ip_vs_conn_put(cp);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	ip_vs_conn_put(cp);
	return NF_STOLEN;
}