Beispiel #1
0
/*
 *      Source Hashing scheduling
 */
static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct ip_vs_dest *dest;
	struct ip_vs_sh_state *s;
	struct ip_vs_iphdr iph;

	ip_vs_fill_iph_addr_only(svc->af, skb, &iph);

	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");

	s = (struct ip_vs_sh_state *) svc->sched_data;
	dest = ip_vs_sh_get(svc->af, s, &iph.saddr);
	if (!dest
	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
	    || atomic_read(&dest->weight) <= 0
	    || is_overloaded(dest)) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, &iph.saddr),
		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
		      ntohs(dest->port));

	return dest;
}
Beispiel #2
0
/*
 *      Source Hashing scheduling
 */
static struct ip_vs_dest *
ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
		  struct ip_vs_iphdr *iph)
{
	struct ip_vs_dest *dest;
	struct ip_vs_sh_state *s;
	__be16 port = 0;

	IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");

	if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
		port = ip_vs_sh_get_port(skb, iph);

	s = (struct ip_vs_sh_state *) svc->sched_data;

	if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
		dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
	else
		dest = ip_vs_sh_get(svc, s, &iph->saddr, port);

	if (!dest) {
		ip_vs_scheduler_err(svc, "no destination available");
		return NULL;
	}

	IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, &iph->saddr),
		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
		      ntohs(dest->port));

	return dest;
}
Beispiel #3
0
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
		    struct ip_vs_protocol *pp,
		    const struct ip_vs_iphdr *iph,
		    unsigned int proto_off,
		    int inverse)
{
	struct ip_vs_conn *cp;

	if (likely(!inverse)) {
		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
					&iph->saddr,
					htons(PORT_ISAKMP),
					&iph->daddr,
					htons(PORT_ISAKMP));
	} else {
		cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
					&iph->daddr,
					htons(PORT_ISAKMP),
					&iph->saddr,
					htons(PORT_ISAKMP));
	}

	if (!cp) {
		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
			      "%s%s %s->%s\n",
			      inverse ? "ICMP+" : "",
			      pp->name,
			      IP_VS_DBG_ADDR(af, &iph->saddr),
			      IP_VS_DBG_ADDR(af, &iph->daddr));
	}

	return cp;
}
static struct ip_vs_dest *
ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct ip_vs_dest *dest;
	struct ip_vs_dh_bucket *tbl;
	struct ip_vs_iphdr iph;

	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	tbl = (struct ip_vs_dh_bucket *)svc->sched_data;
	dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr);
	if (!dest
	    || !(dest->flags & IP_VS_DEST_F_AVAILABLE)
	    || atomic_read(&dest->weight) <= 0
	    || is_overloaded(dest)) {
		return NULL;
	}

	IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n",
		      IP_VS_DBG_ADDR(svc->af, &iph.daddr),
		      IP_VS_DBG_ADDR(svc->af, &dest->addr),
		      ntohs(dest->port));

	return dest;
}
Beispiel #5
0
static struct ip_vs_conn *
ah_esp_conn_in_get(int af, const struct sk_buff *skb,
		   const struct ip_vs_iphdr *iph, unsigned int proto_off,
		   int inverse)
{
	struct ip_vs_conn *cp;
	struct ip_vs_conn_param p;
	struct net *net = skb_net(skb);

	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
	cp = ip_vs_conn_in_get(&p);
	if (!cp) {
		/*
		 * We are not sure if the packet is from our
		 * service, so our conn_schedule hook should return NF_ACCEPT
		 */
		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
			      "%s%s %s->%s\n",
			      inverse ? "ICMP+" : "",
			      ip_vs_proto_get(iph->protocol)->name,
			      IP_VS_DBG_ADDR(af, &iph->saddr),
			      IP_VS_DBG_ADDR(af, &iph->daddr));
	}

	return cp;
}
Beispiel #6
0
static struct ip_vs_conn *ah_esp_conn_in_get(int af, const struct sk_buff *skb,
					     struct ip_vs_protocol *pp,
					     const struct ip_vs_iphdr *iph,
					     unsigned int proto_off,
					     int inverse, int *res_dir)
{
	struct ip_vs_conn *cp;

	if (likely(!inverse)) {
		cp = ip_vs_conn_get(af, IPPROTO_UDP,
				    &iph->saddr,
				    htons(PORT_ISAKMP),
				    &iph->daddr, htons(PORT_ISAKMP), res_dir);
	} else {
		cp = ip_vs_conn_get(af, IPPROTO_UDP,
				    &iph->daddr,
				    htons(PORT_ISAKMP),
				    &iph->saddr, htons(PORT_ISAKMP), res_dir);
	}

	if (!cp) {
		/*
		 * We are not sure if the packet is from our
		 * service, so our conn_schedule hook should return NF_ACCEPT
		 */
		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
			      "%s%s %s->%s\n",
			      inverse ? "ICMP+" : "",
			      pp->name,
			      IP_VS_DBG_ADDR(af, &iph->saddr),
			      IP_VS_DBG_ADDR(af, &iph->daddr));
	}

	return cp;
}
static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
				  struct ip_vs_conn *ct)

{
	bool ret = false;

	if (ct->af == p->af &&
	    ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
	    ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
			     p->vaddr, &ct->vaddr) &&
	    ct->vport == p->vport &&
	    ct->flags & IP_VS_CONN_F_TEMPLATE &&
	    ct->protocol == p->protocol &&
	    ct->pe_data && ct->pe_data_len == p->pe_data_len &&
	    !memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
		ret = true;

	IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
		      ip_vs_proto_name(p->protocol),
		      IP_VS_DEBUG_CALLID(p->pe_data, p->pe_data_len),
		      IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
		      ret ? "hit" : "not hit");

	return ret;
}
Beispiel #8
0
/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_list *e;
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	if (set == NULL)
		return NULL;

	/* select the first destination server, whose weight > 0 */
	for (e=set->list; e!=NULL; e=e->next) {
		least = e->dest;
		if (least->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		if ((atomic_read(&least->weight) > 0)
		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
			loh = atomic_read(&least->activeconns) * 50
				+ atomic_read(&least->inactconns);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted least load */
  nextstage:
	for (e=e->next; e!=NULL; e=e->next) {
		dest = e->dest;
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;

		doh = atomic_read(&dest->activeconns) * 50
			+ atomic_read(&dest->inactconns);
		if ((loh * atomic_read(&dest->weight) >
		     doh * atomic_read(&least->weight))
		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
			least = dest;
			loh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(least->af, &least->addr),
		      ntohs(least->port),
		      atomic_read(&least->activeconns),
		      atomic_read(&least->refcnt),
		      atomic_read(&least->weight), loh);
	return least;
}
Beispiel #9
0
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
		    const struct ip_vs_iphdr *iph,
		    unsigned int proto_off,
		    int inverse)
{
	struct ip_vs_conn *cp;
	struct ip_vs_conn_param p;
	struct net *net = skb_net(skb);

	ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
	cp = ip_vs_conn_out_get(&p);
	if (!cp) {
		IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
			      "%s%s %s->%s\n",
			      inverse ? "ICMP+" : "",
			      ip_vs_proto_get(iph->protocol)->name,
			      IP_VS_DBG_ADDR(af, &iph->saddr),
			      IP_VS_DBG_ADDR(af, &iph->daddr));
	}

	return cp;
}
Beispiel #10
0
/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_list *e;
	struct ip_vs_dest *dest, *most;
	int moh, doh;

	if (set == NULL)
		return NULL;

	/* select the first destination server, whose weight > 0 */
	for (e=set->list; e!=NULL; e=e->next) {
		most = e->dest;
		if (atomic_read(&most->weight) > 0) {
			moh = atomic_read(&most->activeconns) * 50
				+ atomic_read(&most->inactconns);
			goto nextstage;
		}
	}
	return NULL;

	/* find the destination with the weighted most load */
  nextstage:
	for (e=e->next; e!=NULL; e=e->next) {
		dest = e->dest;
		doh = atomic_read(&dest->activeconns) * 50
			+ atomic_read(&dest->inactconns);
		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
		if ((moh * atomic_read(&dest->weight) <
		     doh * atomic_read(&most->weight))
		    && (atomic_read(&dest->weight) > 0)) {
			most = dest;
			moh = doh;
		}
	}

	IP_VS_DBG_BUF(6, "%s(): server %s:%d "
		      "activeconns %d refcnt %d weight %d overhead %d\n",
		      __func__,
		      IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port),
		      atomic_read(&most->activeconns),
		      atomic_read(&most->refcnt),
		      atomic_read(&most->weight), moh);
	return most;
}
Beispiel #11
0
/*
 *      Assign all the hash buckets of the specified table with the service.
 */
static int
ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_sh_bucket *b;
	struct list_head *p;
	struct ip_vs_dest *dest;
	int d_count;
	bool empty;

	b = &s->buckets[0];
	p = &svc->destinations;
	empty = list_empty(p);
	d_count = 0;
	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
		dest = rcu_dereference_protected(b->dest, 1);
		if (dest)
			ip_vs_dest_put(dest);
		if (empty)
			RCU_INIT_POINTER(b->dest, NULL);
		else {
			if (p == &svc->destinations)
				p = p->next;

			dest = list_entry(p, struct ip_vs_dest, n_list);
			ip_vs_dest_hold(dest);
			RCU_INIT_POINTER(b->dest, dest);

			IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
				      i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      atomic_read(&dest->weight));

			/* Don't move to next dest until filling weight */
			if (++d_count >= atomic_read(&dest->weight)) {
				p = p->next;
				d_count = 0;
			}

		}
		b++;
	}
	return 0;
}
Beispiel #12
0
/*
 * Round-Robin Scheduling
 */
static struct ip_vs_dest *
ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct list_head *p, *q;
	struct ip_vs_dest *dest;

	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);

	write_lock(&svc->sched_lock);
	p = (struct list_head *)svc->sched_data;
	p = p->next;
	q = p;
	do {
		/* skip list head */
		if (q == &svc->destinations) {
			q = q->next;
			continue;
		}

		dest = list_entry(q, struct ip_vs_dest, n_list);
		if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
		    atomic_read(&dest->weight) > 0)
			/* HIT */
			goto out;
		q = q->next;
	} while (q != p);
	write_unlock(&svc->sched_lock);
	ip_vs_scheduler_err(svc, "no destination available");
	return NULL;

  out:
	svc->sched_data = q;
	write_unlock(&svc->sched_lock);
	IP_VS_DBG_BUF(6, "RR: server %s:%u "
		      "activeconns %d refcnt %d weight %d\n",
		      IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port),
		      atomic_read(&dest->activeconns),
		      atomic_read(&dest->refcnt), atomic_read(&dest->weight));

	return dest;
}
/*
 *      Assign all the hash buckets of the specified table with the service.
 */
static int
ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_sh_bucket *b;
	struct list_head *p;
	struct ip_vs_dest *dest;
	int d_count;

	b = tbl;
	p = &svc->destinations;
	d_count = 0;
	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {
		if (list_empty(p)) {
			b->dest = NULL;
		} else {
			if (p == &svc->destinations)
				p = p->next;

			dest = list_entry(p, struct ip_vs_dest, n_list);
			atomic_inc(&dest->refcnt);
			b->dest = dest;

			IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n",
				      i, IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      atomic_read(&dest->weight));

			/* Don't move to next dest until filling weight */
			if (++d_count >= atomic_read(&dest->weight)) {
				p = p->next;
				d_count = 0;
			}

		}
		b++;
	}
	return 0;
}
Beispiel #14
0
/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
static inline struct ip_vs_dest *
ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
		      const union nf_inet_addr *addr, __be16 port)
{
	unsigned int offset;
	unsigned int hash;
	struct ip_vs_dest *dest;

	for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
		hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
		dest = rcu_dereference(s->buckets[hash].dest);
		if (!dest)
			break;
		if (is_unavailable(dest))
			IP_VS_DBG_BUF(6, "SH: selected unavailable server "
				      "%s:%d (offset %d)",
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port), offset);
		else
			return dest;
	}

	return NULL;
}
Beispiel #15
0
/*
 *  IPVS persistent scheduling function
 *  It creates a connection entry according to its template if exists,
 *  or selects a server and creates a connection entry plus a template.
 *  Locking: we are svc user (svc->refcnt), so we hold all dests too
 *  Protocols supported: TCP, UDP
 */
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
		    struct sk_buff *skb, __be16 src_port, __be16 dst_port,
		    int *ignored, struct ip_vs_iphdr *iph)
{
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_dest *dest;
	struct ip_vs_conn *ct;
	__be16 dport = 0;		/* destination port to forward */
	unsigned int flags;
	struct ip_vs_conn_param param;
	const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
	union nf_inet_addr snet;	/* source network of the client,
					   after masking */

	/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6)
		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
				 (__force __u32) svc->netmask);
	else
#endif
		snet.ip = iph->saddr.ip & svc->netmask;

	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
		      "mnet %s\n",
		      IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
		      IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
		      IP_VS_DBG_ADDR(svc->af, &snet));

	/*
	 * As far as we know, FTP is a very complicated network protocol, and
	 * it uses control connection and data connections. For active FTP,
	 * FTP server initialize data connection to the client, its source port
	 * is often 20. For passive FTP, FTP server tells the clients the port
	 * that it passively listens to,  and the client issues the data
	 * connection. In the tunneling or direct routing mode, the load
	 * balancer is on the client-to-server half of connection, the port
	 * number is unknown to the load balancer. So, a conn template like
	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
	 * is created for other persistent services.
	 */
	{
		int protocol = iph->protocol;
		const union nf_inet_addr *vaddr = &iph->daddr;
		__be16 vport = 0;

		if (dst_port == svc->port) {
			/* non-FTP template:
			 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
			 * FTP template:
			 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
			 */
			if (svc->port != FTPPORT)
				vport = dst_port;
		} else {
			/* Note: persistent fwmark-based services and
			 * persistent port zero service are handled here.
			 * fwmark template:
			 * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
			 * port zero template:
			 * <protocol,caddr,0,vaddr,0,daddr,0>
			 */
			if (svc->fwmark) {
				protocol = IPPROTO_IP;
				vaddr = &fwmark;
			}
		}
		/* return *ignored = -1 so NF_DROP can be used */
		if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
						  vaddr, vport, &param) < 0) {
			*ignored = -1;
			return NULL;
		}
	}

	/* Check if a template already exists */
	ct = ip_vs_ct_in_get(&param);
	if (!ct || !ip_vs_check_template(ct)) {
		struct ip_vs_scheduler *sched;

		/*
		 * No template found or the dest of the connection
		 * template is not available.
		 * return *ignored=0 i.e. ICMP and NF_DROP
		 */
		sched = rcu_dereference(svc->scheduler);
		if (sched) {
			/* read svc->sched_data after svc->scheduler */
			smp_rmb();
			dest = sched->schedule(svc, skb, iph);
		} else {
			dest = NULL;
		}
		if (!dest) {
			IP_VS_DBG(1, "p-schedule: no dest found.\n");
			kfree(param.pe_data);
			*ignored = 0;
			return NULL;
		}

		if (dst_port == svc->port && svc->port != FTPPORT)
			dport = dest->port;

		/* Create a template
		 * This adds param.pe_data to the template,
		 * and thus param.pe_data will be destroyed
		 * when the template expires */
		ct = ip_vs_conn_new(&param, dest->af, &dest->addr, dport,
				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
		if (ct == NULL) {
			kfree(param.pe_data);
			*ignored = -1;
			return NULL;
		}

		ct->timeout = svc->timeout;
	} else {
		/* set destination with the found template */
		dest = ct->dest;
		kfree(param.pe_data);
	}

	dport = dst_port;
	if (dport == svc->port && dest->port)
		dport = dest->port;

	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
		 && iph->protocol == IPPROTO_UDP) ?
		IP_VS_CONN_F_ONE_PACKET : 0;

	/*
	 *    Create a new connection according to the template
	 */
	ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
			      src_port, &iph->daddr, dst_port, &param);

	cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
			    skb->mark);
	if (cp == NULL) {
		ip_vs_conn_put(ct);
		*ignored = -1;
		return NULL;
	}

	/*
	 *    Add its control
	 */
	ip_vs_control_add(cp, ct);
	ip_vs_conn_put(ct);

	ip_vs_conn_stats(cp, svc);
	return cp;
}


/*
 *  IPVS main scheduling function
 *  It selects a server according to the virtual service, and
 *  creates a connection entry.
 *  Protocols supported: TCP, UDP
 *
 *  Usage of *ignored
 *
 * 1 :   protocol tried to schedule (eg. on SYN), found svc but the
 *       svc/scheduler decides that this packet should be accepted with
 *       NF_ACCEPT because it must not be scheduled.
 *
 * 0 :   scheduler can not find destination, so try bypass or
 *       return ICMP and then NF_DROP (ip_vs_leave).
 *
 * -1 :  scheduler tried to schedule but fatal error occurred, eg.
 *       ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
 *       failure such as missing Call-ID, ENOMEM on skb_linearize
 *       or pe_data. In this case we should return NF_DROP without
 *       any attempts to send ICMP with ip_vs_leave.
 */
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
	       struct ip_vs_proto_data *pd, int *ignored,
	       struct ip_vs_iphdr *iph)
{
	struct ip_vs_protocol *pp = pd->pp;
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_scheduler *sched;
	struct ip_vs_dest *dest;
	__be16 _ports[2], *pptr;
	unsigned int flags;

	*ignored = 1;
	/*
	 * IPv6 frags, only the first hit here.
	 */
	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
	if (pptr == NULL)
		return NULL;

	/*
	 * FTPDATA needs this check when using local real server.
	 * Never schedule Active FTPDATA connections from real server.
	 * For LVS-NAT they must be already created. For other methods
	 * with persistence the connection is created on SYN+ACK.
	 */
	if (pptr[0] == FTPDATA) {
		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
			      "Not scheduling FTPDATA");
		return NULL;
	}

	/*
	 *    Do not schedule replies from local real server.
	 */
	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
	    (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
			      "Not scheduling reply for existing connection");
		__ip_vs_conn_put(cp);
		return NULL;
	}

	/*
	 *    Persistent service
	 */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
					   iph);

	*ignored = 0;

	/*
	 *    Non-persistent service
	 */
	if (!svc->fwmark && pptr[1] != svc->port) {
		if (!svc->port)
			pr_err("Schedule: port zero only supported "
			       "in persistent services, "
			       "check your ipvs configuration\n");
		return NULL;
	}

	sched = rcu_dereference(svc->scheduler);
	if (sched) {
		/* read svc->sched_data after svc->scheduler */
		smp_rmb();
		dest = sched->schedule(svc, skb, iph);
	} else {
		dest = NULL;
	}
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
		 && iph->protocol == IPPROTO_UDP) ?
		IP_VS_CONN_F_ONE_PACKET : 0;

	/*
	 *    Create a connection entry.
	 */
	{
		struct ip_vs_conn_param p;

		ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
				      &iph->saddr, pptr[0], &iph->daddr,
				      pptr[1], &p);
		cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
				    dest->port ? dest->port : pptr[1],
				    flags, dest, skb->mark);
		if (!cp) {
			*ignored = -1;
			return NULL;
		}
	}

	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
		      ip_vs_fwd_tag(cp),
		      IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport),
		      IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport),
		      IP_VS_DBG_ADDR(cp->daf, &cp->daddr), ntohs(cp->dport),
		      cp->flags, atomic_read(&cp->refcnt));

	ip_vs_conn_stats(cp, svc);
	return cp;
}


/*
 *  Pass or drop the packet.
 *  Called by ip_vs_in, when the virtual service is available but
 *  no destination is available for a new connection.
 */
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{
	__be16 _ports[2], *pptr;
#ifdef CONFIG_SYSCTL
	struct net *net;
	struct netns_ipvs *ipvs;
	int unicast;
#endif

	pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
	if (pptr == NULL) {
		return NF_DROP;
	}

#ifdef CONFIG_SYSCTL
	net = skb_net(skb);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6)
		unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
	else
#endif
		unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);

	/* if it is fwmark-based service, the cache_bypass sysctl is up
	   and the destination is a non-local unicast, then create
	   a cache_bypass connection entry */
	ipvs = net_ipvs(net);
	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
		int ret;
		struct ip_vs_conn *cp;
		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
				      iph->protocol == IPPROTO_UDP) ?
				      IP_VS_CONN_F_ONE_PACKET : 0;
		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };

		/* create a new connection entry */
		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
		{
			struct ip_vs_conn_param p;
			ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
					      &iph->saddr, pptr[0],
					      &iph->daddr, pptr[1], &p);
			cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
					    IP_VS_CONN_F_BYPASS | flags,
					    NULL, skb->mark);
			if (!cp)
				return NF_DROP;
		}

		/* statistics */
		ip_vs_in_stats(cp, skb);

		/* set state */
		ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);

		/* transmit the first SYN packet */
		ret = cp->packet_xmit(skb, cp, pd->pp, iph);
		/* do not touch skb anymore */

		atomic_inc(&cp->in_pkts);
		ip_vs_conn_put(cp);
		return ret;
	}
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
		    struct sk_buff *skb,
		    __be16 src_port, __be16 dst_port, int *ignored)
{
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_iphdr iph;
	struct ip_vs_dest *dest;
	struct ip_vs_conn *ct;
	__be16 dport = 0;		
	unsigned int flags;
	struct ip_vs_conn_param param;
	const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
	union nf_inet_addr snet;	

	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

	
#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6)
		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
	else
#endif
		snet.ip = iph.saddr.ip & svc->netmask;

	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
		      "mnet %s\n",
		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
		      IP_VS_DBG_ADDR(svc->af, &snet));

	{
		int protocol = iph.protocol;
		const union nf_inet_addr *vaddr = &iph.daddr;
		__be16 vport = 0;

		if (dst_port == svc->port) {
			if (svc->port != FTPPORT)
				vport = dst_port;
		} else {
			if (svc->fwmark) {
				protocol = IPPROTO_IP;
				vaddr = &fwmark;
			}
		}
		
		if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
						  vaddr, vport, &param) < 0) {
			*ignored = -1;
			return NULL;
		}
	}

	
	ct = ip_vs_ct_in_get(&param);
	if (!ct || !ip_vs_check_template(ct)) {
		dest = svc->scheduler->schedule(svc, skb);
		if (!dest) {
			IP_VS_DBG(1, "p-schedule: no dest found.\n");
			kfree(param.pe_data);
			*ignored = 0;
			return NULL;
		}

		if (dst_port == svc->port && svc->port != FTPPORT)
			dport = dest->port;

		ct = ip_vs_conn_new(&param, &dest->addr, dport,
				    IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
		if (ct == NULL) {
			kfree(param.pe_data);
			*ignored = -1;
			return NULL;
		}

		ct->timeout = svc->timeout;
	} else {
		
		dest = ct->dest;
		kfree(param.pe_data);
	}

	dport = dst_port;
	if (dport == svc->port && dest->port)
		dport = dest->port;

	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
		 && iph.protocol == IPPROTO_UDP)?
		IP_VS_CONN_F_ONE_PACKET : 0;

	ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
			      src_port, &iph.daddr, dst_port, &param);

	cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark);
	if (cp == NULL) {
		ip_vs_conn_put(ct);
		*ignored = -1;
		return NULL;
	}

	ip_vs_control_add(cp, ct);
	ip_vs_conn_put(ct);

	ip_vs_conn_stats(cp, svc);
	return cp;
}


struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
	       struct ip_vs_proto_data *pd, int *ignored)
{
	struct ip_vs_protocol *pp = pd->pp;
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_iphdr iph;
	struct ip_vs_dest *dest;
	__be16 _ports[2], *pptr;
	unsigned int flags;

	*ignored = 1;
	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	if (pptr[0] == FTPDATA) {
		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
			      "Not scheduling FTPDATA");
		return NULL;
	}

	if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
	    (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
		IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
			      "Not scheduling reply for existing connection");
		__ip_vs_conn_put(cp);
		return NULL;
	}

	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);

	*ignored = 0;

	if (!svc->fwmark && pptr[1] != svc->port) {
		if (!svc->port)
			pr_err("Schedule: port zero only supported "
			       "in persistent services, "
			       "check your ipvs configuration\n");
		return NULL;
	}

	dest = svc->scheduler->schedule(svc, skb);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
		 && iph.protocol == IPPROTO_UDP)?
		IP_VS_CONN_F_ONE_PACKET : 0;

	{
		struct ip_vs_conn_param p;

		ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
				      &iph.saddr, pptr[0], &iph.daddr, pptr[1],
				      &p);
		cp = ip_vs_conn_new(&p, &dest->addr,
				    dest->port ? dest->port : pptr[1],
				    flags, dest, skb->mark);
		if (!cp) {
			*ignored = -1;
			return NULL;
		}
	}

	IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
		      "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
		      ip_vs_fwd_tag(cp),
		      IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport),
		      IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport),
		      IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport),
		      cp->flags, atomic_read(&cp->refcnt));

	ip_vs_conn_stats(cp, svc);
	return cp;
}


int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
		struct ip_vs_proto_data *pd)
{
	__be16 _ports[2], *pptr;
	struct ip_vs_iphdr iph;
#ifdef CONFIG_SYSCTL
	struct net *net;
	struct netns_ipvs *ipvs;
	int unicast;
#endif

	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

	pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
	if (pptr == NULL) {
		ip_vs_service_put(svc);
		return NF_DROP;
	}

#ifdef CONFIG_SYSCTL
	net = skb_net(skb);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6)
		unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
	else
#endif
		unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);

	ipvs = net_ipvs(net);
	if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
		int ret;
		struct ip_vs_conn *cp;
		unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
				      iph.protocol == IPPROTO_UDP)?
				      IP_VS_CONN_F_ONE_PACKET : 0;
		union nf_inet_addr daddr =  { .all = { 0, 0, 0, 0 } };

		ip_vs_service_put(svc);

		
		IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
		{
			struct ip_vs_conn_param p;
			ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
					      &iph.saddr, pptr[0],
					      &iph.daddr, pptr[1], &p);
			cp = ip_vs_conn_new(&p, &daddr, 0,
					    IP_VS_CONN_F_BYPASS | flags,
					    NULL, skb->mark);
			if (!cp)
				return NF_DROP;
		}

		
		ip_vs_in_stats(cp, skb);

		
		ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);

		
		ret = cp->packet_xmit(skb, cp, pd->pp);
		

		atomic_inc(&cp->in_pkts);
		ip_vs_conn_put(cp);
		return ret;
	}
Beispiel #17
0
/*
 *  IPVS persistent scheduling function
 *  It creates a connection entry according to its template if exists,
 *  or selects a server and creates a connection entry plus a template.
 *  Locking: we are svc user (svc->refcnt), so we hold all dests too
 *  Protocols supported: TCP, UDP
 */
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
		    const struct sk_buff *skb,
		    __be16 ports[2])
{
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_iphdr iph;
	struct ip_vs_dest *dest;
	struct ip_vs_conn *ct;
	__be16  dport;			/* destination port to forward */
	union nf_inet_addr snet;	/* source network of the client,
					   after masking */

	ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);

	/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6)
		ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask);
	else
#endif
		snet.ip = iph.saddr.ip & svc->netmask;

	IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
		      "mnet %s\n",
		      IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
		      IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
		      IP_VS_DBG_ADDR(svc->af, &snet));

	/*
	 * As far as we know, FTP is a very complicated network protocol, and
	 * it uses control connection and data connections. For active FTP,
	 * FTP server initialize data connection to the client, its source port
	 * is often 20. For passive FTP, FTP server tells the clients the port
	 * that it passively listens to,  and the client issues the data
	 * connection. In the tunneling or direct routing mode, the load
	 * balancer is on the client-to-server half of connection, the port
	 * number is unknown to the load balancer. So, a conn template like
	 * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP
	 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
	 * is created for other persistent services.
	 */
	if (ports[1] == svc->port) {
		/* Check if a template already exists */
		if (svc->port != FTPPORT)
			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
					     &iph.daddr, ports[1]);
		else
			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
					     &iph.daddr, 0);

		if (!ct || !ip_vs_check_template(ct)) {
			/*
			 * No template found or the dest of the connection
			 * template is not available.
			 */
			dest = svc->scheduler->schedule(svc, skb);
			if (dest == NULL) {
				IP_VS_DBG(1, "p-schedule: no dest found.\n");
				return NULL;
			}

			/*
			 * Create a template like <protocol,caddr,0,
			 * vaddr,vport,daddr,dport> for non-ftp service,
			 * and <protocol,caddr,0,vaddr,0,daddr,0>
			 * for ftp service.
			 */
			if (svc->port != FTPPORT)
				ct = ip_vs_conn_new(svc->af, iph.protocol,
						    &snet, 0,
						    &iph.daddr,
						    ports[1],
						    &dest->addr, dest->port,
						    IP_VS_CONN_F_TEMPLATE,
						    dest);
			else
				ct = ip_vs_conn_new(svc->af, iph.protocol,
						    &snet, 0,
						    &iph.daddr, 0,
						    &dest->addr, 0,
						    IP_VS_CONN_F_TEMPLATE,
						    dest);
			if (ct == NULL)
				return NULL;

			ct->timeout = svc->timeout;
		} else {
			/* set destination with the found template */
			dest = ct->dest;
		}
		dport = dest->port;
	} else {
		/*
		 * Note: persistent fwmark-based services and persistent
		 * port zero service are handled here.
		 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
		 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
		 */
		if (svc->fwmark) {
			union nf_inet_addr fwmark = {
				.ip = htonl(svc->fwmark)
			};

			ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
					     &fwmark, 0);
		} else
			ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
					     &iph.daddr, 0);

		if (!ct || !ip_vs_check_template(ct)) {
			/*
			 * If it is not persistent port zero, return NULL,
			 * otherwise create a connection template.
			 */
			if (svc->port)
				return NULL;

			dest = svc->scheduler->schedule(svc, skb);
			if (dest == NULL) {
				IP_VS_DBG(1, "p-schedule: no dest found.\n");
				return NULL;
			}

			/*
			 * Create a template according to the service
			 */
			if (svc->fwmark) {
				union nf_inet_addr fwmark = {
					.ip = htonl(svc->fwmark)
				};

				ct = ip_vs_conn_new(svc->af, IPPROTO_IP,
						    &snet, 0,
						    &fwmark, 0,
						    &dest->addr, 0,
						    IP_VS_CONN_F_TEMPLATE,
						    dest);
			} else
				ct = ip_vs_conn_new(svc->af, iph.protocol,
						    &snet, 0,
						    &iph.daddr, 0,
						    &dest->addr, 0,
						    IP_VS_CONN_F_TEMPLATE,
						    dest);
			if (ct == NULL)
				return NULL;

			ct->timeout = svc->timeout;
		} else {
			/* set destination with the found template */
			dest = ct->dest;
		}
		dport = ports[1];
	}
Beispiel #18
0
static inline void
set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
	      int direction, struct tcphdr *th)
{
	int state_idx;
	int new_state = IP_VS_TCP_S_CLOSE;
	int state_off = tcp_state_off[direction];

	/*
	 *    Update state offset to INPUT_ONLY if necessary
	 *    or delete NO_OUTPUT flag if output packet detected
	 */
	if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
		if (state_off == TCP_DIR_OUTPUT)
			cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
		else
			state_off = TCP_DIR_INPUT_ONLY;
	}

	if ((state_idx = tcp_state_idx(th)) < 0) {
		IP_VS_DBG(8, "tcp_state_idx=%d!!!\n", state_idx);
		goto tcp_state_out;
	}

	new_state =
		pd->tcp_state_table[state_off+state_idx].next_state[cp->state];

  tcp_state_out:
	if (new_state != cp->state) {
		struct ip_vs_dest *dest = cp->dest;

		IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
			      "%s:%d state: %s->%s conn->refcnt:%d\n",
			      pd->pp->name,
			      ((state_off == TCP_DIR_OUTPUT) ?
			       "output " : "input "),
			      th->syn ? 'S' : '.',
			      th->fin ? 'F' : '.',
			      th->ack ? 'A' : '.',
			      th->rst ? 'R' : '.',
			      IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
			      ntohs(cp->dport),
			      IP_VS_DBG_ADDR(cp->af, &cp->caddr),
			      ntohs(cp->cport),
			      tcp_state_name(cp->state),
			      tcp_state_name(new_state),
			      atomic_read(&cp->refcnt));

		if (dest) {
			if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
			    !tcp_state_active(new_state)) {
				atomic_dec(&dest->activeconns);
				atomic_inc(&dest->inactconns);
				cp->flags |= IP_VS_CONN_F_INACTIVE;
			} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
				   tcp_state_active(new_state)) {
				atomic_inc(&dest->activeconns);
				atomic_dec(&dest->inactconns);
				cp->flags &= ~IP_VS_CONN_F_INACTIVE;
			}
		}
	}

	if (likely(pd))
		cp->timeout = pd->timeout_table[cp->state = new_state];
	else	/* What to do ? */
		cp->timeout = tcp_timeouts[cp->state = new_state];
}