Пример #1
0
/*
 *  Unbind a service with its scheduler
 */
int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
{
	struct ip_vs_scheduler *sched;

	if (svc == NULL) {
		IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n");
		return -EINVAL;
	}

	sched = svc->scheduler;
	if (sched == NULL) {
		IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n");
		return -EINVAL;
	}

	if (sched->done_service) {
		if (sched->done_service(svc) != 0) {
			IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n");
			return -EINVAL;
		}
	}

	svc->scheduler = NULL;
	return 0;
}
Пример #2
0
/*
 *  Bind a service with a scheduler
 */
int ip_vs_bind_scheduler(struct ip_vs_service *svc,
			 struct ip_vs_scheduler *scheduler)
{
	int ret;

	if (svc == NULL) {
		IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n");
		return -EINVAL;
	}
	if (scheduler == NULL) {
		IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n");
		return -EINVAL;
	}

	svc->scheduler = scheduler;

	if (scheduler->init_service) {
		ret = scheduler->init_service(svc);
		if (ret) {
			IP_VS_ERR("ip_vs_bind_scheduler(): init error\n");
			return ret;
		}
	}

	return 0;
}
Пример #3
0
/*
 *      Set up receiving multicast socket over UDP
 */
static struct socket * make_receive_sock(void)
{
	struct socket *sock;

	/* First create a socket */
	if (sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
		IP_VS_ERR("Error during creation of socket; terminating\n");
		return NULL;
	}

	/* it is equivalent to the REUSEADDR option in user-space */
	sock->sk->reuse = 1;

	if (sock->ops->bind(sock,
			    (struct sockaddr*)&mcast_addr,
			    sizeof(struct sockaddr)) < 0) {
		IP_VS_ERR("Error binding to the multicast addr\n");
		goto error;
	}

	/* join the multicast group */
	if (join_mcast_group(sock->sk,
			     (struct in_addr*)&mcast_addr.sin_addr,
			     ip_vs_mcast_ifn) < 0) {
		IP_VS_ERR("Error joining to the multicast group\n");
		goto error;
	}

	return sock;

  error:
	sock_release(sock);
	return NULL;
}
Пример #4
0
/*
 *  Unregister a scheduler from the scheduler list
 */
int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
	if (!scheduler) {
		IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n");
		return -EINVAL;
	}

	write_lock_bh(&__ip_vs_sched_lock);
	if (scheduler->n_list.next == &scheduler->n_list) {
		write_unlock_bh(&__ip_vs_sched_lock);
		IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
			  "is not in the list. failed\n", scheduler->name);
		return -EINVAL;
	}

	/*
	 *	Remove it from the d-linked scheduler list
	 */
	list_del(&scheduler->n_list);
	write_unlock_bh(&__ip_vs_sched_lock);

	MOD_DEC_USE_COUNT;

	IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name);

	return 0;
}
Пример #5
0
/*
 *      Process received multicast message and create the corresponding
 *      ip_vs_conn entries.
 */
static void ip_vs_process_message(const char *buffer, const size_t buflen)
{
	struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
	struct ip_vs_sync_conn *s;
	struct ip_vs_sync_conn_options *opt;
	struct ip_vs_conn *cp;
	char *p;
	int i;

	if (buflen != m->size) {
		IP_VS_ERR("bogus message\n");
		return;
	}

	p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
	for (i=0; i<m->nr_conns; i++) {
		s = (struct ip_vs_sync_conn *)p;
		cp = ip_vs_conn_in_get(s->protocol,
				       s->caddr, s->cport,
				       s->vaddr, s->vport);
		if (!cp) {
			cp = ip_vs_conn_new(s->protocol,
					    s->caddr, s->cport,
					    s->vaddr, s->vport,
					    s->daddr, s->dport,
					    ntohs(s->flags), NULL);
			if (!cp) {
				IP_VS_ERR("ip_vs_conn_new failed\n");
				return;
			}
			cp->state = ntohs(s->state);
		} else if (!cp->dest) {
			/* it is an entry created by the synchronization */
			cp->state = ntohs(s->state);
			cp->flags = ntohs(s->flags) | IP_VS_CONN_F_HASHED;
		}	/* Note that we don't touch its state and flags
			   if it is a normal entry. */

		if (ntohs(s->flags) & IP_VS_CONN_F_SEQ_MASK) {
			opt = (struct ip_vs_sync_conn_options *)&s[1];
			memcpy(&cp->in_seq, opt, sizeof(*opt));
			p += FULL_CONN_SIZE;
		} else
			p += SIMPLE_CONN_SIZE;

		atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold);
		cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
		ip_vs_conn_put(cp);

		if (p > buffer+buflen) {
			IP_VS_ERR("bogus message\n");
			return;
		}
	}
}
Пример #6
0
static void sync_master_loop(void)
{
	struct socket *sock;
	struct ip_vs_sync_buff *sb;
	struct ip_vs_sync_mesg *m;

	/* create the sending multicast socket */
	sock = make_send_sock();
	if (!sock)
		return;

	for (;;) {
		while ((sb=sb_dequeue())) {
			m = sb->mesg;
			if (ip_vs_send_async(sock, (char *)m,
					     m->size) != m->size)
				IP_VS_ERR("ip_vs_send_async error\n");
			ip_vs_sync_buff_release(sb);
		}

		/* check if entries stay in curr_sb for 2 seconds */
		if ((sb = get_curr_sync_buff(2*HZ))) {
			m = sb->mesg;
			if (ip_vs_send_async(sock, (char *)m,
					     m->size) != m->size)
				IP_VS_ERR("ip_vs_send_async error\n");
			ip_vs_sync_buff_release(sb);
		}

		if (stop_sync)
			break;

		__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
		__set_current_state(TASK_RUNNING);
	}

	/* clean up the sync_buff queue */
	while ((sb=sb_dequeue())) {
		ip_vs_sync_buff_release(sb);
	}

	/* clean up the current sync_buff */
	if ((sb = get_curr_sync_buff(0))) {
		ip_vs_sync_buff_release(sb);
	}

	/* release the sending multicast socket */
	sock_release(sock);
}
Пример #7
0
/*
 *  Register a scheduler in the scheduler list
 */
int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
{
	struct ip_vs_scheduler *sched;

	if (!scheduler) {
		IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n");
		return -EINVAL;
	}

	if (!scheduler->name) {
		IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n");
		return -EINVAL;
	}

	MOD_INC_USE_COUNT;

	/*
	 *  Make sure that the scheduler with this name doesn't exist
	 *  in the scheduler list.
	 */
	sched = ip_vs_sched_getbyname(scheduler->name);
	if (sched) {
		ip_vs_scheduler_put(sched);
		MOD_DEC_USE_COUNT;
		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
			  "already existed in the system\n", scheduler->name);
		return -EINVAL;
	}

	write_lock_bh(&__ip_vs_sched_lock);

	if (scheduler->n_list.next != &scheduler->n_list) {
		write_unlock_bh(&__ip_vs_sched_lock);
		MOD_DEC_USE_COUNT;
		IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
			  "already linked\n", scheduler->name);
		return -EINVAL;
	}

	/*
	 *	Add it into the d-linked scheduler list
	 */
	list_add(&scheduler->n_list, &ip_vs_schedulers);
	write_unlock_bh(&__ip_vs_sched_lock);

	IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name);

	return 0;
}
Пример #8
0
static struct ip_vs_dest_list *
ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
{
	struct ip_vs_dest_list *e;

	for (e=set->list; e!=NULL; e=e->next) {
		if (e->dest == dest)
			/* already existed */
			return NULL;
	}

	e = kmalloc(sizeof(*e), GFP_ATOMIC);
	if (e == NULL) {
		IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n");
		return NULL;
	}

	atomic_inc(&dest->refcnt);
	e->dest = dest;

	/* link it to the list */
	e->next = set->list;
	set->list = e;
	atomic_inc(&set->size);

	set->lastmod = jiffies;
	return e;
}
Пример #9
0
int start_sync_thread(int state, char *mcast_ifn)
{
	DECLARE_COMPLETION(startup);
	pid_t pid;

	if (sync_pid)
		return -EEXIST;

	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
	IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %d bytes\n",
		  sizeof(struct ip_vs_sync_conn));

	ip_vs_sync_state = state;
	strcpy(ip_vs_mcast_ifn, mcast_ifn);

  repeat:
	if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
		IP_VS_ERR("could not create fork_sync_thread due to %d... "
			  "retrying.\n", pid);
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ);
		goto repeat;
	}

	wait_for_completion(&startup);

	return 0;
}
Пример #10
0
/*
 *  IPVS main scheduling function
 *  It selects a server according to the virtual service, and
 *  creates a connection entry.
 *  Protocols supported: TCP, UDP
 */
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct ip_vs_conn *cp = NULL;
	struct iphdr *iph = ip_hdr(skb);
	struct ip_vs_dest *dest;
	__be16 _ports[2], *pptr;

	pptr = skb_header_pointer(skb, iph->ihl*4,
				  sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	/*
	 *    Persistent service
	 */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, skb, pptr);

	/*
	 *    Non-persistent service
	 */
	if (!svc->fwmark && pptr[1] != svc->port) {
		if (!svc->port)
			IP_VS_ERR("Schedule: port zero only supported "
				  "in persistent services, "
				  "check your ipvs configuration\n");
		return NULL;
	}

	dest = svc->scheduler->schedule(svc, skb);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	/*
	 *    Create a connection entry.
	 */
	cp = ip_vs_conn_new(iph->protocol,
			    iph->saddr, pptr[0],
			    iph->daddr, pptr[1],
			    dest->addr, dest->port?dest->port:pptr[1],
			    0,
			    dest);
	if (cp == NULL)
		return NULL;

	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
		  ip_vs_fwd_tag(cp),
		  NIPQUAD(cp->caddr), ntohs(cp->cport),
		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
		  NIPQUAD(cp->daddr), ntohs(cp->dport),
		  cp->flags, atomic_read(&cp->refcnt));

	ip_vs_conn_stats(cp, svc);
	return cp;
}
Пример #11
0
/*
 *  IPVS main scheduling function
 *  It selects a server according to the virtual service, and
 *  creates a connection entry.
 */
static struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_dest *dest;
	const __u16 *portp;

	/*
	 *    Persistent service
	 */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, iph);

	/*
	 *    Non-persistent service
	 */
	portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
	if (!svc->fwmark && portp[1] != svc->port) {
		if (!svc->port)
			IP_VS_ERR("Schedule: port zero only supported "
				  "in persistent services, "
				  "check your ipvs configuration\n");
		return NULL;
	}

	dest = svc->scheduler->schedule(svc, iph);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	/*
	 *    Create a connection entry.
	 */
	cp = ip_vs_conn_new(iph->protocol,
			    iph->saddr, portp[0],
			    iph->daddr, portp[1],
			    dest->addr, dest->port?dest->port:portp[1],
			    0,
			    dest);
	if (cp == NULL)
		return NULL;

	/*
	 *    Increase the inactive connection counter because it is in
	 *    Syn-Received state (inactive) when the connection is created.
	 */
	atomic_inc(&dest->inactconns);

	IP_VS_DBG(6, "Schedule fwd:%c s:%s c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
		  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
		  NIPQUAD(cp->caddr), ntohs(cp->cport),
		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
		  NIPQUAD(cp->daddr), ntohs(cp->dport),
		  cp->flags, atomic_read(&cp->refcnt));

	return cp;
}
Пример #12
0
static void sync_backup_loop(void)
{
	struct socket *sock;
	char *buf;
	int len;

	if (!(buf=kmalloc(SYNC_MESG_MAX_SIZE, GFP_ATOMIC))) {
		IP_VS_ERR("sync_backup_loop: kmalloc error\n");
		return;
	}

	/* create the receiving multicast socket */
	sock = make_receive_sock();
	if (!sock)
		goto out;

	for (;;) {
		/* do you have data now? */
		while (!skb_queue_empty(&(sock->sk->receive_queue))) {
			if ((len=ip_vs_receive(sock, buf,
					       SYNC_MESG_MAX_SIZE))<=0) {
				IP_VS_ERR("receiving message error\n");
				break;
			}
			/* disable bottom half, because it accessed the data
			   shared by softirq while getting/creating conns */
			local_bh_disable();
			ip_vs_process_message(buf, len);
			local_bh_enable();
		}

		if (stop_sync)
			break;

		__set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
		__set_current_state(TASK_RUNNING);
	}

	/* release the sending multicast socket */
	sock_release(sock);

  out:
	kfree(buf);
}
Пример #13
0
/*
 *      Add an ip_vs_conn information into the current sync_buff.
 *      Called by ip_vs_in.
 */
void ip_vs_sync_conn(struct ip_vs_conn *cp)
{
	struct ip_vs_sync_mesg *m;
	struct ip_vs_sync_conn *s;
	int len;

	spin_lock(&curr_sb_lock);
	if (!curr_sb) {
		if (!(curr_sb=ip_vs_sync_buff_create())) {
			spin_unlock(&curr_sb_lock);
			IP_VS_ERR("ip_vs_sync_buff_create failed.\n");
			return;
		}
	}

	len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
		SIMPLE_CONN_SIZE;
	m = curr_sb->mesg;
	s = (struct ip_vs_sync_conn *)curr_sb->head;

	/* copy members */
	s->protocol = cp->protocol;
	s->cport = cp->cport;
	s->vport = cp->vport;
	s->dport = cp->dport;
	s->caddr = cp->caddr;
	s->vaddr = cp->vaddr;
	s->daddr = cp->daddr;
	s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
	s->state = htons(cp->state);
	if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
		struct ip_vs_sync_conn_options *opt =
			(struct ip_vs_sync_conn_options *)&s[1];
		memcpy(opt, &cp->in_seq, sizeof(*opt));
	}

	m->nr_conns++;
	m->size += len;
	curr_sb->head += len;

	/* check if there is a space for next one */
	if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
		sb_queue_tail(curr_sb);
		curr_sb = NULL;
	}
	spin_unlock(&curr_sb_lock);

	/* synchronize its controller if it has */
	if (cp->control)
		ip_vs_sync_conn(cp->control);
}
Пример #14
0
/*
 *      Set up sending multicast socket over UDP
 */
static struct socket * make_send_sock(void)
{
	struct socket *sock;

	/* First create a socket */
	if (sock_create(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
		IP_VS_ERR("Error during creation of socket; terminating\n");
		return NULL;
	}

	if (set_mcast_if(sock->sk, ip_vs_mcast_ifn) < 0) {
		IP_VS_ERR("Error setting outbound mcast interface\n");
		goto error;
	}

	set_mcast_loop(sock->sk, 0);
	set_mcast_ttl(sock->sk, 1);

	if (bind_mcastif_addr(sock, ip_vs_mcast_ifn) < 0) {
		IP_VS_ERR("Error binding address of the mcast interface\n");
		goto error;
	}

	if (sock->ops->connect(sock,
			       (struct sockaddr*)&mcast_addr,
			       sizeof(struct sockaddr), 0) < 0) {
		IP_VS_ERR("Error connecting to the multicast addr\n");
		goto error;
	}

	return sock;

  error:
	sock_release(sock);
	return NULL;
}
Пример #15
0
static int fork_sync_thread(void *startup)
{
	pid_t pid;

	/* fork the sync thread here, then the parent process of the
	   sync thread is the init process after this thread exits. */
  repeat:
	if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
		IP_VS_ERR("could not create sync_thread due to %d... "
			  "retrying.\n", pid);
		current->state = TASK_UNINTERRUPTIBLE;
		schedule_timeout(HZ);
		goto repeat;
	}

	return 0;
}
Пример #16
0
/*
 *	Unhash ip_vs_lblc_entry from ip_vs_lblc_table.
 *	returns bool success.
 */
static int ip_vs_lblc_unhash(struct ip_vs_lblc_table *tbl,
			     struct ip_vs_lblc_entry *en)
{
	if (list_empty(&en->list)) {
		IP_VS_ERR("ip_vs_lblc_unhash(): request for not hashed entry, "
			  "called from %p\n", __builtin_return_address(0));
		return 0;
	}

	/*
	 * Remove it from the table
	 */
	write_lock(&tbl->lock);
	list_del(&en->list);
	INIT_LIST_HEAD(&en->list);
	write_unlock(&tbl->lock);

	return 1;
}
static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
{
	int i;
	struct ip_vs_lblcr_table *tbl;

	/*
	 *    Allocate the ip_vs_lblcr_table for this service
	 */
	tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC);
	if (tbl == NULL) {
		IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n");
		return -ENOMEM;
	}
	svc->sched_data = tbl;
	IP_VS_DBG(6, "LBLCR hash table (memory=%dbytes) allocated for "
		  "current service\n",
		  sizeof(struct ip_vs_lblcr_table));

	/*
	 *    Initialize the hash buckets
	 */
	for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
		INIT_LIST_HEAD(&tbl->bucket[i]);
	}
	tbl->lock = RW_LOCK_UNLOCKED;
	tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16;
	tbl->rover = 0;
	tbl->counter = 1;

	/*
	 *    Hook periodic timer for garbage collection
	 */
	init_timer(&tbl->periodic_timer);
	tbl->periodic_timer.data = (unsigned long)tbl;
	tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
	add_timer(&tbl->periodic_timer);

#ifdef CONFIG_IP_VS_LBLCR_DEBUG
	lblcr_table_list = tbl;
#endif
	return 0;
}
Пример #18
0
/*
 *      new/free a ip_vs_lblc_entry, which is a mapping of a destionation
 *      IP address to a server.
 */
static inline struct ip_vs_lblc_entry *
ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest)
{
	struct ip_vs_lblc_entry *en;

	en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
	if (en == NULL) {
		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
		return NULL;
	}

	INIT_LIST_HEAD(&en->list);
	en->addr = daddr;

	atomic_inc(&dest->refcnt);
	en->dest = dest;

	return en;
}
/*
 *      new/free a ip_vs_lblcr_entry, which is a mapping of a destination
 *      IP address to a server.
 */
static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr)
{
	struct ip_vs_lblcr_entry *en;

	en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC);
	if (en == NULL) {
		IP_VS_ERR("ip_vs_lblcr_new(): no memory\n");
		return NULL;
	}

	INIT_LIST_HEAD(&en->list);
	en->addr = daddr;

	/* initilize its dest set */
	atomic_set(&(en->set.size), 0);
	en->set.list = NULL;
	en->set.lock = RW_LOCK_UNLOCKED;

	return en;
}
static int ip_vs_wrr_init_svc(struct ip_vs_service *svc)
{
	struct ip_vs_wrr_mark *mark;

	/*
	 *    Allocate the mark variable for WRR scheduling
	 */
	mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);
	if (mark == NULL) {
		IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n");
		return -ENOMEM;
	}
	mark->cl = &svc->destinations;
	mark->cw = 0;
	mark->mw = ip_vs_wrr_max_weight(svc);
	mark->di = ip_vs_wrr_gcd_weight(svc);
	svc->sched_data = mark;

	return 0;
}
Пример #21
0
static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_bucket *tbl;

	/* allocate the SH table for this service */
	tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,
		      GFP_ATOMIC);
	if (tbl == NULL) {
		IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n");
		return -ENOMEM;
	}
	svc->sched_data = tbl;
	IP_VS_DBG(6, "SH hash table (memory=%dbytes) allocated for "
		  "current service\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);

	/* assign the hash buckets with the updated service */
	ip_vs_sh_assign(tbl, svc);

	return 0;
}
Пример #22
0
/*
 *	Hash an entry in the ip_vs_lblc_table.
 *	returns bool success.
 */
static int
ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
{
	unsigned hash;

	if (!list_empty(&en->list)) {
		IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
			  "called from %p\n", __builtin_return_address(0));
		return 0;
	}

	/*
	 *	Hash by destination IP address
	 */
	hash = ip_vs_lblc_hashkey(en->addr);

	write_lock(&tbl->lock);
	list_add(&en->list, &tbl->bucket[hash]);
	atomic_inc(&tbl->entries);
	write_unlock(&tbl->lock);

	return 1;
}
Пример #23
0
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
	struct net_device *dev;
	u32 addr;
	struct sockaddr_in sin;

	if ((dev = __dev_get_by_name(ifname)) == NULL)
		return -ENODEV;

	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
	if (!addr)
		IP_VS_ERR("You probably need to specify IP address on "
			  "multicast interface.\n");

	IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n",
		  ifname, NIPQUAD(addr));

	/* Now bind the socket with the address of multicast interface */
	sin.sin_family	     = AF_INET;
	sin.sin_addr.s_addr  = addr;
	sin.sin_port         = 0;

	return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
}
Пример #24
0
/*
 *	Initialize IP Virtual Server
 */
static int __init ip_vs_init(void)
{
	int ret;

	ret = ip_vs_control_init();
	if (ret < 0) {
		IP_VS_ERR("can't setup control.\n");
		goto cleanup_nothing;
	}

	ip_vs_protocol_init();

	ret = ip_vs_app_init();
	if (ret < 0) {
		IP_VS_ERR("can't setup application helper.\n");
		goto cleanup_protocol;
	}

	ret = ip_vs_conn_init();
	if (ret < 0) {
		IP_VS_ERR("can't setup connection table.\n");
		goto cleanup_app;
	}

	ret = nf_register_hook(&ip_vs_in_ops);
	if (ret < 0) {
		IP_VS_ERR("can't register in hook.\n");
		goto cleanup_conn;
	}

	ret = nf_register_hook(&ip_vs_out_ops);
	if (ret < 0) {
		IP_VS_ERR("can't register out hook.\n");
		goto cleanup_inops;
	}
	ret = nf_register_hook(&ip_vs_post_routing_ops);
	if (ret < 0) {
		IP_VS_ERR("can't register post_routing hook.\n");
		goto cleanup_outops;
	}
	ret = nf_register_hook(&ip_vs_forward_icmp_ops);
	if (ret < 0) {
		IP_VS_ERR("can't register forward_icmp hook.\n");
		goto cleanup_postroutingops;
	}

	IP_VS_INFO("ipvs loaded.\n");
	return ret;

  cleanup_postroutingops:
	nf_unregister_hook(&ip_vs_post_routing_ops);
  cleanup_outops:
	nf_unregister_hook(&ip_vs_out_ops);
  cleanup_inops:
	nf_unregister_hook(&ip_vs_in_ops);
  cleanup_conn:
	ip_vs_conn_cleanup();
  cleanup_app:
	ip_vs_app_cleanup();
  cleanup_protocol:
	ip_vs_protocol_cleanup();
	ip_vs_control_cleanup();
  cleanup_nothing:
	return ret;
}
Пример #25
0
/*
 *	Handle ICMP messages in the inside-to-outside direction (outgoing).
 *	Find any that might be relevant, check against existing connections,
 *	forward to the right destination host if relevant.
 *	Currently handles error types - unreachable, quench, ttl exceeded.
 *      (Only used in VS/NAT)
 */
static int ip_vs_out_icmp(struct sk_buff **skb_p)
{
	struct sk_buff	*skb   = *skb_p;
	struct iphdr	*iph;
	struct icmphdr	*icmph;
	struct iphdr	*ciph;	/* The ip header contained within the ICMP */
	__u16		*pptr;	/* port numbers from TCP/UDP contained header */
	unsigned short	ihl;
	unsigned short	len;
	unsigned short	clen, csize;
	struct ip_vs_conn *cp;

	/* reassemble IP fragments, but will it happen in ICMP packets?? */
	if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
		skb = ip_defrag(skb, IP_DEFRAG_VS_OUT);
		if (!skb)
			return NF_STOLEN;
		*skb_p = skb;
	}

	if (skb_is_nonlinear(skb)) {
		if (skb_linearize(skb, GFP_ATOMIC) != 0)
			return NF_DROP;
		ip_send_check(skb->nh.iph);
	}

	iph = skb->nh.iph;
	ihl = iph->ihl << 2;
	icmph = (struct icmphdr *)((char *)iph + ihl);
	len   = ntohs(iph->tot_len) - ihl;
	if (len < sizeof(struct icmphdr))
		return NF_DROP;

	IP_VS_DBG(12, "outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
		  icmph->type, ntohs(icmp_id(icmph)),
		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));

	/*
	 * Work through seeing if this is for us.
	 * These checks are supposed to be in an order that means easy
	 * things are checked first to speed up processing.... however
	 * this means that some packets will manage to get a long way
	 * down this stack and then be rejected, but that's life.
	 */
	if ((icmph->type != ICMP_DEST_UNREACH) &&
	    (icmph->type != ICMP_SOURCE_QUENCH) &&
	    (icmph->type != ICMP_TIME_EXCEEDED))
		return NF_ACCEPT;

	/* Now find the contained IP header */
	clen = len - sizeof(struct icmphdr);
	if (clen < sizeof(struct iphdr))
		return NF_DROP;
	ciph = (struct iphdr *) (icmph + 1);
	csize = ciph->ihl << 2;
	if (clen < csize)
		return NF_DROP;

	/* We are only interested ICMPs generated from TCP or UDP packets */
	if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP)
		return NF_ACCEPT;

	/* Skip non-first embedded TCP/UDP fragments */
	if (ciph->frag_off & __constant_htons(IP_OFFSET))
		return NF_ACCEPT;

	/* We need at least TCP/UDP ports here */
	if (clen < csize + sizeof(struct udphdr))
		return NF_DROP;

	/*
	 * Find the ports involved - this packet was
	 * incoming so the ports are right way round
	 * (but reversed relative to outer IP header!)
	 */
	pptr = (__u16 *)&(((char *)ciph)[csize]);

	/* Ensure the checksum is correct */
	if (ip_compute_csum((unsigned char *) icmph, len)) {
		/* Failed checksum! */
		IP_VS_DBG(1, "forward ICMP: failed checksum from %d.%d.%d.%d!\n",
			  NIPQUAD(iph->saddr));
		return NF_DROP;
	}

	IP_VS_DBG(11, "Handling outgoing ICMP for "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

	/* ciph content is actually <protocol, caddr, cport, daddr, dport> */
	cp = ip_vs_conn_out_get(ciph->protocol, ciph->daddr, pptr[1],
				ciph->saddr, pptr[0]);
	if (!cp)
		return NF_ACCEPT;

	if (IP_VS_FWD_METHOD(cp) != 0) {
		IP_VS_ERR("shouldn't reach here, because the box is on the"
			  "half connection in the tun/dr module.\n");
	}

	/* Now we do real damage to this packet...! */
	/* First change the source IP address, and recalc checksum */
	iph->saddr = cp->vaddr;
	ip_send_check(iph);

	/* Now change the *dest* address in the contained IP */
	ciph->daddr = cp->vaddr;
	ip_send_check(ciph);

	/* the TCP/UDP dest port - cannot redo check */
	pptr[1] = cp->vport;

	/* And finally the ICMP checksum */
	icmph->checksum = 0;
	icmph->checksum = ip_compute_csum((unsigned char *) icmph, len);
	skb->ip_summed = CHECKSUM_UNNECESSARY;

	/* do the statistics and put it back */
	ip_vs_out_stats(cp, skb);
	ip_vs_conn_put(cp);

	IP_VS_DBG(11, "Forwarding correct outgoing ICMP to "
		  "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n",
		  NIPQUAD(ciph->saddr), ntohs(pptr[0]),
		  NIPQUAD(ciph->daddr), ntohs(pptr[1]));

	skb->nfcache |= NFC_IPVS_PROPERTY;

	return NF_ACCEPT;
}
Пример #26
0
/*
 *	Handle ICMP messages in the inside-to-outside direction (outgoing).
 *	Find any that might be relevant, check against existing connections,
 *	forward to the right destination host if relevant.
 *	Currently handles error types - unreachable, quench, ttl exceeded.
 *	(Only used in VS/NAT)
 */
static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
{
	struct sk_buff *skb = *pskb;
	struct iphdr *iph;
	struct icmphdr	_icmph, *ic;
	struct iphdr	_ciph, *cih;	/* The ip header contained within the ICMP */
	struct ip_vs_conn *cp;
	struct ip_vs_protocol *pp;
	unsigned int offset, ihl, verdict;

	*related = 1;

	/* reassemble IP fragments */
	if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
		skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
		if (!skb)
			return NF_STOLEN;
		*pskb = skb;
	}

	iph = ip_hdr(skb);
	offset = ihl = iph->ihl * 4;
	ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
	if (ic == NULL)
		return NF_DROP;

	IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n",
		  ic->type, ntohs(icmp_id(ic)),
		  NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));

	/*
	 * Work through seeing if this is for us.
	 * These checks are supposed to be in an order that means easy
	 * things are checked first to speed up processing.... however
	 * this means that some packets will manage to get a long way
	 * down this stack and then be rejected, but that's life.
	 */
	if ((ic->type != ICMP_DEST_UNREACH) &&
	    (ic->type != ICMP_SOURCE_QUENCH) &&
	    (ic->type != ICMP_TIME_EXCEEDED)) {
		*related = 0;
		return NF_ACCEPT;
	}

	/* Now find the contained IP header */
	offset += sizeof(_icmph);
	cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
	if (cih == NULL)
		return NF_ACCEPT; /* The packet looks wrong, ignore */

	pp = ip_vs_proto_get(cih->protocol);
	if (!pp)
		return NF_ACCEPT;

	/* Is the embedded protocol header present? */
	if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
		     pp->dont_defrag))
		return NF_ACCEPT;

	IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for");

	offset += cih->ihl * 4;

	/* The embedded headers contain source and dest in reverse order */
	cp = pp->conn_out_get(skb, pp, cih, offset, 1);
	if (!cp)
		return NF_ACCEPT;

	verdict = NF_DROP;

	if (IP_VS_FWD_METHOD(cp) != 0) {
		IP_VS_ERR("shouldn't reach here, because the box is on the"
			  "half connection in the tun/dr module.\n");
	}

	/* Ensure the checksum is correct */
	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
		/* Failed checksum! */
		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
			  NIPQUAD(iph->saddr));
		goto out;
	}

	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
		offset += 2 * sizeof(__u16);
	if (!ip_vs_make_skb_writable(pskb, offset))
		goto out;
	skb = *pskb;

	ip_vs_nat_icmp(skb, pp, cp, 1);

	/* do the statistics and put it back */
	ip_vs_out_stats(cp, skb);

	skb->ipvs_property = 1;
	verdict = NF_ACCEPT;

  out:
	__ip_vs_conn_put(cp);

	return verdict;
}