int start_sync_thread(int state, char *mcast_ifn) { DECLARE_COMPLETION(startup); pid_t pid; if (sync_pid) return -EEXIST; IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %d bytes\n", sizeof(struct ip_vs_sync_conn)); ip_vs_sync_state = state; strcpy(ip_vs_mcast_ifn, mcast_ifn); repeat: if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { IP_VS_ERR("could not create fork_sync_thread due to %d... " "retrying.\n", pid); current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ); goto repeat; } wait_for_completion(&startup); return 0; }
/* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_bucket *tbl; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); tbl = (struct ip_vs_sh_bucket *)svc->sched_data; dest = ip_vs_sh_get(tbl, iph->saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { return NULL; } IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->saddr), NIPQUAD(dest->addr), ntohs(dest->port)); return dest; }
/*
 * IPVS main scheduling function
 * It selects a server according to the virtual service, and
 * creates a connection entry.
 */
static struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
	struct ip_vs_conn *cp = NULL;
	struct ip_vs_dest *dest;
	const __u16 *portp;

	/*
	 * Persistent service
	 */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, iph);

	/*
	 * Non-persistent service
	 */
	/* portp points at the transport header right after the IP header
	 * (ihl is in 32-bit words): portp[0] = source port,
	 * portp[1] = destination port, both still network byte order. */
	portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
	/* For non-fwmark services the destination port must match the
	 * service port; port 0 is only meaningful with persistence. */
	if (!svc->fwmark && portp[1] != svc->port) {
		if (!svc->port)
			IP_VS_ERR("Schedule: port zero only supported "
				  "in persistent services, "
				  "check your ipvs configuration\n");
		return NULL;
	}

	/* Ask the service's scheduler (rr/wlc/sh/...) for a real server. */
	dest = svc->scheduler->schedule(svc, iph);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	/*
	 * Create a connection entry.
	 */
	/* dest->port == 0 means "same port as the virtual service". */
	cp = ip_vs_conn_new(iph->protocol,
			    iph->saddr, portp[0],
			    iph->daddr, portp[1],
			    dest->addr, dest->port?dest->port:portp[1],
			    0,
			    dest);
	if (cp == NULL)
		return NULL;

	/*
	 * Increase the inactive connection counter because it is in
	 * Syn-Received state (inactive) when the connection is created.
	 */
	atomic_inc(&dest->inactconns);

	IP_VS_DBG(6, "Schedule fwd:%c s:%s c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
		  "d:%u.%u.%u.%u:%u flg:%X cnt:%d\n",
		  ip_vs_fwd_tag(cp), ip_vs_state_name(cp->state),
		  NIPQUAD(cp->caddr), ntohs(cp->cport),
		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
		  NIPQUAD(cp->daddr), ntohs(cp->dport),
		  cp->flags, atomic_read(&cp->refcnt));

	return cp;
}
/*
 * IPVS main scheduling function
 * It selects a server according to the virtual service, and
 * creates a connection entry.
 * Protocols supported: TCP, UDP
 */
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
{
	struct ip_vs_conn *cp = NULL;
	struct iphdr *iph = ip_hdr(skb);
	struct ip_vs_dest *dest;
	__be16 _ports[2], *pptr;

	/* Fetch the two transport ports right after the IP header;
	 * skb_header_pointer() copies into _ports if the header is not
	 * linear.  pptr[0] = source port, pptr[1] = destination port. */
	pptr = skb_header_pointer(skb, iph->ihl*4,
				  sizeof(_ports), _ports);
	if (pptr == NULL)
		return NULL;

	/*
	 * Persistent service
	 */
	if (svc->flags & IP_VS_SVC_F_PERSISTENT)
		return ip_vs_sched_persist(svc, skb, pptr);

	/*
	 * Non-persistent service
	 */
	/* For non-fwmark services the destination port must match the
	 * service port; port 0 only makes sense with persistence. */
	if (!svc->fwmark && pptr[1] != svc->port) {
		if (!svc->port)
			IP_VS_ERR("Schedule: port zero only supported "
				  "in persistent services, "
				  "check your ipvs configuration\n");
		return NULL;
	}

	/* Delegate server selection to the bound scheduler. */
	dest = svc->scheduler->schedule(svc, skb);
	if (dest == NULL) {
		IP_VS_DBG(1, "Schedule: no dest found.\n");
		return NULL;
	}

	/*
	 * Create a connection entry.
	 */
	/* dest->port == 0 means "use the packet's destination port". */
	cp = ip_vs_conn_new(iph->protocol,
			    iph->saddr, pptr[0],
			    iph->daddr, pptr[1],
			    dest->addr, dest->port?dest->port:pptr[1],
			    0,
			    dest);
	if (cp == NULL)
		return NULL;

	IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u "
		  "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n",
		  ip_vs_fwd_tag(cp),
		  NIPQUAD(cp->caddr), ntohs(cp->cport),
		  NIPQUAD(cp->vaddr), ntohs(cp->vport),
		  NIPQUAD(cp->daddr), ntohs(cp->dport),
		  cp->flags, atomic_read(&cp->refcnt));

	/* Account this new connection in the service/dest statistics. */
	ip_vs_conn_stats(cp, svc);
	return cp;
}
/* * Weighted Least Connection scheduling */ static struct ip_vs_dest * ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { register struct list_head *l, *e; struct ip_vs_dest *dest, *least; unsigned int loh, doh; IP_VS_DBG(6, "ip_vs_wlc_schedule(): Scheduling...\n"); /* * We calculate the load of each dest server as follows: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! * The comparison of h1*w2 > h2*w1 is equivalent to that of * h1/w1 > h2/w2 * if every weight is larger than zero. * * The server with weight=0 is quiesced and will not receive any * new connections. */ l = &svc->destinations; for (e=l->next; e!=l; e=e->next) { least = list_entry(e, struct ip_vs_dest, n_list); if (atomic_read(&least->weight) > 0) { loh = ip_vs_wlc_dest_overhead(least); goto nextstage; } } return NULL; /* * Find the destination with the least load. */ nextstage: for (e=e->next; e!=l; e=e->next) { dest = list_entry(e, struct ip_vs_dest, n_list); doh = ip_vs_wlc_dest_overhead(dest); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest; loh = doh; } } IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d overhead %d\n", NIPQUAD(least->addr), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); return least; }
/*
 * Locality-Based (weighted) Least-Connection scheduling
 */
static struct ip_vs_dest *
ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
	struct ip_vs_dest *dest;
	struct ip_vs_lblcr_table *tbl;
	struct ip_vs_lblcr_entry *en;

	IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");

	/* Look up the per-destination-IP cache entry. */
	tbl = (struct ip_vs_lblcr_table *)svc->sched_data;
	en = ip_vs_lblcr_get(tbl, iph->daddr);
	if (en == NULL) {
		/* No cache entry yet: pick a server by weighted
		 * least-connection and remember it for this dest IP. */
		dest = __ip_vs_wlc_schedule(svc, iph);
		if (dest == NULL) {
			IP_VS_DBG(1, "no destination available\n");
			return NULL;
		}
		en = ip_vs_lblcr_new(iph->daddr);
		if (en == NULL) {
			return NULL;
		}
		ip_vs_dest_set_insert(&en->set, dest);
		ip_vs_lblcr_hash(tbl, en);
	} else {
		/* Reuse the least-loaded server from the cached set. */
		dest = ip_vs_dest_set_min(&en->set);
		if (!dest || is_overloaded(dest, svc)) {
			/* Cached set unusable: fall back to WLC and add
			 * the newly chosen server to the set. */
			dest = __ip_vs_wlc_schedule(svc, iph);
			if (dest == NULL) {
				IP_VS_DBG(1, "no destination available\n");
				return NULL;
			}
			ip_vs_dest_set_insert(&en->set, dest);
		}
		/* Shrink the set: if it has grown and has not been
		 * modified within the expiration window, evict the most
		 * loaded member. */
		if (atomic_read(&en->set.size) > 1 &&
		    jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) {
			struct ip_vs_dest *m;
			m = ip_vs_dest_set_max(&en->set);
			if (m)
				ip_vs_dest_set_erase(&en->set, m);
		}
	}
	/* Refresh the entry's LRU timestamp. */
	en->lastuse = jiffies;

	IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u "
		  "--> server %u.%u.%u.%u:%d\n",
		  NIPQUAD(en->addr),
		  NIPQUAD(dest->addr),
		  ntohs(dest->port));

	return dest;
}
/* * Least Connection scheduling */ static struct ip_vs_dest * ip_vs_lc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct list_head *l, *e; struct ip_vs_dest *dest, *least; unsigned int loh, doh; IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n"); /* * Simply select the server with the least number of * (activeconns<<5) + inactconns * Except whose weight is equal to zero. * If the weight is equal to zero, it means that the server is * quiesced, the existing connections to the server still get * served, but no new connection is assigned to the server. */ l = &svc->destinations; for (e=l->next; e!=l; e=e->next) { least = list_entry (e, struct ip_vs_dest, n_list); if (atomic_read(&least->weight) > 0) { loh = ip_vs_lc_dest_overhead(least); goto nextstage; } } return NULL; /* * Find the destination with the least load. */ nextstage: for (e=e->next; e!=l; e=e->next) { dest = list_entry(e, struct ip_vs_dest, n_list); if (atomic_read(&dest->weight) == 0) continue; doh = ip_vs_lc_dest_overhead(dest); if (doh < loh) { least = dest; loh = doh; } } IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", NIPQUAD(least->addr), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->inactconns)); return least; }
/*
 * Stop the IPVS synchronization thread and wait for it to exit.
 * Returns -ESRCH when no sync thread is running, 0 on success.
 */
int stop_sync_thread(void)
{
	DECLARE_WAITQUEUE(wait, current);

	if (!sync_pid)
		return -ESRCH;

	IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
	IP_VS_INFO("stopping sync thread %d ...\n", sync_pid);

	/* Enqueue ourselves on stop_sync_wait BEFORE raising the stop
	 * flag, so the sync thread's wake_up cannot be missed. */
	__set_current_state(TASK_UNINTERRUPTIBLE);
	add_wait_queue(&stop_sync_wait, &wait);
	ip_vs_sync_state = IP_VS_STATE_NONE;
	stop_sync = 1;
	/* Kick the sync thread out of its own wait... */
	wake_up(&sync_wait);
	/* ...and sleep until it clears stop_sync and wakes us. */
	schedule();
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&stop_sync_wait, &wait);

	/* Note: no need to reap the sync thread, because its parent
	   process is the init process */

	/* stop_sync still set after wakeup means the thread never ran
	 * its shutdown path -- that should be impossible. */
	if (stop_sync)
		IP_VS_BUG();

	return 0;
}
/* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; struct ip_vs_iphdr iph; ip_vs_fill_iph_addr_only(svc->af, skb, &iph); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); s = (struct ip_vs_sh_state *) svc->sched_data; dest = ip_vs_sh_get(svc->af, s, &iph.saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph.saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; }
static inline struct ip_vs_dest * __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { register struct list_head *l, *e; struct ip_vs_dest *dest, *least; int loh, doh; /* * We think the overhead of processing active connections is fifty * times higher than that of inactive connections in average. (This * fifty times might not be accurate, we will change it later.) We * use the following formula to estimate the overhead: * dest->activeconns*50 + dest->inactconns * and the load: * (dest overhead) / dest->weight * * Remember -- no floats in kernel mode!!! * The comparison of h1*w2 > h2*w1 is equivalent to that of * h1/w1 > h2/w2 * if every weight is larger than zero. * * The server with weight=0 is quiesced and will not receive any * new connection. */ l = &svc->destinations; for (e=l->next; e!=l; e=e->next) { least = list_entry(e, struct ip_vs_dest, n_list); if (atomic_read(&least->weight) > 0) { loh = atomic_read(&least->activeconns) * 50 + atomic_read(&least->inactconns); goto nextstage; } } return NULL; /* * Find the destination with the least load. */ nextstage: for (e=e->next; e!=l; e=e->next) { dest = list_entry(e, struct ip_vs_dest, n_list); doh = atomic_read(&dest->activeconns) * 50 + atomic_read(&dest->inactconns); if (loh * atomic_read(&dest->weight) > doh * atomic_read(&least->weight)) { least = dest; loh = doh; } } IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", NIPQUAD(least->addr), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); return least; }
static struct ip_vs_dest * ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_dh_bucket *tbl; struct ip_vs_iphdr iph; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); tbl = (struct ip_vs_dh_bucket *)svc->sched_data; dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { return NULL; } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph.daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; }
static struct ip_vs_conn * ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; if (likely(!inverse)) { cp = ip_vs_conn_out_get(IPPROTO_UDP, iph->saddr, htons(PORT_ISAKMP), iph->daddr, htons(PORT_ISAKMP)); } else { cp = ip_vs_conn_out_get(IPPROTO_UDP, iph->daddr, htons(PORT_ISAKMP), iph->saddr, htons(PORT_ISAKMP)); } if (!cp) { IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", inverse ? "ICMP+" : "", pp->name, NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); } return cp; }
/* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) port = ip_vs_sh_get_port(skb, iph); s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port); else dest = ip_vs_sh_get(svc, s, &iph->saddr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph->saddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; }
/*
 * Get a route to the real server of cp, using the per-dest cached dst
 * entry when still valid.  The cache is protected by dest->dst_lock.
 * NOTE(review): this function continues beyond this chunk (the
 * dest == NULL branch is not visible here).
 */
static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		/* Reuse the cached route if it is still valid for this
		 * TOS; otherwise do a fresh route lookup. */
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos, 0))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			/* Cache the route; dst_clone() takes the extra
			 * reference the cache keeps. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst));
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->u.dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
/*
 * Get route to destination or remote server
 * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
 *	    &4=Allow redirect from remote daddr to local
 * NOTE(review): this function continues beyond this chunk (the
 * dest == NULL branch is not visible here).
 */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
		   __be32 daddr, u32 rtos, int rt_mode)
{
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct rtable *rt;			/* Route to the other host */
	struct rtable *ort;			/* Original route */
	int local;

	if (dest) {
		spin_lock(&dest->dst_lock);
		/* Reuse the cached route if it is still valid for this
		 * TOS; otherwise do a fresh route lookup. */
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			/* Cache the route; dst_clone() takes the extra
			 * reference the cache keeps. */
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
/*
 * Locality-Based (weighted) Least-Connection scheduling
 */
static struct ip_vs_dest *
ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
	struct ip_vs_dest *dest;
	struct ip_vs_lblc_table *tbl;
	struct ip_vs_lblc_entry *en;

	IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");

	/* Look up the cached server for this destination IP. */
	tbl = (struct ip_vs_lblc_table *)svc->sched_data;
	en = ip_vs_lblc_get(tbl, iph->daddr);
	if (en == NULL) {
		/* No cache entry: choose by weighted least-connection
		 * and remember the mapping. */
		dest = __ip_vs_wlc_schedule(svc, iph);
		if (dest == NULL) {
			IP_VS_DBG(1, "no destination available\n");
			return NULL;
		}
		en = ip_vs_lblc_new(iph->daddr, dest);
		if (en == NULL) {
			return NULL;
		}
		ip_vs_lblc_hash(tbl, en);
	} else {
		dest = en->dest;
		/* Cached server unusable (gone, quiesced or overloaded):
		 * rechoose and swap the entry's dest reference.
		 * NOTE(review): the refcnt swap below is not done under
		 * any lock visible in this chunk -- presumably protected
		 * by the caller's locking; confirm before touching. */
		if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
		    atomic_read(&dest->weight) <= 0 ||
		    is_overloaded(dest, svc)) {
			dest = __ip_vs_wlc_schedule(svc, iph);
			if (dest == NULL) {
				IP_VS_DBG(1, "no destination available\n");
				return NULL;
			}
			atomic_dec(&en->dest->refcnt);
			atomic_inc(&dest->refcnt);
			en->dest = dest;
		}
	}
	/* Refresh the entry's LRU timestamp. */
	en->lastuse = jiffies;

	IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u "
		  "--> server %u.%u.%u.%u:%d\n",
		  NIPQUAD(en->addr),
		  NIPQUAD(dest->addr),
		  ntohs(dest->port));

	return dest;
}
/*
 * Return nonzero when the ACK segment in skb carries payload bytes
 * beyond the IP and TCP headers (th->doff is in 32-bit words).
 */
static inline int
syn_proxy_ack_has_data(struct sk_buff *skb, struct ip_vs_iphdr *iph,
		       struct tcphdr *th)
{
	unsigned int payload_len = skb->len - iph->len - th->doff * 4;

	IP_VS_DBG(6, "tot_len = %u, iph_len = %u, tcph_len = %u\n",
		  skb->len, iph->len, th->doff * 4);
	return payload_len != 0;
}
/*
 * Check whether skb fits within mtu for the chosen transmit path.
 * Returns true when the packet may proceed; returns false (after
 * sending the appropriate ICMP/ICMPv6 "too big" error) when it must
 * be dropped.
 */
static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
					  struct ip_vs_iphdr *ipvsh,
					  struct sk_buff *skb, int mtu)
{
#ifdef CONFIG_IP_VS_IPV6
	if (skb_af == AF_INET6) {
		struct net *net = dev_net(skb_dst(skb)->dev);

		if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
			/* icmpv6_send needs a device to reply from. */
			if (!skb->dev)
				skb->dev = net->loopback_dev;
			/* only send ICMP too big on first fragment */
			if (!ipvsh->fragoffs)
				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			IP_VS_DBG(1, "frag needed for %pI6c\n",
				  &ipv6_hdr(skb)->saddr);
			return false;
		}
	} else
#endif
	{
		struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));

		/* If we're going to tunnel the packet and pmtu discovery
		 * is disabled, we'll just fragment it anyway
		 */
		if ((rt_mode & IP_VS_RT_MODE_TUNNEL) && !sysctl_pmtu_disc(ipvs))
			return true;

		/* Oversized DF packet that GSO cannot segment: reject
		 * with ICMP fragmentation-needed carrying the mtu. */
		if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
			     skb->len > mtu && !skb_is_gso(skb))) {
			icmp_send(skb, ICMP_DEST_UNREACH,
				  ICMP_FRAG_NEEDED, htonl(mtu));
			IP_VS_DBG(1, "frag needed for %pI4\n",
				  &ip_hdr(skb)->saddr);
			return false;
		}
	}

	return true;
}
/*
 * Tear down the per-service SH scheduler state.
 */
static void ip_vs_sh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_state *state = svc->sched_data;

	/* Drop the dest references held by the hash buckets first. */
	ip_vs_sh_flush(state);

	/* Then free the state itself after an RCU grace period. */
	kfree_rcu(state, rcu_head);
	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);
}
/* * Round-Robin Scheduling */ static struct ip_vs_dest * ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct list_head *p, *q; struct ip_vs_dest *dest; IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n"); write_lock(&svc->sched_lock); p = (struct list_head *)svc->sched_data; p = p->next; q = p; do { /* skip list head */ if (q == &svc->destinations) { q = q->next; continue; } dest = list_entry(q, struct ip_vs_dest, n_list); if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) > 0) /* HIT */ goto out; q = q->next; } while (q != p); write_unlock(&svc->sched_lock); return NULL; out: svc->sched_data = q; write_unlock(&svc->sched_lock); IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " "activeconns %d refcnt %d weight %d\n", NIPQUAD(dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), atomic_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; }
/* get weighted least-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_list *e;
	struct ip_vs_dest *dest, *least;
	int loh, doh;

	if (set == NULL)
		return NULL;

	/* The whole walk runs under the set's read lock. */
	read_lock(&set->lock);
	/* select the first destination server, whose weight > 0 */
	for (e=set->list; e!=NULL; e=e->next) {
		least = e->dest;
		if (least->flags & IP_VS_DEST_F_OVERLOAD)
			continue;
		if ((atomic_read(&least->weight) > 0)
		    && (least->flags & IP_VS_DEST_F_AVAILABLE)) {
			/* overhead estimate: active connections are
			 * weighted 50x against inactive ones */
			loh = atomic_read(&least->activeconns) * 50
				+ atomic_read(&least->inactconns);
			goto nextstage;
		}
	}
	read_unlock(&set->lock);
	return NULL;

	/* find the destination with the weighted least load */
  nextstage:
	/* resume from the element after the first candidate; compare
	 * loh/lw > doh/dw cross-multiplied to avoid kernel floats */
	for (e=e->next; e!=NULL; e=e->next) {
		dest = e->dest;
		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
			continue;
		doh = atomic_read(&dest->activeconns) * 50
			+ atomic_read(&dest->inactconns);
		if ((loh * atomic_read(&dest->weight) >
		     doh * atomic_read(&least->weight))
		    && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
			least = dest;
			loh = doh;
		}
	}
	read_unlock(&set->lock);

	IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d "
		  "activeconns %d refcnt %d weight %d overhead %d\n",
		  NIPQUAD(least->addr), ntohs(least->port),
		  atomic_read(&least->activeconns),
		  atomic_read(&least->refcnt),
		  atomic_read(&least->weight), loh);
	return least;
}
/* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_sh_bucket *tbl; struct iphdr *iph = ip_hdr(skb); IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); tbl = (struct ip_vs_sh_bucket *)svc->sched_data; dest = ip_vs_sh_get(tbl, iph->saddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { return NULL; } IP_VS_DBG(6, "SH: source IP address %pI4 --> server %pI4:%u\n", &iph->saddr, &dest->addr, ntohs(dest->port)); return dest; }
/*
 * Tear down the per-service SH scheduler state: release the dest
 * references held by the buckets, then free the table.
 */
static int ip_vs_sh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_bucket *tbl = svc->sched_data;

	/* got to clean up hash buckets here */
	ip_vs_sh_flush(tbl);

	/* release the table itself */
	kfree(svc->sched_data);
	/* sizeof yields size_t: use %Zd (as ip_vs_dh_done_svc does),
	 * not %d, to avoid a format/argument mismatch on 64-bit. */
	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);

	return 0;
}
/*
 * Tear down the per-service DH scheduler state: release the dest
 * references held by the buckets, then free the table.
 */
static int ip_vs_dh_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_dh_bucket *buckets = svc->sched_data;

	/* Drop bucket references first, then the table memory. */
	ip_vs_dh_flush(buckets);

	kfree(buckets);
	IP_VS_DBG(6, "DH hash table (memory=%Zdbytes) released\n",
		  sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE);

	return 0;
}
/*
 * Weighted Round-Robin Scheduling
 * NOTE(review): this function continues beyond this chunk (the debug
 * print and `return dest` after the loop are not visible here).
 */
static struct ip_vs_dest *
ip_vs_wrr_schedule(struct ip_vs_service *svc, struct iphdr *iph)
{
	struct ip_vs_dest *dest;
	/* mark holds the WRR cursor: cl = current list position,
	 * cw = current weight threshold, di = weight gcd step,
	 * mw = max weight. */
	struct ip_vs_wrr_mark *mark = svc->sched_data;

	IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n");

	/*
	 * This loop will always terminate, because 0<mark->cw<max_weight,
	 * and at least one server has its weight equal to max_weight.
	 */
	write_lock(&svc->sched_lock);
	while (1) {
		if (mark->cl == &svc->destinations) {
			/* it is at the head of the destination list */
			if (mark->cl == mark->cl->next) {
				/* no dest entry */
				write_unlock(&svc->sched_lock);
				return NULL;
			}

			/* Wrapped around: lower the weight threshold. */
			mark->cl = svc->destinations.next;
			mark->cw -= mark->di;
			if (mark->cw <= 0) {
				mark->cw = mark->mw;
				/*
				 * Still zero, which means no available servers.
				 */
				if (mark->cw == 0) {
					mark->cl = &svc->destinations;
					write_unlock(&svc->sched_lock);
					IP_VS_INFO("ip_vs_wrr_schedule(): "
						   "no available servers\n");
					return NULL;
				}
			}
		} else
			mark->cl = mark->cl->next;

		if (mark->cl != &svc->destinations) {
			/* not at the head of the list */
			dest = list_entry(mark->cl, struct ip_vs_dest, n_list);
			/* First server whose weight reaches the current
			 * threshold wins this round. */
			if (atomic_read(&dest->weight) >= mark->cw) {
				write_unlock(&svc->sched_lock);
				break;
			}
		}
	}
/*
 * Syn-proxy DNAT handler for out-in traffic: subtract the recorded
 * sequence delta from th->ack_seq (and any SACK options), updating the
 * TCP checksum incrementally.  No-op when no delta is recorded.
 */
void
ip_vs_synproxy_dnat_handler(struct tcphdr *tcph, struct ip_vs_seq *sp_seq)
{
	__u32 ack_before;

	if (sp_seq->delta == 0)
		return;

	ack_before = ntohl(tcph->ack_seq);
	tcph->ack_seq = htonl((__u32) (ack_before - sp_seq->delta));
	syn_proxy_seq_csum_update(tcph, htonl(ack_before), tcph->ack_seq);
	/* SACK blocks carry sequence numbers too -- rewrite them. */
	syn_proxy_filter_opt_outin(tcph, sp_seq);

	IP_VS_DBG(6, "tcp_dnat_handler: tcph->ack_seq %u => %u, delta = %u \n",
		  ack_before, htonl(tcph->ack_seq), sp_seq->delta);
}
/* get weighted most-connection node in the destination set */
static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
{
	register struct ip_vs_dest_list *e;
	struct ip_vs_dest *dest, *most;
	int moh, doh;

	if (set == NULL)
		return NULL;

	/* The whole walk runs under the set's read lock. */
	read_lock(&set->lock);
	/* select the first destination server, whose weight > 0 */
	for (e=set->list; e!=NULL; e=e->next) {
		most = e->dest;
		if (atomic_read(&most->weight) > 0) {
			/* overhead estimate: active connections are
			 * weighted 50x against inactive ones */
			moh = atomic_read(&most->activeconns) * 50
				+ atomic_read(&most->inactconns);
			goto nextstage;
		}
	}
	read_unlock(&set->lock);
	return NULL;

	/* find the destination with the weighted most load */
  nextstage:
	/* resume from the element after the first candidate */
	for (e=e->next; e!=NULL; e=e->next) {
		dest = e->dest;
		doh = atomic_read(&dest->activeconns) * 50
			+ atomic_read(&dest->inactconns);
		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
		if ((moh * atomic_read(&dest->weight) <
		     doh * atomic_read(&most->weight))
		    && (atomic_read(&dest->weight) > 0)) {
			most = dest;
			moh = doh;
		}
	}
	read_unlock(&set->lock);

	IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d "
		  "activeconns %d refcnt %d weight %d overhead %d\n",
		  NIPQUAD(most->addr), ntohs(most->port),
		  atomic_read(&most->activeconns),
		  atomic_read(&most->refcnt),
		  atomic_read(&most->weight), moh);
	return most;
}
/*
 * Tear down the per-service LBLC scheduler state: stop the garbage
 * collection timer, flush all cached entries, then free the table.
 */
static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
{
	struct ip_vs_lblc_table *tbl = svc->sched_data;

	/* remove periodic timer */
	del_timer_sync(&tbl->periodic_timer);

	/* got to clean up table entries here */
	ip_vs_lblc_flush(tbl);

	/* release the table itself */
	kfree(svc->sched_data);
	/* sizeof yields size_t: use %Zd (as the DH scheduler does),
	 * not %d, to avoid a format/argument mismatch on 64-bit. */
	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
		  sizeof(struct ip_vs_lblc_table));

	return 0;
}
/* * Syn-proxy snat handler: * 1) check and stop ack storm. * 2)Update in-out seqs: include th->seq * and also correct tcph->check. * * Return 0 if ack storm is found and stoped. */ int ip_vs_synproxy_snat_handler(struct tcphdr *tcph, struct ip_vs_conn *cp) { __u32 old_seq; if (syn_proxy_is_ack_storm(tcph, cp) == 0) { return 0; } if (cp->syn_proxy_seq.delta != 0) { old_seq = ntohl(tcph->seq); tcph->seq = htonl((__u32) (old_seq + cp->syn_proxy_seq.delta)); syn_proxy_seq_csum_update(tcph, htonl(old_seq), tcph->seq); IP_VS_DBG(6, "tcp_snat_handler: tcph->seq %u => %u, delta = %u \n", old_seq, htonl(tcph->seq), cp->syn_proxy_seq.delta); } return 1; }
/*
 * Initialize the per-service SH scheduler state: allocate the zeroed
 * state structure and populate the hash buckets from the service's
 * current destinations.  Returns 0 on success, -ENOMEM on allocation
 * failure.
 */
static int ip_vs_sh_init_svc(struct ip_vs_service *svc)
{
	struct ip_vs_sh_state *state;

	/* allocate the SH table for this service */
	state = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL);
	if (state == NULL)
		return -ENOMEM;

	svc->sched_data = state;
	IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for "
		  "current service\n",
		  sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE);

	/* assign the hash buckets with current dests */
	ip_vs_sh_reassign(state, svc);

	return 0;
}