static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; int retc; ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); if (iph.protocol != IPPROTO_UDP) return -EINVAL; dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; if ((retc=skb_linearize(skb)) < 0) return retc; dptr = skb->data + dataoff; datalen = skb->len - dataoff; if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) return -EINVAL; p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC); if (!p->pe_data) return -ENOMEM; p->pe_data_len = matchlen; return 0; }
static struct ip_vs_dest * ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_dest *dest; struct ip_vs_dh_bucket *tbl; struct ip_vs_iphdr iph; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); tbl = (struct ip_vs_dh_bucket *)svc->sched_data; dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); if (!dest || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || atomic_read(&dest->weight) <= 0 || is_overloaded(dest)) { return NULL; } IP_VS_DBG_BUF(6, "DH: destination IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, &iph.daddr), IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); return dest; }
static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; struct ip_vs_iphdr iph; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); sh = skb_header_pointer(skb, iph.len, sizeof(_sctph), &_sctph); if (sh == NULL) return 0; sch = skb_header_pointer(skb, iph.len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { int ignored; if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( */ ip_vs_service_put(svc); *verdict = NF_DROP; return 0; } /* * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; } return 0; } ip_vs_service_put(svc); } /* NF_ACCEPT */ return 1; }
static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; int retc; ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); /* Only useful with UDP */ if (iph.protocol != IPPROTO_UDP) return -EINVAL; /* No Data ? */ dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; if ((retc=skb_linearize(skb)) < 0) return retc; dptr = skb->data + dataoff; datalen = skb->len - dataoff; if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen)) return -EINVAL; /* N.B: pe_data is only set on success, * this allows fallback to the default persistence logic on failure */ p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC); if (!p->pe_data) return -ENOMEM; p->pe_data_len = matchlen; return 0; }
/* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, * or selects a server and creates a connection entry plus a template. * Locking: we are svc user (svc->refcnt), so we hold all dests too * Protocols supported: TCP, UDP */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, __be16 ports[2]) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ union nf_inet_addr snet; /* source network of the client, after masking */ ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); /* Mask saddr with the netmask to adjust template granularity */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); else #endif snet.ip = iph.saddr.ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), IP_VS_DBG_ADDR(svc->af, &snet)); /* * As far as we know, FTP is a very complicated network protocol, and * it uses control connection and data connections. For active FTP, * FTP server initialize data connection to the client, its source port * is often 20. For passive FTP, FTP server tells the clients the port * that it passively listens to, and the client issues the data * connection. In the tunneling or direct routing mode, the load * balancer is on the client-to-server half of connection, the port * number is unknown to the load balancer. So, a conn template like * <caddr, 0, vaddr, 0, daddr, 0> is created for persistent FTP * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> * is created for other persistent services. */ if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, &iph.daddr, ports[1]); else ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* * No template found or the dest of the connection * template is not available. */ dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); return NULL; } /* * Create a template like <protocol,caddr,0, * vaddr,vport,daddr,dport> for non-ftp service, * and <protocol,caddr,0,vaddr,0,daddr,0> * for ftp service. */ if (svc->port != FTPPORT) ct = ip_vs_conn_new(svc->af, iph.protocol, &snet, 0, &iph.daddr, ports[1], &dest->addr, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else ct = ip_vs_conn_new(svc->af, iph.protocol, &snet, 0, &iph.daddr, 0, &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; } dport = dest->port; } else { /* * Note: persistent fwmark-based services and persistent * port zero service are handled here. * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ if (svc->fwmark) { union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, &fwmark, 0); } else ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* * If it is not persistent port zero, return NULL, * otherwise create a connection template. */ if (svc->port) return NULL; dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); return NULL; } /* * Create a template according to the service */ if (svc->fwmark) { union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; ct = ip_vs_conn_new(svc->af, IPPROTO_IP, &snet, 0, &fwmark, 0, &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); } else ct = ip_vs_conn_new(svc->af, iph.protocol, &snet, 0, &iph.daddr, 0, &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; ct->timeout = svc->timeout; } else { /* set destination with the found template */ dest = ct->dest; } dport = ports[1]; }
static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, struct sk_buff *skb, __be16 src_port, __be16 dst_port, int *ignored) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport = 0; unsigned int flags; struct ip_vs_conn_param param; const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; union nf_inet_addr snet; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); else #endif snet.ip = iph.saddr.ip & svc->netmask; IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " "mnet %s\n", IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), IP_VS_DBG_ADDR(svc->af, &snet)); { int protocol = iph.protocol; const union nf_inet_addr *vaddr = &iph.daddr; __be16 vport = 0; if (dst_port == svc->port) { if (svc->port != FTPPORT) vport = dst_port; } else { if (svc->fwmark) { protocol = IPPROTO_IP; vaddr = &fwmark; } } if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, vaddr, vport, ¶m) < 0) { *ignored = -1; return NULL; } } ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { dest = svc->scheduler->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); kfree(param.pe_data); *ignored = 0; return NULL; } if (dst_port == svc->port && svc->port != FTPPORT) dport = dest->port; ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest, skb->mark); if (ct == NULL) { kfree(param.pe_data); *ignored = -1; return NULL; } ct->timeout = svc->timeout; } else { dest = ct->dest; kfree(param.pe_data); } dport = dst_port; if (dport == svc->port && dest->port) dport = dest->port; flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph.protocol == IPPROTO_UDP)? IP_VS_CONN_F_ONE_PACKET : 0; ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, src_port, &iph.daddr, dst_port, ¶m); cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { ip_vs_conn_put(ct); *ignored = -1; return NULL; } ip_vs_control_add(cp, ct); ip_vs_conn_put(ct); ip_vs_conn_stats(cp, svc); return cp; } struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *ignored) { struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; unsigned int flags; *ignored = 1; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (pptr[0] == FTPDATA) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling FTPDATA"); return NULL; } if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); return NULL; } if (svc->flags & IP_VS_SVC_F_PERSISTENT) return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); *ignored = 0; if (!svc->fwmark && pptr[1] != svc->port) { if (!svc->port) pr_err("Schedule: port zero only supported " "in persistent services, " "check your ipvs configuration\n"); return NULL; } dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); return NULL; } flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph.protocol == IPPROTO_UDP)? IP_VS_CONN_F_ONE_PACKET : 0; { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); if (!cp) { *ignored = -1; return NULL; } } IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", ip_vs_fwd_tag(cp), IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport), IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport), IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport), cp->flags, atomic_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; } int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; #ifdef CONFIG_SYSCTL struct net *net; struct netns_ipvs *ipvs; int unicast; #endif ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } #ifdef CONFIG_SYSCTL net = skb_net(skb); #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; else #endif unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); ipvs = net_ipvs(net); if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph.protocol == IPPROTO_UDP)? IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, IP_VS_CONN_F_BYPASS | flags, NULL, skb->mark); if (!cp) return NF_DROP; } ip_vs_in_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); ret = cp->packet_xmit(skb, cp, pd->pp); atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; }
static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; } /* * Syn-proxy step 2 logic: receive client's * 3-handshake Ack packet */ if (ip_vs_synproxy_ack_rcv(af, skb, th, pp, cpp, &iph, verdict) == 0) { return 0; } if (th->syn && !th->ack && !th->fin && !th->rst && (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. * We have to drop this packet :( */ ip_vs_service_put(svc); *verdict = NF_DROP; return 0; } /* * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ *cpp = ip_vs_schedule(svc, skb, 0); if (!*cpp) { *verdict = ip_vs_leave(svc, skb, pp); return 0; } /* * Set private establish state timeout into cp from svc, * due cp may use its user establish state timeout * different from sysctl_ip_vs_tcp_timeouts */ (*cpp)->est_timeout = svc->est_timeout; ip_vs_service_put(svc); return 1; } /* drop tcp packet which send to vip and !vport */ if (sysctl_ip_vs_tcp_drop_entry && (svc = ip_vs_lookup_vip(af, iph.protocol, &iph.daddr))) { IP_VS_INC_ESTATS(ip_vs_esmib, DEFENCE_TCP_DROP); *verdict = NF_DROP; return 0; } return 1; }
static bool ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_ipvs_mtinfo *data = par->matchinfo; /* */ const u_int8_t family = par->family; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; bool match = true; if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { match = skb->ipvs_property ^ !!(data->invert & XT_IPVS_IPVS_PROPERTY); goto out; } /* */ if (!skb->ipvs_property) { match = false; goto out; } ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); if (data->bitmask & XT_IPVS_PROTO) if ((iph.protocol == data->l4proto) ^ !(data->invert & XT_IPVS_PROTO)) { match = false; goto out; } pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) { match = false; goto out; } /* */ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* */); if (unlikely(cp == NULL)) { match = false; goto out; } /* */ if (data->bitmask & XT_IPVS_VPORT) if ((cp->vport == data->vport) ^ !(data->invert & XT_IPVS_VPORT)) { match = false; goto out_put_cp; } if (data->bitmask & XT_IPVS_VPORTCTL) if ((cp->control != NULL && cp->control->vport == data->vportctl) ^ !(data->invert & XT_IPVS_VPORTCTL)) { match = false; goto out_put_cp; } if (data->bitmask & XT_IPVS_DIR) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct == NULL || nf_ct_is_untracked(ct)) { match = false; goto out_put_cp; } if ((ctinfo >= IP_CT_IS_REPLY) ^ !!(data->invert & XT_IPVS_DIR)) { match = false; goto out_put_cp; } } if (data->bitmask & XT_IPVS_METHOD) if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ !(data->invert & XT_IPVS_METHOD)) { match = false; goto out_put_cp; } if (data->bitmask & XT_IPVS_VADDR) { if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, &data->vmask, family) ^ !(data->invert & XT_IPVS_VADDR)) { match = false; goto out_put_cp; } } out_put_cp: __ip_vs_conn_put(cp); out: pr_debug("match=%d\n", match); return match; }