int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset) { struct rt6_info *rt; /* Route to the other host */ int mtu; int rc; int local; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc_unchecked(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, 0, 1|2|4))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu && !skb_is_gso(skb)) { if (!skb->dev) { struct net *net = dev_net(skb_dst(skb)->dev); skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp_v6(skb, pp, cp, 0); if (!local || !skb->dev) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { /* destined to loopback, do we need to change route? */ dst_release(&rt->dst); } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: dst_release(&rt->dst); goto tx_error; }
/* * ICMP packet transmitter * called by the ip_vs_in_icmp */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset) { struct rtable *rt; /* Route to the other host */ int mtu; int rc; int local; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc_unchecked(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(ip_hdr(skb)->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } } #endif /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(rt->rt_dst) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", __func__, &cp->daddr.ip); goto tx_error_put; } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && !skb_is_gso(skb)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error_put; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; ip_vs_nat_icmp(skb, pp, cp, 0); if (!local) { /* drop the old route when skb is not shared */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { ip_rt_put(rt); /* * Some IPv4 replies get local address from routes, * not from iph, so while we DNAT after routing * we need this second input/output route. */ if (!__ip_vs_reroute_locally(skb)) goto tx_error; } /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); rc = NF_STOLEN; goto out; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); rc = NF_STOLEN; out: LeaveFunction(10); return rc; tx_error_put: ip_rt_put(rt); goto tx_error; }
int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset, unsigned int hooknum, struct ip_vs_iphdr *ipvsh) { struct rt6_info *rt; /* Route to the other host */ int rc; int local; int rt_mode; EnterFunction(10); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { if (cp->packet_xmit) rc = cp->packet_xmit(skb, cp, pp, ipvsh); else rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc_unchecked(&cp->in_pkts); goto out; } /* * mangle and send the packet here (only for VS/NAT) */ /* LOCALNODE from FORWARD hook is not supported */ rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; rcu_read_lock(); local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) goto tx_error; rt = (struct rt6_info *) skb_dst(skb); /* * Avoid duplicate tuple in reply direction for NAT traffic * to local address when connection is sync-ed */ #if IS_ENABLED(CONFIG_NF_CONNTRACK) if (cp->flags & IP_VS_CONN_F_SYNC && local) { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { IP_VS_DBG(10, "%s(): " "stopping DNAT to local address %pI6\n", __func__, &cp->daddr.in6); goto tx_error; } } #endif /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI6\n", __func__, &cp->daddr.in6); goto tx_error; } /* copy-on-write the packet before mangling it */ if (!skb_make_writable(skb, offset)) goto tx_error; if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error; ip_vs_nat_icmp_v6(skb, pp, cp, 0); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); rcu_read_unlock(); goto out; tx_error: kfree_skb(skb); rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); return rc; }
/* * Handle ICMP messages in the inside-to-outside direction (outgoing). * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. * (Only used in VS/NAT) */ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) { struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; *related = 1; /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); if (!skb) return NF_STOLEN; *pskb = skb; } iph = ip_hdr(skb); offset = ihl = iph->ihl * 4; ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); if (ic == NULL) return NF_DROP; IP_VS_DBG(12, "Outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", ic->type, ntohs(icmp_id(ic)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((ic->type != ICMP_DEST_UNREACH) && (ic->type != ICMP_SOURCE_QUENCH) && (ic->type != ICMP_TIME_EXCEEDED)) { *related = 0; return NF_ACCEPT; } /* Now find the contained IP header */ offset += sizeof(_icmph); cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ pp = ip_vs_proto_get(cih->protocol); if (!pp) return NF_ACCEPT; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && pp->dont_defrag)) return NF_ACCEPT; IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); offset += cih->ihl * 4; /* The embedded headers contain source and dest in reverse order */ cp = pp->conn_out_get(skb, pp, cih, offset, 1); if (!cp) return NF_ACCEPT; verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { IP_VS_ERR("shouldn't reach here, because the box is on the" "half connection in the tun/dr module.\n"); } /* Ensure the checksum is correct */ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { /* Failed checksum! */ IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); goto out; } if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); if (!ip_vs_make_skb_writable(pskb, offset)) goto out; skb = *pskb; ip_vs_nat_icmp(skb, pp, cp, 1); /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); skb->ipvs_property = 1; verdict = NF_ACCEPT; out: __ip_vs_conn_put(cp); return verdict; }
/* * Handle ICMP messages in the outside-to-inside direction (incoming) * and sometimes in outgoing direction from ip_vs_forward_icmp. * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. */ static int ip_vs_in_icmp(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph; struct icmphdr *icmph; struct iphdr *ciph; /* The ip header contained within the ICMP */ __u16 *pptr; /* port numbers from TCP/UDP contained header */ unsigned short len; unsigned short clen, csize; struct ip_vs_conn *cp; struct rtable *rt; /* Route to the other host */ int mtu; if (skb_is_nonlinear(skb)) { if (skb_linearize(skb, GFP_ATOMIC) != 0) return NF_DROP; } iph = skb->nh.iph; ip_send_check(iph); icmph = (struct icmphdr *)((char *)iph + (iph->ihl << 2)); len = ntohs(iph->tot_len) - (iph->ihl<<2); if (len < sizeof(struct icmphdr)) return NF_DROP; IP_VS_DBG(12, "icmp in (%d,%d) %u.%u.%u.%u -> %u.%u.%u.%u\n", icmph->type, ntohs(icmp_id(icmph)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); if ((icmph->type != ICMP_DEST_UNREACH) && (icmph->type != ICMP_SOURCE_QUENCH) && (icmph->type != ICMP_TIME_EXCEEDED)) return NF_ACCEPT; /* * If we get here we have an ICMP error of one of the above 3 types * Now find the contained IP header */ clen = len - sizeof(struct icmphdr); if (clen < sizeof(struct iphdr)) return NF_DROP; ciph = (struct iphdr *) (icmph + 1); csize = ciph->ihl << 2; if (clen < csize) return NF_DROP; /* We are only interested ICMPs generated from TCP or UDP packets */ if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP) return NF_ACCEPT; /* Skip non-first embedded TCP/UDP fragments */ if (ciph->frag_off & __constant_htons(IP_OFFSET)) return NF_ACCEPT; /* We need at least TCP/UDP ports here */ if (clen < csize + sizeof(struct udphdr)) return NF_DROP; /* Ensure the checksum is correct */ if (ip_compute_csum((unsigned char *) icmph, len)) { /* Failed checksum! */ IP_VS_ERR_RL("incoming ICMP: failed checksum from " "%d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); return NF_DROP; } pptr = (__u16 *)&(((char *)ciph)[csize]); IP_VS_DBG(11, "Handling incoming ICMP for " "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n", NIPQUAD(ciph->saddr), ntohs(pptr[0]), NIPQUAD(ciph->daddr), ntohs(pptr[1])); /* This is pretty much what ip_vs_conn_in_get() does, except parameters are in the reverse order */ cp = ip_vs_conn_in_get(ciph->protocol, ciph->daddr, pptr[1], ciph->saddr, pptr[0]); if (cp == NULL) return NF_ACCEPT; ip_vs_in_stats(cp, skb); /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be forwarded directly here, because there is no need to translate address/port back */ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { int ret; if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp); else ret = NF_ACCEPT; atomic_inc(&cp->in_pkts); ip_vs_conn_put(cp); return ret; } /* * mangle and send the packet here */ if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ mtu = rt->u.dst.pmtu; if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); goto tx_error; } /* drop old route */ dst_release(skb->dst); skb->dst = &rt->u.dst; /* copy-on-write the packet before mangling it */ if (ip_vs_skb_cow(skb, rt->u.dst.dev->hard_header_len, &iph, (unsigned char**)&icmph)) { ip_vs_conn_put(cp); return NF_DROP; } ciph = (struct iphdr *) (icmph + 1); pptr = (__u16 *)&(((char *)ciph)[csize]); /* The ICMP packet for VS/NAT must be written to correct addresses before being forwarded to the right server */ /* First change the dest IP address, and recalc checksum */ iph->daddr = cp->daddr; ip_send_check(iph); /* Now change the *source* address in the contained IP */ ciph->saddr = cp->daddr; ip_send_check(ciph); /* the TCP/UDP source port - cannot redo check */ pptr[0] = cp->dport; /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "Forwarding incoming ICMP to " "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n", NIPQUAD(ciph->saddr), ntohs(pptr[0]), NIPQUAD(ciph->daddr), ntohs(pptr[1])); #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 1 << NF_IP_LOCAL_OUT; #endif /* CONFIG_NETFILTER_DEBUG */ ip_send(skb); ip_vs_conn_put(cp); return NF_STOLEN; tx_error_icmp: dst_link_failure(skb); tx_error: dev_kfree_skb(skb); ip_vs_conn_put(cp); return NF_STOLEN; }
/* * Handle ICMP messages in the inside-to-outside direction (outgoing). * Find any that might be relevant, check against existing connections, * forward to the right destination host if relevant. * Currently handles error types - unreachable, quench, ttl exceeded. * (Only used in VS/NAT) */ static int ip_vs_out_icmp(struct sk_buff **skb_p) { struct sk_buff *skb = *skb_p; struct iphdr *iph; struct icmphdr *icmph; struct iphdr *ciph; /* The ip header contained within the ICMP */ __u16 *pptr; /* port numbers from TCP/UDP contained header */ unsigned short ihl; unsigned short len; unsigned short clen, csize; struct ip_vs_conn *cp; /* reassemble IP fragments, but will it happen in ICMP packets?? */ if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) { skb = ip_defrag(skb, IP_DEFRAG_VS_OUT); if (!skb) return NF_STOLEN; *skb_p = skb; } if (skb_is_nonlinear(skb)) { if (skb_linearize(skb, GFP_ATOMIC) != 0) return NF_DROP; ip_send_check(skb->nh.iph); } iph = skb->nh.iph; ihl = iph->ihl << 2; icmph = (struct icmphdr *)((char *)iph + ihl); len = ntohs(iph->tot_len) - ihl; if (len < sizeof(struct icmphdr)) return NF_DROP; IP_VS_DBG(12, "outgoing ICMP (%d,%d) %u.%u.%u.%u->%u.%u.%u.%u\n", icmph->type, ntohs(icmp_id(icmph)), NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); /* * Work through seeing if this is for us. * These checks are supposed to be in an order that means easy * things are checked first to speed up processing.... however * this means that some packets will manage to get a long way * down this stack and then be rejected, but that's life. */ if ((icmph->type != ICMP_DEST_UNREACH) && (icmph->type != ICMP_SOURCE_QUENCH) && (icmph->type != ICMP_TIME_EXCEEDED)) return NF_ACCEPT; /* Now find the contained IP header */ clen = len - sizeof(struct icmphdr); if (clen < sizeof(struct iphdr)) return NF_DROP; ciph = (struct iphdr *) (icmph + 1); csize = ciph->ihl << 2; if (clen < csize) return NF_DROP; /* We are only interested ICMPs generated from TCP or UDP packets */ if (ciph->protocol != IPPROTO_UDP && ciph->protocol != IPPROTO_TCP) return NF_ACCEPT; /* Skip non-first embedded TCP/UDP fragments */ if (ciph->frag_off & __constant_htons(IP_OFFSET)) return NF_ACCEPT; /* We need at least TCP/UDP ports here */ if (clen < csize + sizeof(struct udphdr)) return NF_DROP; /* * Find the ports involved - this packet was * incoming so the ports are right way round * (but reversed relative to outer IP header!) */ pptr = (__u16 *)&(((char *)ciph)[csize]); /* Ensure the checksum is correct */ if (ip_compute_csum((unsigned char *) icmph, len)) { /* Failed checksum! */ IP_VS_DBG(1, "forward ICMP: failed checksum from %d.%d.%d.%d!\n", NIPQUAD(iph->saddr)); return NF_DROP; } IP_VS_DBG(11, "Handling outgoing ICMP for " "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n", NIPQUAD(ciph->saddr), ntohs(pptr[0]), NIPQUAD(ciph->daddr), ntohs(pptr[1])); /* ciph content is actually <protocol, caddr, cport, daddr, dport> */ cp = ip_vs_conn_out_get(ciph->protocol, ciph->daddr, pptr[1], ciph->saddr, pptr[0]); if (!cp) return NF_ACCEPT; if (IP_VS_FWD_METHOD(cp) != 0) { IP_VS_ERR("shouldn't reach here, because the box is on the" "half connection in the tun/dr module.\n"); } /* Now we do real damage to this packet...! */ /* First change the source IP address, and recalc checksum */ iph->saddr = cp->vaddr; ip_send_check(iph); /* Now change the *dest* address in the contained IP */ ciph->daddr = cp->vaddr; ip_send_check(ciph); /* the TCP/UDP dest port - cannot redo check */ pptr[1] = cp->vport; /* And finally the ICMP checksum */ icmph->checksum = 0; icmph->checksum = ip_compute_csum((unsigned char *) icmph, len); skb->ip_summed = CHECKSUM_UNNECESSARY; /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); ip_vs_conn_put(cp); IP_VS_DBG(11, "Forwarding correct outgoing ICMP to " "%u.%u.%u.%u:%d -> %u.%u.%u.%u:%d\n", NIPQUAD(ciph->saddr), ntohs(pptr[0]), NIPQUAD(ciph->daddr), ntohs(pptr[1])); skb->nfcache |= NFC_IPVS_PROPERTY; return NF_ACCEPT; }