static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
{
	struct xt_ct_target_info_v1 *info = par->targinfo;
	struct nf_conn *ct = info->ct;
	struct nf_conn_help *help;
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	struct nf_conn_timeout *timeout_ext;
	typeof(nf_ct_timeout_put_hook) timeout_put;
#endif

	if (!nf_ct_is_untracked(ct)) {
		help = nfct_help(ct);
		if (help)
			module_put(help->helper->me);

		nf_ct_l3proto_module_put(par->family);

#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
		rcu_read_lock();
		timeout_put = rcu_dereference(nf_ct_timeout_put_hook);

		if (timeout_put) {
			timeout_ext = nf_ct_timeout_find(ct);
			if (timeout_ext)
				timeout_put(timeout_ext->timeout);
		}
		rcu_read_unlock();
#endif
	}
	nf_ct_put(info->ct);
}
static bool
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const struct xt_match *match,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      bool *hotdrop)
{
	const struct xt_state_info *sinfo = matchinfo;
	enum ip_conntrack_info ctinfo;
	unsigned int statebit;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

	if (!ct)
		statebit = XT_STATE_INVALID;
	else {
		if (nf_ct_is_untracked(ct))
			statebit = XT_STATE_UNTRACKED;
		else
			statebit = XT_STATE_BIT(ctinfo);
	}
	return (sinfo->statemask & statebit);
}
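/*
 * For reference: the statebit values tested by the state match above come
 * from the xt_state uapi header. A minimal sketch of those definitions, as
 * found in mainline include/uapi/linux/netfilter/xt_state.h (exact values
 * should be verified against the kernel version in use):
 */
#define XT_STATE_BIT(ctinfo)	(1 << ((ctinfo) % IP_CT_IS_REPLY + 1))
#define XT_STATE_INVALID	(1 << 0)
#define XT_STATE_UNTRACKED	(1 << (IP_CT_NUMBER + 1))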
static struct sock *xt_socket_lookup_slow_v4(struct net *net,
					     const struct sk_buff *skb,
					     const struct net_device *indev)
{
	const struct iphdr *iph = ip_hdr(skb);
	__be32 uninitialized_var(daddr), uninitialized_var(saddr);
	__be16 uninitialized_var(dport), uninitialized_var(sport);
	u8 uninitialized_var(protocol);
#ifdef XT_SOCKET_HAVE_CONNTRACK
	struct nf_conn const *ct;
	enum ip_conntrack_info ctinfo;
#endif

	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
		struct udphdr _hdr, *hp;

		hp = skb_header_pointer(skb, ip_hdrlen(skb),
					sizeof(_hdr), &_hdr);
		if (hp == NULL)
			return NULL;

		protocol = iph->protocol;
		saddr = iph->saddr;
		sport = hp->source;
		daddr = iph->daddr;
		dport = hp->dest;

	} else if (iph->protocol == IPPROTO_ICMP) {
		if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
					 &sport, &dport))
			return NULL;
	} else {
		return NULL;
	}

#ifdef XT_SOCKET_HAVE_CONNTRACK
	/* Do the lookup with the original socket address in
	 * case this is a reply packet of an established
	 * SNAT-ted connection.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct && !nf_ct_is_untracked(ct) &&
	    ((iph->protocol != IPPROTO_ICMP &&
	      ctinfo == IP_CT_ESTABLISHED_REPLY) ||
	     (iph->protocol == IPPROTO_ICMP &&
	      ctinfo == IP_CT_RELATED_REPLY)) &&
	    (ct->status & IPS_SRC_NAT_DONE)) {
		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
		dport = (iph->protocol == IPPROTO_TCP) ?
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
	}
#endif

	return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
				     sport, dport, indev);
}
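/*
 * extract_icmp4_fields(), called above, recovers the embedded transport
 * tuple from the payload of an ICMP error. A sketch along the lines of the
 * mainline net/netfilter/xt_socket.c helper (details should be verified
 * against the kernel version in use):
 */
static int
extract_icmp4_fields(const struct sk_buff *skb,
		     u8 *protocol,
		     __be32 *raddr,
		     __be32 *laddr,
		     __be16 *rport,
		     __be16 *lport)
{
	unsigned int outside_hdrlen = ip_hdrlen(skb);
	struct iphdr *inside_iph, _inside_iph;
	struct icmphdr *icmph, _icmph;
	__be16 *ports, _ports[2];

	icmph = skb_header_pointer(skb, outside_hdrlen,
				   sizeof(_icmph), &_icmph);
	if (icmph == NULL)
		return 1;

	/* Only ICMP errors quote the offending packet. */
	switch (icmph->type) {
	case ICMP_DEST_UNREACH:
	case ICMP_SOURCE_QUENCH:
	case ICMP_REDIRECT:
	case ICMP_TIME_EXCEEDED:
	case ICMP_PARAMETERPROB:
		break;
	default:
		return 1;
	}

	inside_iph = skb_header_pointer(skb, outside_hdrlen +
					sizeof(struct icmphdr),
					sizeof(_inside_iph), &_inside_iph);
	if (inside_iph == NULL)
		return 1;

	if (inside_iph->protocol != IPPROTO_TCP &&
	    inside_iph->protocol != IPPROTO_UDP)
		return 1;

	ports = skb_header_pointer(skb, outside_hdrlen +
				   sizeof(_icmph) + (inside_iph->ihl << 2),
				   sizeof(_ports), &_ports);
	if (ports == NULL)
		return 1;

	/* The quoted packet was sent by this host, so its saddr is local. */
	*protocol = inside_iph->protocol;
	*laddr = inside_iph->saddr;
	*lport = ports[0];
	*raddr = inside_iph->daddr;
	*rport = ports[1];

	return 0;
}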
static unsigned int
nf_nat_fn(const struct nf_hook_ops *ops,
	  struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  int (*okfn)(struct sk_buff *))
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conn_nat *nat;
	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
	unsigned int ret;

	if (ct == NULL || nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	nat = nfct_nat(ct);
	if (nat == NULL) {
		/* Conntrack module was loaded late, can't add extension. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL)
			return NF_ACCEPT;
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED + IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   ops->hooknum))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall through */
	case IP_CT_NEW:
		if (nf_nat_initialized(ct, maniptype))
			break;

		ret = nft_do_chain(ops, skb, in, out, okfn);
		if (ret != NF_ACCEPT)
			return ret;

		if (!nf_nat_initialized(ct, maniptype)) {
			ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
			if (ret != NF_ACCEPT)
				return ret;
		}
	default:
		break;
	}

	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
}
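/*
 * HOOK2MANIP, used throughout these NAT hook functions, derives the NAT
 * manipulation type from the hook number: only POST_ROUTING and LOCAL_IN
 * do source NAT. A sketch of the mainline include/net/netfilter/nf_nat.h
 * definition (NF_NAT_MANIP_SRC is 0 and NF_NAT_MANIP_DST is 1, so the
 * boolean expression maps directly onto the enum):
 */
#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \
			     (hooknum) != NF_INET_LOCAL_IN)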
static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
{
	struct xt_ct_target_info *info = par->targinfo;
	struct nf_conn *ct = info->ct;
	struct nf_conn_help *help;

	if (!nf_ct_is_untracked(ct)) {
		help = nfct_help(ct);
		if (help)
			module_put(help->helper->me);

		nf_ct_l3proto_module_put(par->family);
	}
	nf_ct_put(info->ct);
}
static bool
state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
{
	const struct xt_state_info *sinfo = par->matchinfo;
	enum ip_conntrack_info ctinfo;
	unsigned int statebit;

	if (nf_ct_is_untracked(skb))
		statebit = XT_STATE_UNTRACKED;
	else if (!nf_ct_get(skb, &ctinfo))
		statebit = XT_STATE_INVALID;
	else
		statebit = XT_STATE_BIT(ctinfo);

	return (sinfo->statemask & statebit);
}
static bool
xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	struct sk_buff *pskb = (struct sk_buff *)skb;
	const struct xt_cluster_match_info *info = par->matchinfo;
	const struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	unsigned long hash;

	/* This match assumes that all nodes see the same packets. This can be
	 * achieved if the switch that connects the cluster nodes supports some
	 * sort of 'port mirroring'. However, if your switch does not support
	 * this, your cluster nodes can reply to ARP requests using a multicast
	 * MAC address. Thus, your switch will flood the same packets to the
	 * cluster nodes with the same multicast MAC address. Using a multicast
	 * link address is an RFC 1812 (section 3.3.2) violation, but this
	 * works fine in practice.
	 *
	 * Unfortunately, if you use the multicast MAC address, the link layer
	 * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted
	 * by TCP and others for packets coming to this node. For that reason,
	 * this match mangles skbuff's pkt_type if it detects a packet
	 * addressed to a unicast address but using PACKET_MULTICAST. Yes, I
	 * know, matches should not alter packets, but we are doing this here
	 * because we would need to add a PKTTYPE target for this sole purpose.
	 */
	if (!xt_cluster_is_multicast_addr(skb, par->family) &&
	    skb->pkt_type == PACKET_MULTICAST) {
		pskb->pkt_type = PACKET_HOST;
	}

	ct = nf_ct_get(skb, &ctinfo);
	if (ct == NULL)
		return false;

	if (nf_ct_is_untracked(ct))
		return false;

	if (ct->master)
		hash = xt_cluster_hash(ct->master, info);
	else
		hash = xt_cluster_hash(ct, info);

	return !!((1 << hash) & info->node_mask) ^
	       !!(info->flags & XT_CLUSTER_F_INV);
}
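/*
 * xt_cluster_hash(), used above, hashes the conntrack entry's original
 * source address and folds it onto the number of cluster nodes. A sketch
 * of the mainline net/netfilter/xt_cluster.c helpers (older kernels used
 * a plain modulo on total_nodes instead of reciprocal_scale(); verify
 * against the kernel in use):
 */
static inline u_int32_t
xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info)
{
	return jhash_1word(ip, info->hash_seed);
}

static inline u_int32_t
xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info)
{
	return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed);
}

static inline u_int32_t
xt_cluster_hash(const struct nf_conn *ct,
		const struct xt_cluster_match_info *info)
{
	u_int32_t hash = 0;

	switch (nf_ct_l3num(ct)) {
	case AF_INET:
		hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
		break;
	case AF_INET6:
		hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
		break;
	default:
		WARN_ON(1);
		break;
	}

	return reciprocal_scale(hash, info->total_nodes);
}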
static int
hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
		    const struct xt_hmark_info *info)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
	struct nf_conntrack_tuple *otuple;
	struct nf_conntrack_tuple *rtuple;

	if (ct == NULL || nf_ct_is_untracked(ct))
		return -1;

	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
				 info->src_mask.all);
	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
				 info->dst_mask.all);

	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
		return 0;

	t->proto = nf_ct_protonum(ct);
	if (t->proto != IPPROTO_ICMP) {
		t->uports.p16.src = otuple->src.u.all;
		t->uports.p16.dst = rtuple->src.u.all;
		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
				info->port_set.v32;
		if (t->uports.p16.dst < t->uports.p16.src)
			swap(t->uports.p16.dst, t->uports.p16.src);
	}

	return 0;
#else
	return -1;
#endif
}
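/*
 * hmark_addr_mask(), used above, reduces an L3 address (IPv4 or IPv6) to
 * a single masked 32-bit word suitable for hashing. A sketch of the
 * mainline net/netfilter/xt_HMARK.c helpers (verify against the kernel
 * in use):
 */
static inline __be32
hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
{
	return (addr32[0] & mask[0]) ^
	       (addr32[1] & mask[1]) ^
	       (addr32[2] & mask[2]) ^
	       (addr32[3] & mask[3]);
}

static inline __be32
hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
{
	switch (l3num) {
	case AF_INET:
		return *addr32 & *mask;
	case AF_INET6:
		return hmark_addr6_mask(addr32, mask);
	}
	return 0;
}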
static int
match(const struct sk_buff *skb,
      const struct net_device *in,
      const struct net_device *out,
      const void *matchinfo,
      int offset,
      unsigned int protoff,
      int *hotdrop)
{
	const struct xt_state_info *sinfo = matchinfo;
	enum ip_conntrack_info ctinfo;
	unsigned int statebit;

	if (nf_ct_is_untracked(skb))
		statebit = XT_STATE_UNTRACKED;
	else if (!nf_ct_get_ctinfo(skb, &ctinfo))
		statebit = XT_STATE_INVALID;
	else
		statebit = XT_STATE_BIT(ctinfo);

	return (sinfo->statemask & statebit);
}
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;		/* Route to the other host */
	int local, rc;

	EnterFunction(10);

	rcu_read_lock();
	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) {
		__be16 _pt, *p;

		p = skb_header_pointer(skb, ipvsh->len, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				      ipvsh, 0,
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR);
	if (local < 0)
		goto tx_error;
	rt = (struct rt6_info *) skb_dst(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp, ipvsh))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	rcu_read_unlock();

	LeaveFunction(10);
	return rc;

tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	rcu_read_unlock();
	return NF_STOLEN;
}
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, int offset, unsigned int hooknum,
		   struct ip_vs_iphdr *ipvsh)
{
	struct rt6_info *rt;	/* Route to the other host */
	int rc;
	int local;
	int rt_mode;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp, ipvsh);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc_unchecked(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	/* LOCALNODE from FORWARD hook is not supported */
	rt_mode = (hooknum != NF_INET_FORWARD) ?
		  IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
		  IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
	rcu_read_lock();
	local = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
				      ipvsh, 0, rt_mode);
	if (local < 0)
		goto tx_error;
	rt = (struct rt6_info *) skb_dst(skb);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
	rcu_read_unlock();
	goto out;

tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
}
static unsigned int
nf_nat_ipv6_fn(unsigned int hooknum,
	       struct sk_buff *skb,
	       const struct net_device *in,
	       const struct net_device *out,
	       int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
	__be16 frag_off;
	int hdrlen;
	u8 nexthdr;

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		nexthdr = ipv6_hdr(skb)->nexthdr;
		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
					  &nexthdr, &frag_off);

		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
							     hooknum, hdrlen))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
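/*
 * nf_nat_oif_changed(), tested above, detects that a masqueraded
 * connection's output interface changed, which invalidates the NAT
 * binding so the conntrack entry must be killed. A sketch of the
 * mainline include/net/netfilter/nf_nat.h helper (the config guards
 * vary across kernel versions):
 */
static inline bool nf_nat_oif_changed(unsigned int hooknum,
				      enum ip_conntrack_info ctinfo,
				      struct nf_conn_nat *nat,
				      const struct net_device *out)
{
#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
    IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
	return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
	       CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
	       nat->masq_index != out->ifindex;
#else
	return false;
#endif
}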
static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_ipvs_mtinfo *data = par->matchinfo;
	/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
	const u_int8_t family = par->family;
	struct ip_vs_iphdr iph;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn *cp;
	bool match = true;

	if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
		match = skb->ipvs_property ^
			!!(data->invert & XT_IPVS_IPVS_PROPERTY);
		goto out;
	}

	/* other flags than XT_IPVS_IPVS_PROPERTY are set */
	if (!skb->ipvs_property) {
		match = false;
		goto out;
	}

	ip_vs_fill_iphdr(family, skb_network_header(skb), &iph);

	if (data->bitmask & XT_IPVS_PROTO)
		if ((iph.protocol == data->l4proto) ^
		    !(data->invert & XT_IPVS_PROTO)) {
			match = false;
			goto out;
		}

	pp = ip_vs_proto_get(iph.protocol);
	if (unlikely(!pp)) {
		match = false;
		goto out;
	}

	/* Check if the packet belongs to an existing entry */
	cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
	if (unlikely(cp == NULL)) {
		match = false;
		goto out;
	}

	/*
	 * We found a connection, i.e. ct != 0, make sure to call
	 * __ip_vs_conn_put before returning.  In our case jump to out_put_cp.
	 */
	if (data->bitmask & XT_IPVS_VPORT)
		if ((cp->vport == data->vport) ^
		    !(data->invert & XT_IPVS_VPORT)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_VPORTCTL)
		if ((cp->control != NULL &&
		     cp->control->vport == data->vportctl) ^
		    !(data->invert & XT_IPVS_VPORTCTL)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_DIR) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct == NULL || nf_ct_is_untracked(ct)) {
			match = false;
			goto out_put_cp;
		}

		if ((ctinfo >= IP_CT_IS_REPLY) ^
		    !!(data->invert & XT_IPVS_DIR)) {
			match = false;
			goto out_put_cp;
		}
	}

	if (data->bitmask & XT_IPVS_METHOD)
		if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
		    !(data->invert & XT_IPVS_METHOD)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_VADDR) {
		if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
				    &data->vmask, family) ^
		    !(data->invert & XT_IPVS_VADDR)) {
			match = false;
			goto out_put_cp;
		}
	}

out_put_cp:
	__ip_vs_conn_put(cp);
out:
	pr_debug("match=%d\n", match);
	return match;
}
static void nft_ct_get_eval(const struct nft_expr *expr,
			    struct nft_regs *regs,
			    const struct nft_pktinfo *pkt)
{
	const struct nft_ct *priv = nft_expr_priv(expr);
	u32 *dest = &regs->data[priv->dreg];
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	const struct nf_conn_help *help;
	const struct nf_conntrack_tuple *tuple;
	const struct nf_conntrack_helper *helper;
	unsigned int state;

	ct = nf_ct_get(pkt->skb, &ctinfo);

	switch (priv->key) {
	case NFT_CT_STATE:
		if (ct == NULL)
			state = NF_CT_STATE_INVALID_BIT;
		else if (nf_ct_is_untracked(ct))
			state = NF_CT_STATE_UNTRACKED_BIT;
		else
			state = NF_CT_STATE_BIT(ctinfo);
		*dest = state;
		return;
	default:
		break;
	}

	if (ct == NULL)
		goto err;

	switch (priv->key) {
	case NFT_CT_DIRECTION:
		*dest = CTINFO2DIR(ctinfo);
		return;
	case NFT_CT_STATUS:
		*dest = ct->status;
		return;
#ifdef CONFIG_NF_CONNTRACK_MARK
	case NFT_CT_MARK:
		*dest = ct->mark;
		return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
	case NFT_CT_SECMARK:
		*dest = ct->secmark;
		return;
#endif
	case NFT_CT_EXPIRATION:
		*dest = jiffies_to_msecs(nf_ct_expires(ct));
		return;
	case NFT_CT_HELPER:
		if (ct->master == NULL)
			goto err;
		help = nfct_help(ct->master);
		if (help == NULL)
			goto err;
		helper = rcu_dereference(help->helper);
		if (helper == NULL)
			goto err;
		strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN);
		return;
#ifdef CONFIG_NF_CONNTRACK_LABELS
	case NFT_CT_LABELS: {
		struct nf_conn_labels *labels = nf_ct_labels_find(ct);
		unsigned int size;

		if (!labels) {
			memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
			return;
		}
		size = labels->words * sizeof(long);
		memcpy(dest, labels->bits, size);
		if (size < NF_CT_LABELS_MAX_SIZE)
			memset(((char *) dest) + size, 0,
			       NF_CT_LABELS_MAX_SIZE - size);
		return;
	}
#endif
	case NFT_CT_BYTES: /* fallthrough */
	case NFT_CT_PKTS: {
		const struct nf_conn_acct *acct = nf_conn_acct_find(ct);
		u64 count = 0;

		if (acct)
			count = nft_ct_get_eval_counter(acct->counter,
							priv->key, priv->dir);
		memcpy(dest, &count, sizeof(count));
		return;
	}
	default:
		break;
	}

	tuple = &ct->tuplehash[priv->dir].tuple;
	switch (priv->key) {
	case NFT_CT_L3PROTOCOL:
		*dest = nf_ct_l3num(ct);
		return;
	case NFT_CT_SRC:
		memcpy(dest, tuple->src.u3.all,
		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
		return;
	case NFT_CT_DST:
		memcpy(dest, tuple->dst.u3.all,
		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
		return;
	case NFT_CT_PROTOCOL:
		*dest = nf_ct_protonum(ct);
		return;
	case NFT_CT_PROTO_SRC:
		*dest = (__force __u16)tuple->src.u.all;
		return;
	case NFT_CT_PROTO_DST:
		*dest = (__force __u16)tuple->dst.u.all;
		return;
	default:
		break;
	}
	return;
err:
	regs->verdict.code = NFT_BREAK;
}
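/*
 * nft_ct_get_eval_counter(), called above for NFT_CT_BYTES/NFT_CT_PKTS,
 * reads the per-direction accounting counters, summing both directions
 * when no direction was specified. A sketch of the mainline
 * net/netfilter/nft_ct.c helper (verify against the kernel in use):
 */
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
				   enum nft_ct_keys k,
				   enum ip_conntrack_dir d)
{
	if (d < IP_CT_DIR_MAX)
		return k == NFT_CT_BYTES ? atomic64_read(&c[d].bytes) :
					   atomic64_read(&c[d].packets);

	return nft_ct_get_eval_counter(c, k, IP_CT_DIR_ORIGINAL) +
	       nft_ct_get_eval_counter(c, k, IP_CT_DIR_REPLY);
}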
#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,18,20))
/* Newer kernels pass the hook state; this signature is reconstructed to
 * match the state-based branch below. */
static unsigned int ntrack_hook_fn(const struct nf_hook_ops *ops,
		struct sk_buff *skb,
		const struct nf_hook_state *state)
#else
static unsigned int ntrack_hook_fn(const struct nf_hook_ops *ops,
		struct sk_buff *skb,
		const struct net_device *in,
		const struct net_device *out,
		int (*okfn)(struct sk_buff *))
#endif
{
#if (LINUX_VERSION_CODE > KERNEL_VERSION(3,18,20))
	struct net_device *in = state->in, *out = state->out;
#endif
	int ret = NF_ACCEPT;
	struct nf_conn *ct;
	struct iphdr *iph;
	flow_info_t *fi;
	user_info_t *ui;
	// struct sk_buff *linear_skb = NULL, *use_skb = NULL;
	enum ip_conntrack_info ctinfo;
	struct nos_track *nos;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct) {
		return NF_ACCEPT;
	}
	if (nf_ct_is_untracked(ct)) {
		return NF_ACCEPT;
	}
	if ((nos = nf_ct_get_nos(ct)) == NULL) {
		nt_debug("nos untracked.\n");
		return NF_ACCEPT;
	}

	fi = nt_flow(nos);
	ui = nt_user(nos);
	if (!fi || !ui) {
		return NF_ACCEPT;
	}
	if (!nt_flow_track(fi)) {
		return NF_ACCEPT;
	}

	iph = ip_hdr(skb);
	if (iph) {
		nt_msghdr_t hdr;
		pkt_cap_t pcap;
		/* capture length: the original left 'dlen' undefined, so
		 * clamp the packet's total length to the capture buffer
		 * (this clamp is an assumption) */
		uint32_t dlen = min_t(uint32_t, ntohs(iph->tot_len),
				      sizeof(pcap.data));

		/* id & magic -> for userspace to find the kernel ui node. */
		pcap.id = fi->id;
		pcap.magic = fi->magic;
		memcpy(pcap.data, (uint8_t *)iph, dlen);

		/* xmit message to userspace. */
		nt_msghdr_init(&hdr, en_MSG_PCAP, sizeof(pcap));
		if (nt_msg_enqueue(&hdr, &pcap, 0)) {
			nt_debug("skb capture failed.\n");
		}
	}

	return ret;
}
/*
 * NAT transmitter (only for outside-to-inside nat forwarding)
 * Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);
	int local;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(iph->tos),
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
					 "ip_vs_nat_xmit(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(rt->rt_dst) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(mtu));
		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
				 "ip_vs_nat_xmit(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error_put;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	if (!local) {
		/* drop old route */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		ip_rt_put(rt);
		/*
		 * Some IPv4 replies get local address from routes,
		 * not from iph, so while we DNAT after routing
		 * we need this second input/output route.
		 */
		if (!__ip_vs_reroute_locally(skb))
			goto tx_error;
	}

	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
/*
 * ICMP packet transmitter
 * called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rtable *rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc_unchecked(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				      RT_TOS(ip_hdr(skb)->tos),
				      IP_VS_RT_MODE_LOCAL |
				      IP_VS_RT_MODE_NON_LOCAL |
				      IP_VS_RT_MODE_RDR)))
		goto tx_error_icmp;
	local = rt->rt_flags & RTCF_LOCAL;

	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI4\n",
				  __func__, &cp->daddr.ip);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && ipv4_is_loopback(rt->rt_dst) &&
	    rt_is_input_route(skb_rtable(skb))) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI4\n",
			  __func__, &cp->daddr.ip);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&
	    !skb_is_gso(skb)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp(skb, pp, cp, 0);

	if (!local) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		ip_rt_put(rt);
		/*
		 * Some IPv4 replies get local address from routes,
		 * not from iph, so while we DNAT after routing
		 * we need this second input/output route.
		 */
		if (!__ip_vs_reroute_locally(skb))
			goto tx_error;
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}
/*
 * Look at outgoing ftp packets to catch the response to a PASV command
 * from the server (inside-to-outside).
 * When we see one, we build a connection entry with the client address,
 * client port 0 (unknown at the moment), the server address and the
 * server port. Mark the current connection entry as a control channel
 * of the new entry. All this work is just so that the data connection
 * can be scheduled to the right server later.
 *
 * The outgoing packet should be something like
 *   "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
 * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
 */
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
			 struct sk_buff *skb, int *diff)
{
	struct iphdr *iph;
	struct tcphdr *th;
	char *data, *data_limit;
	char *start, *end;
	union nf_inet_addr from;
	__be16 port;
	struct ip_vs_conn *n_cp;
	char buf[24];		/* xxx.xxx.xxx.xxx,ppp,ppp\000 */
	unsigned buf_len;
	int ret = 0;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;
	struct net *net;

#ifdef CONFIG_IP_VS_IPV6
	/* This application helper doesn't work with IPv6 yet,
	 * so turn this into a no-op for IPv6 packets
	 */
	if (cp->af == AF_INET6)
		return 1;
#endif

	*diff = 0;

	/* Only useful for established sessions */
	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
		return 1;

	/* Linear packets are much easier to deal with. */
	if (!skb_make_writable(skb, skb->len))
		return 0;

	if (cp->app_data == &ip_vs_ftp_pasv) {
		iph = ip_hdr(skb);
		th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
		data = (char *)th + (th->doff << 2);
		data_limit = skb_tail_pointer(skb);

		if (ip_vs_ftp_get_addrport(data, data_limit,
					   SERVER_STRING,
					   sizeof(SERVER_STRING)-1,
					   '(', ')',
					   &from.ip, &port,
					   &start, &end) != 1)
			return 1;

		IP_VS_DBG(7, "PASV response (%pI4:%d) -> %pI4:%d detected\n",
			  &from.ip, ntohs(port), &cp->caddr.ip, 0);

		/*
		 * Now update or create a connection entry for it
		 */
		{
			struct ip_vs_conn_param p;

			ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
					      iph->protocol, &from, port,
					      &cp->caddr, 0, &p);
			n_cp = ip_vs_conn_out_get(&p);
		}
		if (!n_cp) {
			struct ip_vs_conn_param p;

			ip_vs_conn_fill_param(ip_vs_conn_net(cp),
					      AF_INET, IPPROTO_TCP, &cp->caddr,
					      0, &cp->vaddr, port, &p);
			n_cp = ip_vs_conn_new(&p, &from, port,
					      IP_VS_CONN_F_NO_CPORT |
					      IP_VS_CONN_F_NFCT,
					      cp->dest, skb->mark);
			if (!n_cp)
				return 0;

			/* add its controller */
			ip_vs_control_add(n_cp, cp);
		}

		/*
		 * Replace the old passive address with the new one
		 */
		from.ip = n_cp->vaddr.ip;
		port = n_cp->vport;
		snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
			 ((unsigned char *)&from.ip)[0],
			 ((unsigned char *)&from.ip)[1],
			 ((unsigned char *)&from.ip)[2],
			 ((unsigned char *)&from.ip)[3],
			 ntohs(port) >> 8,
			 ntohs(port) & 0xFF);

		buf_len = strlen(buf);

		ct = nf_ct_get(skb, &ctinfo);
		if (ct && !nf_ct_is_untracked(ct) && nfct_nat(ct)) {
			/* If mangling fails this function will return 0
			 * which will cause the packet to be dropped.
			 * Mangling can only fail under memory pressure,
			 * hopefully it will succeed on the retransmitted
			 * packet.
			 */
			ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
						       start-data, end-start,
						       buf, buf_len);
			if (ret) {
				ip_vs_nfct_expect_related(skb, ct, n_cp,
							  IPPROTO_TCP, 0, 0);
				if (skb->ip_summed == CHECKSUM_COMPLETE)
					skb->ip_summed = CHECKSUM_UNNECESSARY;
				/* csum is updated */
				ret = 1;
			}
		}

		/*
		 * Not setting 'diff' is intentional, otherwise the sequence
		 * would be adjusted twice.
		 */

		net = skb_net(skb);
		cp->app_data = NULL;
		ip_vs_tcp_conn_listen(net, n_cp);
		ip_vs_conn_put(n_cp);
		return ret;
	}
	return 1;
}
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
	     const struct xt_socket_mtinfo1 *info)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr _hdr, *hp = NULL;
	struct sock *sk;
	__be32 daddr, saddr;
	__be16 dport, sport;
	u8 protocol;
#ifdef XT_SOCKET_HAVE_CONNTRACK
	struct nf_conn const *ct;
	enum ip_conntrack_info ctinfo;
#endif

	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
		hp = skb_header_pointer(skb, ip_hdrlen(skb),
					sizeof(_hdr), &_hdr);
		if (hp == NULL)
			return false;

		protocol = iph->protocol;
		saddr = iph->saddr;
		sport = hp->source;
		daddr = iph->daddr;
		dport = hp->dest;

	} else if (iph->protocol == IPPROTO_ICMP) {
		if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
					 &sport, &dport))
			return false;
	} else {
		return false;
	}

#ifdef XT_SOCKET_HAVE_CONNTRACK
	/* Do the lookup with the original socket address in case this is a
	 * reply packet of an established SNAT-ted connection.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct && !nf_ct_is_untracked(ct) &&
	    ((iph->protocol != IPPROTO_ICMP &&
	      ctinfo == IP_CT_ESTABLISHED_REPLY) ||
	     (iph->protocol == IPPROTO_ICMP &&
	      ctinfo == IP_CT_RELATED_REPLY)) &&
	    (ct->status & IPS_SRC_NAT_DONE)) {
		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
		dport = (iph->protocol == IPPROTO_TCP) ?
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
	}
#endif

	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
				   saddr, daddr, sport, dport,
				   par->in, NFT_LOOKUP_ANY);
	if (sk != NULL) {
		bool wildcard;
		bool transparent = true;

		/* Ignore sockets listening on INADDR_ANY */
		wildcard = (sk->sk_state != TCP_TIME_WAIT &&
			    inet_sk(sk)->inet_rcv_saddr == 0);

		/* Ignore non-transparent sockets,
		   if XT_SOCKET_TRANSPARENT is used */
		if (info && info->flags & XT_SOCKET_TRANSPARENT)
			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
					inet_sk(sk)->transparent) ||
				       (sk->sk_state == TCP_TIME_WAIT &&
					inet_twsk(sk)->tw_transparent));

		xt_socket_put_sk(sk);

		if (wildcard || !transparent)
			sk = NULL;
	}

	pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
		 protocol, &saddr, ntohs(sport),
		 &daddr, ntohs(dport),
		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);

	return (sk != NULL);
}
static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
{
	struct sfq_packet_info info;
	u32 pert = q->perturbation;
	unsigned mask = (1 << q->hash_divisor) - 1;
#ifdef CONFIG_NET_SCH_SFQ_NFCT
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
#endif

	switch (skb->protocol) {
	case __constant_htons(ETH_P_IP):
	{
		const struct iphdr *iph = ip_hdr(skb);
		info.dst = iph->daddr;
		info.src = iph->saddr;
		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
		    (iph->protocol == IPPROTO_TCP ||
		     iph->protocol == IPPROTO_UDP ||
		     iph->protocol == IPPROTO_UDPLITE ||
		     iph->protocol == IPPROTO_SCTP ||
		     iph->protocol == IPPROTO_DCCP ||
		     iph->protocol == IPPROTO_ESP))
			info.proto = *(((u32*)iph) + iph->ihl);
		else
			info.proto = iph->protocol;
		break;
	}
	case __constant_htons(ETH_P_IPV6):
	{
		struct ipv6hdr *iph = ipv6_hdr(skb);
		/* Hash ipv6 addresses into a u32. This isn't ideal,
		 * but the code is simple. */
		info.dst = jhash2(iph->daddr.s6_addr32, 4, q->perturbation);
		info.src = jhash2(iph->saddr.s6_addr32, 4, q->perturbation);
		if (iph->nexthdr == IPPROTO_TCP ||
		    iph->nexthdr == IPPROTO_UDP ||
		    iph->nexthdr == IPPROTO_UDPLITE ||
		    iph->nexthdr == IPPROTO_SCTP ||
		    iph->nexthdr == IPPROTO_DCCP ||
		    iph->nexthdr == IPPROTO_ESP)
			info.proto = *(u32*)&iph[1];
		else
			info.proto = iph->nexthdr;
		break;
	}
	default:
		info.dst = (u32)(unsigned long)skb->dst;
		info.src = (u32)(unsigned long)skb->sk;
		info.proto = skb->protocol;
	}

	info.mark = skb->mark;

#ifdef CONFIG_NET_SCH_SFQ_NFCT
	/* defaults if there is no conntrack info */
	info.ctorigsrc = info.src;
	info.ctorigdst = info.dst;
	info.ctreplsrc = info.dst;
	info.ctrepldst = info.src;
	/* collect conntrack info */
	if (ct && !nf_ct_is_untracked(ct)) {
		if (skb->protocol == __constant_htons(ETH_P_IP)) {
			info.ctorigsrc =
				ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
			info.ctorigdst =
				ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip;
			info.ctreplsrc =
				ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip;
			info.ctrepldst =
				ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip;
		} else if (skb->protocol == __constant_htons(ETH_P_IPV6)) {
			/* Again, hash ipv6 addresses into a single u32. */
			info.ctorigsrc = jhash2(
				ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6,
				4, pert);
			info.ctorigdst = jhash2(
				ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip6,
				4, pert);
			info.ctreplsrc = jhash2(
				ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6,
				4, pert);
			info.ctrepldst = jhash2(
				ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6,
				4, pert);
		}
	}
#endif

	switch (q->hash_kind) {
	case TCA_SFQ_HASH_CLASSIC:
		return jhash_3words(info.dst, info.src, info.proto,
				    pert) & mask;
	case TCA_SFQ_HASH_DST:
		return jhash_1word(info.dst, pert) & mask;
	case TCA_SFQ_HASH_SRC:
		return jhash_1word(info.src, pert) & mask;
	case TCA_SFQ_HASH_FWMARK:
		return jhash_1word(info.mark, pert) & mask;
#ifdef CONFIG_NET_SCH_SFQ_NFCT
	case TCA_SFQ_HASH_CTORIGDST:
		return jhash_1word(info.ctorigdst, pert) & mask;
	case TCA_SFQ_HASH_CTORIGSRC:
		return jhash_1word(info.ctorigsrc, pert) & mask;
	case TCA_SFQ_HASH_CTREPLDST:
		return jhash_1word(info.ctrepldst, pert) & mask;
	case TCA_SFQ_HASH_CTREPLSRC:
		return jhash_1word(info.ctreplsrc, pert) & mask;
	case TCA_SFQ_HASH_CTNATCHG:
	{
		if (info.ctorigdst == info.ctreplsrc)
			return jhash_1word(info.ctorigsrc, pert) & mask;
		return jhash_1word(info.ctreplsrc, pert) & mask;
	}
#else
	case TCA_SFQ_HASH_CTORIGDST:
	case TCA_SFQ_HASH_CTORIGSRC:
	case TCA_SFQ_HASH_CTREPLDST:
	case TCA_SFQ_HASH_CTREPLSRC:
	case TCA_SFQ_HASH_CTNATCHG:
		if (net_ratelimit())
			printk(KERN_WARNING "SFQ: Conntrack support not enabled.");
#endif
	}

	if (net_ratelimit())
		printk(KERN_WARNING "SFQ: Unknown hash method. "
		       "Falling back to classic.\n");
	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
	return jhash_3words(info.dst, info.src, info.proto, pert) & mask;
}
static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_ipvs_mtinfo *data = par->matchinfo;
	struct netns_ipvs *ipvs = net_ipvs(par->net);
	/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
	const u_int8_t family = par->family;
	struct ip_vs_iphdr iph;
	struct ip_vs_protocol *pp;
	struct ip_vs_conn *cp;
	bool match = true;

	if (data->bitmask == XT_IPVS_IPVS_PROPERTY) {
		match = skb->ipvs_property ^
			!!(data->invert & XT_IPVS_IPVS_PROPERTY);
		goto out;
	}

	/* other flags than XT_IPVS_IPVS_PROPERTY are set */
	if (!skb->ipvs_property) {
		match = false;
		goto out;
	}

	ip_vs_fill_iph_skb(family, skb, true, &iph);

	if (data->bitmask & XT_IPVS_PROTO)
		if ((iph.protocol == data->l4proto) ^
		    !(data->invert & XT_IPVS_PROTO)) {
			match = false;
			goto out;
		}

	pp = ip_vs_proto_get(iph.protocol);
	if (unlikely(!pp)) {
		match = false;
		goto out;
	}

	/*
	 * Check if the packet belongs to an existing entry
	 */
	cp = pp->conn_out_get(ipvs, family, skb, &iph);
	if (unlikely(cp == NULL)) {
		match = false;
		goto out;
	}

	/*
	 * We found a connection, i.e. ct != 0, make sure to call
	 * __ip_vs_conn_put before returning.  In our case jump to out_put_cp.
	 */
	if (data->bitmask & XT_IPVS_VPORT)
		if ((cp->vport == data->vport) ^
		    !(data->invert & XT_IPVS_VPORT)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_VPORTCTL)
		if ((cp->control != NULL &&
		     cp->control->vport == data->vportctl) ^
		    !(data->invert & XT_IPVS_VPORTCTL)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_DIR) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct == NULL || nf_ct_is_untracked(ct)) {
			match = false;
			goto out_put_cp;
		}

		if ((ctinfo >= IP_CT_IS_REPLY) ^
		    !!(data->invert & XT_IPVS_DIR)) {
			match = false;
			goto out_put_cp;
		}
	}

	if (data->bitmask & XT_IPVS_METHOD)
		if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^
		    !(data->invert & XT_IPVS_METHOD)) {
			match = false;
			goto out_put_cp;
		}

	if (data->bitmask & XT_IPVS_VADDR) {
		if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr,
				    &data->vmask, family) ^
		    !(data->invert & XT_IPVS_VADDR)) {
			match = false;
			goto out_put_cp;
		}
	}

out_put_cp:
	__ip_vs_conn_put(cp);
out:
	pr_debug("match=%d\n", match);
	return match;
}
struct sock *
xt_socket_get4_sk(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct udphdr _hdr, *hp = NULL;
	struct sock *sk;
	__be32 daddr, saddr;
	__be16 dport = 0, sport = 0;
	u8 protocol = 0;
#ifdef XT_SOCKET_HAVE_CONNTRACK
	struct nf_conn const *ct;
	enum ip_conntrack_info ctinfo;
#endif

	if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
		hp = skb_header_pointer(skb, ip_hdrlen(skb),
					sizeof(_hdr), &_hdr);
		if (hp == NULL)
			return NULL;

		protocol = iph->protocol;
		saddr = iph->saddr;
		sport = hp->source;
		daddr = iph->daddr;
		dport = hp->dest;

	} else if (iph->protocol == IPPROTO_ICMP) {
		if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
					 &sport, &dport))
			return NULL;
	} else {
		return NULL;
	}

#ifdef XT_SOCKET_HAVE_CONNTRACK
	/* Do the lookup with the original socket address in case this is a
	 * reply packet of an established SNAT-ted connection.
	 */
	ct = nf_ct_get(skb, &ctinfo);
	if (ct && !nf_ct_is_untracked(ct) &&
	    ((iph->protocol != IPPROTO_ICMP &&
	      ctinfo == IP_CT_ESTABLISHED_REPLY) ||
	     (iph->protocol == IPPROTO_ICMP &&
	      ctinfo == IP_CT_RELATED_REPLY)) &&
	    (ct->status & IPS_SRC_NAT_DONE)) {
		daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
		dport = (iph->protocol == IPPROTO_TCP) ?
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port :
			ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
	}
#endif

	sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
				   saddr, daddr, sport, dport,
				   par->in, NFT_LOOKUP_ANY);

	pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
		 protocol, &saddr, ntohs(sport),
		 &daddr, ntohs(dport),
		 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);

	return sk;
}
static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
{
	struct xt_ct_target_info *info = par->targinfo;
	struct xt_ct_target_info_v1 info_v1 = {
		.flags 		= info->flags,
		.zone		= info->zone,
		.ct_events	= info->ct_events,
		.exp_events	= info->exp_events,
	};
	int ret;

	if (info->flags & ~XT_CT_NOTRACK)
		return -EINVAL;

	memcpy(info_v1.helper, info->helper, sizeof(info->helper));

	ret = xt_ct_tg_check(par, &info_v1);
	if (ret < 0)
		return ret;

	info->ct = info_v1.ct;
	return ret;
}

static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
{
	struct xt_ct_target_info_v1 *info = par->targinfo;

	if (info->flags & ~XT_CT_NOTRACK)
		return -EINVAL;

	return xt_ct_tg_check(par, par->targinfo);
}

static int xt_ct_tg_check_v2(const struct xt_tgchk_param *par)
{
	struct xt_ct_target_info_v1 *info = par->targinfo;

	if (info->flags & ~XT_CT_MASK)
		return -EINVAL;

	return xt_ct_tg_check(par, par->targinfo);
}

static void xt_ct_destroy_timeout(struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
	struct nf_conn_timeout *timeout_ext;
	typeof(nf_ct_timeout_put_hook) timeout_put;

	rcu_read_lock();
	timeout_put = rcu_dereference(nf_ct_timeout_put_hook);

	if (timeout_put) {
		timeout_ext = nf_ct_timeout_find(ct);
		if (timeout_ext)
			timeout_put(timeout_ext->timeout);
	}
	rcu_read_unlock();
#endif
}

static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par,
			     struct xt_ct_target_info_v1 *info)
{
	struct nf_conn *ct = info->ct;
	struct nf_conn_help *help;

	if (ct && !nf_ct_is_untracked(ct)) {
		help = nfct_help(ct);
		if (help)
			module_put(help->helper->me);

		nf_ct_l3proto_module_put(par->family);

		xt_ct_destroy_timeout(ct);
		nf_ct_put(info->ct);
	}
}

static void xt_ct_tg_destroy_v0(const struct xt_tgdtor_param *par)
{
	struct xt_ct_target_info *info = par->targinfo;
	struct xt_ct_target_info_v1 info_v1 = {
		.flags 		= info->flags,
		.zone		= info->zone,
		.ct_events	= info->ct_events,
		.exp_events	= info->exp_events,
		.ct		= info->ct,
	};
	memcpy(info_v1.helper, info->helper, sizeof(info->helper));

	xt_ct_tg_destroy(par, &info_v1);
}

static void xt_ct_tg_destroy_v1(const struct xt_tgdtor_param *par)
{
	xt_ct_tg_destroy(par, par->targinfo);
}

static struct xt_target xt_ct_tg_reg[] __read_mostly = {
	{
		.name		= "CT",
		.family		= NFPROTO_UNSPEC,
		.targetsize	= sizeof(struct xt_ct_target_info),
		.checkentry	= xt_ct_tg_check_v0,
		.destroy	= xt_ct_tg_destroy_v0,
		.target		= xt_ct_target_v0,
		.table		= "raw",
		.me		= THIS_MODULE,
	},
	{
		.name		= "CT",
		.family		= NFPROTO_UNSPEC,
		.revision	= 1,
		.targetsize	= sizeof(struct xt_ct_target_info_v1),
		.checkentry	= xt_ct_tg_check_v1,
		.destroy	= xt_ct_tg_destroy_v1,
		.target		= xt_ct_target_v1,
		.table		= "raw",
		.me		= THIS_MODULE,
	},
	{
		/* The original breaks off in this revision 2 entry; the
		 * fields below are completed following the v1 entry and
		 * the xt_ct_tg_check_v2() helper above. */
		.name		= "CT",
		.family		= NFPROTO_UNSPEC,
		.revision	= 2,
		.targetsize	= sizeof(struct xt_ct_target_info_v1),
		.checkentry	= xt_ct_tg_check_v2,
		.destroy	= xt_ct_tg_destroy_v1,
		.target		= xt_ct_target_v1,
		.table		= "raw",
		.me		= THIS_MODULE,
	},
};
static unsigned int
nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
	       const struct net_device *in, const struct net_device *out,
	       int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing.
	 */
	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat) {
		/* NAT module was loaded late. */
		if (nf_ct_is_confirmed(ct))
			return NF_ACCEPT;
		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
		if (nat == NULL) {
			pr_debug("failed to add NAT extension\n");
			return NF_ACCEPT;
		}
	}

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   ops->hooknum))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
unsigned int
nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
	       const struct nf_hook_state *state,
	       unsigned int (*do_chain)(void *priv,
					struct sk_buff *skb,
					const struct nf_hook_state *state,
					struct nf_conn *ct))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);

	/* We never see fragments: conntrack defrags on pre-routing
	 * and local-out, and nf_nat_out protects post-routing.
	 */
	NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	 * have dropped it. Hence it's the user's responsibility to
	 * packet filter it out, or implement conntrack/NAT for that
	 * protocol. 8) --RR
	 */
	if (!ct)
		return NF_ACCEPT;

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nf_ct_nat_ext_add(ct);
	if (nat == NULL)
		return NF_ACCEPT;

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
							   state->hook))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		 * or local packets.
		 */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			ret = do_chain(priv, skb, state, ct);
			if (ret != NF_ACCEPT)
				return ret;

			if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
				break;

			ret = nf_nat_alloc_null_binding(ct, state->hook);
			if (ret != NF_ACCEPT)
				return ret;
		} else {
			pr_debug("Already setup manip %s for ct %p\n",
				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
				 ct);
			if (nf_nat_oif_changed(state->hook, ctinfo, nat,
					       state->out))
				goto oif_changed;
		}
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == IP_CT_ESTABLISHED_REPLY);
		if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
			goto oif_changed;
	}

	return nf_nat_packet(ct, ctinfo, state->hook, skb);

oif_changed:
	nf_ct_kill_acct(ct, ctinfo, skb);
	return NF_DROP;
}
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		   struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info *rt;	/* Route to the other host */
	int mtu;
	int rc;
	int local;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc_unchecked(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, 1|2|4)))
		goto tx_error_icmp;

	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG(10, "%s(): "
				  "stopping DNAT to local address %pI6\n",
				  __func__, &cp->daddr.in6);
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG(1, "%s(): "
			  "stopping DNAT to loopback %pI6\n",
			  __func__, &cp->daddr.in6);
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
static unsigned int
nf_nat_fn(unsigned int hooknum,
	  struct sk_buff *skb,
	  const struct net_device *in,
	  const struct net_device *out,
	  int (*okfn)(struct sk_buff *))
{
	struct nf_conn *ct;
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	/* maniptype == SRC for postrouting. */
	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);

	/* We never see fragments: conntrack defrags on pre-routing
	   and local-out, and nf_nat_out protects post-routing. */
	NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));

	ct = nf_ct_get(skb, &ctinfo);
	/* Can't track? It's not due to stress, or conntrack would
	   have dropped it. Hence it's the user's responsibility to
	   packet filter it out, or implement conntrack/NAT for that
	   protocol. 8) --RR */
	if (!ct) {
		/* Exception: ICMP redirect to new connection (not in
		   hash table yet). We must not let this through, in
		   case we're doing NAT to the same network. */
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			struct icmphdr _hdr, *hp;

			hp = skb_header_pointer(skb, ip_hdrlen(skb),
						sizeof(_hdr), &_hdr);
			if (hp != NULL &&
			    hp->type == ICMP_REDIRECT)
				return NF_DROP;
		}
		return NF_ACCEPT;
	}

	/* Don't try to NAT if this packet is not conntracked */
	if (nf_ct_is_untracked(ct))
		return NF_ACCEPT;

	nat = nfct_nat(ct);
	if (!nat)
		return NF_ACCEPT;

	switch (ctinfo) {
	case IP_CT_RELATED:
	case IP_CT_RELATED+IP_CT_IS_REPLY:
		if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
			if (!nf_nat_icmp_reply_translation(ct, ctinfo,
							   hooknum, skb))
				return NF_DROP;
			else
				return NF_ACCEPT;
		}
		/* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
	case IP_CT_NEW:
		/* Seen it before? This can happen for loopback, retrans,
		   or local packets.. */
		if (!nf_nat_initialized(ct, maniptype)) {
			unsigned int ret;

			if (unlikely(nf_ct_is_confirmed(ct)))
				/* NAT module was loaded late */
				ret = alloc_null_binding_confirmed(ct,
								   hooknum);
			else if (hooknum == NF_IP_LOCAL_IN)
				/* LOCAL_IN hook doesn't have a chain! */
				ret = alloc_null_binding(ct, hooknum);
			else
				ret = nf_nat_rule_find(skb, hooknum, in, out,
						       ct);

			if (ret != NF_ACCEPT) {
				return ret;
			}
		} else
			DEBUGP("Already setup manip %s for ct %p\n",
			       maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
			       ct);
		break;

	default:
		/* ESTABLISHED */
		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
			     ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
	}

	return nf_nat_packet(ct, ctinfo, hooknum, skb);
}
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;
	int local;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
					 0, 1|2|4)))
		goto tx_error_icmp;
	local = __ip_vs_is_local_route6(rt);
	/*
	 * Avoid duplicate tuple in reply direction for NAT traffic
	 * to local address when connection is sync-ed
	 */
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
		enum ip_conntrack_info ctinfo;
		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);

		if (ct && !nf_ct_is_untracked(ct)) {
			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
					 "ip_vs_nat_xmit_v6(): "
					 "stopping DNAT to local address");
			goto tx_error_put;
		}
	}
#endif

	/* From world but DNAT to loopback address? */
	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): "
				 "stopping DNAT to loopback address");
		goto tx_error_put;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu && !skb_is_gso(skb)) {
		if (!skb->dev) {
			struct net *net = dev_net(skb_dst(skb)->dev);

			skb->dev = net->loopback_dev;
		}
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error_put;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);

	if (!local || !skb->dev) {
		/* drop the old route when skb is not shared */
		skb_dst_drop(skb);
		skb_dst_set(skb, &rt->dst);
	} else {
		/* destined to loopback, do we need to change route? */
		dst_release(&rt->dst);
	}

	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");

	/* FIXME: when application helper enlarges the packet and the length
	   is larger than the MTU of outgoing device, there will be still
	   MTU problem. */

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
static void nft_ct_eval(const struct nft_expr *expr,
			struct nft_data data[NFT_REG_MAX + 1],
			const struct nft_pktinfo *pkt)
{
	const struct nft_ct *priv = nft_expr_priv(expr);
	struct nft_data *dest = &data[priv->dreg];
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	const struct nf_conn_help *help;
	const struct nf_conntrack_tuple *tuple;
	const struct nf_conntrack_helper *helper;
	long diff;
	unsigned int state;

	ct = nf_ct_get(pkt->skb, &ctinfo);

	switch (priv->key) {
	case NFT_CT_STATE:
		if (ct == NULL)
			state = NF_CT_STATE_INVALID_BIT;
		else if (nf_ct_is_untracked(ct))
			state = NF_CT_STATE_UNTRACKED_BIT;
		else
			state = NF_CT_STATE_BIT(ctinfo);
		dest->data[0] = state;
		return;
	}

	if (ct == NULL)
		goto err;

	switch (priv->key) {
	case NFT_CT_DIRECTION:
		dest->data[0] = CTINFO2DIR(ctinfo);
		return;
	case NFT_CT_STATUS:
		dest->data[0] = ct->status;
		return;
#ifdef CONFIG_NF_CONNTRACK_MARK
	case NFT_CT_MARK:
		dest->data[0] = ct->mark;
		return;
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
	case NFT_CT_SECMARK:
		dest->data[0] = ct->secmark;
		return;
#endif
	case NFT_CT_EXPIRATION:
		diff = (long)jiffies - (long)ct->timeout.expires;
		if (diff < 0)
			diff = 0;
		dest->data[0] = jiffies_to_msecs(diff);
		return;
	case NFT_CT_HELPER:
		if (ct->master == NULL)
			goto err;
		help = nfct_help(ct->master);
		if (help == NULL)
			goto err;
		helper = rcu_dereference(help->helper);
		if (helper == NULL)
			goto err;
		if (strlen(helper->name) >= sizeof(dest->data))
			goto err;
		strncpy((char *)dest->data, helper->name, sizeof(dest->data));
		return;
	}

	tuple = &ct->tuplehash[priv->dir].tuple;
	switch (priv->key) {
	case NFT_CT_L3PROTOCOL:
		dest->data[0] = nf_ct_l3num(ct);
		return;
	case NFT_CT_SRC:
		memcpy(dest->data, tuple->src.u3.all,
		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
		return;
	case NFT_CT_DST:
		memcpy(dest->data, tuple->dst.u3.all,
		       nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
		return;
	case NFT_CT_PROTOCOL:
		dest->data[0] = nf_ct_protonum(ct);
		return;
	case NFT_CT_PROTO_SRC:
		dest->data[0] = (__force __u16)tuple->src.u.all;
		return;
	case NFT_CT_PROTO_DST:
		dest->data[0] = (__force __u16)tuple->dst.u.all;
		return;
	}
	return;
err:
	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
}