/* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb) { const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply dir. */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; /* Non-atomic: these bits don't change. */ if (ct->status & statusbit) { struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); l4proto = __nf_nat_l4proto_find(target.src.l3num, target.dst.protonum); if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) return NF_DROP; } return NF_ACCEPT; }
/* Only called for SRC manip */ static int find_appropriate_src(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { const struct nf_conn *ct; struct nf_nat_conn_key key = { .net = net, .tuple = tuple, .zone = zone }; ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, nf_nat_bysource_params); if (!ct) return 0; nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; return in_range(l3proto, l4proto, result, range); }
/* Only called for SRC manip */ static int find_appropriate_src(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { const struct nf_conn *ct; struct nf_nat_conn_key key = { .net = net, .tuple = tuple, .zone = zone }; struct rhlist_head *hl; hl = rhltable_lookup(&nf_nat_bysource_table, &key, nf_nat_bysource_params); if (!hl) return 0; ct = container_of(hl, typeof(*ct), nat_bysource); nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; return in_range(l3proto, l4proto, result, range); }
/* Only called for SRC manip */ static int find_appropriate_src(struct net *net, const struct nf_conntrack_zone *zone, const struct nf_nat_l3proto *l3proto, const struct nf_nat_l4proto *l4proto, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { const struct nf_conn *ct; struct nf_nat_conn_key key = { .net = net, .tuple = tuple, .zone = zone }; struct rhlist_head *hl, *h; hl = rhltable_lookup(&nf_nat_bysource_table, &key, nf_nat_bysource_params); rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { nf_ct_invert_tuplepr(result, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; if (in_range(l3proto, l4proto, result, range)) return 1; }
/* Is this tuple already taken? (not by us) */ int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { /* Conntrack tracking doesn't keep track of outgoing tuples; only * incoming ones. NAT means they don't have a fixed mapping, * so we invert the tuple and look for the incoming reply. * * We could keep a separate hash if this proves too slow. */ struct nf_conntrack_tuple reply; nf_ct_invert_tuplepr(&reply, tuple); return nf_conntrack_tuple_taken(&reply, ignored_conntrack); }
int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, struct nf_conn *ct) { struct net *net = nf_ct_net(ct); int ret; ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { /* * Reason: Hairpin translation for TCP/UDP not working * Modified: Ubicom * Date: 2009.11.18 */ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) { #if defined(CONFIG_IP_NF_TARGET_SNATP2P) if ((HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST)){ struct nf_conntrack_tuple reply_tuple, new_tuple; nf_ct_invert_tuplepr(&reply_tuple, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); if (find_appropriate_p2p_dst(net, &reply_tuple, &new_tuple)) { struct nf_nat_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_SNATP2P_DONE_MASK); ct->status |= IPS_SNATP2P_DST; range.flags = (IP_NAT_RANGE_MAP_IPS); //range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); //range.min = range.max = new_tuple.dst.u; range.min_ip = range.max_ip = new_tuple.dst.u3.ip; ret = nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); range.min_ip = range.max_ip = new_tuple.src.u3.ip; ret = nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); } } if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) #endif /* NUL mapping */ ret = alloc_null_binding(ct, hooknum); } } return ret; }
static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { struct net *net = nf_ct_net(ct); typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; pr_debug("increasing timeouts\n"); /* increase timeout of GRE data channel conntrack entry */ ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); else { struct nf_conntrack_tuple inv_t; struct nf_conntrack_expect *exp_other; /* obviously this tuple inversion only works until you do NAT */ nf_ct_invert_tuplepr(&inv_t, &exp->tuple); pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); exp_other = nf_ct_expect_find_get(net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); nf_ct_unexpect_related(exp_other); nf_ct_expect_put(exp_other); } else { pr_debug("not found\n"); } } rcu_read_unlock(); }
unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; /* nat helper or nfctnetlink also setup binding */ nat = nf_ct_nat_ext_add(ct); if (nat == NULL) return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally * this is what is in the conntrack, except for prior * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ nf_ct_invert_tuplepr(&curr_tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ nf_ct_invert_tuplepr(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; if (nfct_help(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { int err; err = rhashtable_insert_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); if (err) return NF_DROP; } /* It's done. */ if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; return NF_ACCEPT; }
unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; /* Can't setup nat info for confirmed ct. */ if (nf_ct_is_confirmed(ct)) return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally * this is what is in the conntrack, except for prior * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ nf_ct_invert_tuplepr(&curr_tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ nf_ct_invert_tuplepr(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; if (nfct_help(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { struct nf_nat_conn_key key = { .net = nf_ct_net(ct), .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, .zone = nf_ct_zone(ct), }; int err; err = rhltable_insert_key(&nf_nat_bysource_table, &key, &ct->nat_bysource, nf_nat_bysource_params); if (err) return NF_DROP; } /* It's done. */ if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; return NF_ACCEPT; } EXPORT_SYMBOL(nf_nat_setup_info); static unsigned int __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip) { /* Force range to this IP; let proto decide mapping for * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). * Use reply in case it's already been mangled (eg local packet). */ union nf_inet_addr ip = (manip == NF_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); struct nf_nat_range range = { .flags = NF_NAT_RANGE_MAP_IPS, .min_addr = ip, .max_addr = ip, }; return nf_nat_setup_info(ct, &range, manip); } unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum)); } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb) { const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply dir. */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; /* Non-atomic: these bits don't change. */ if (ct->status & statusbit) { struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); l4proto = __nf_nat_l4proto_find(target.src.l3num, target.dst.protonum); if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) return NF_DROP; } return NF_ACCEPT; } EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; }; /* kill conntracks with affected NAT section */ static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; return i->status & IPS_NAT_MASK ? 1 : 0; } static int nf_nat_proto_clean(struct nf_conn *ct, void *data) { if (nf_nat_proto_remove(ct, data)) return 1; if ((ct->status & IPS_SRC_NAT_DONE) == 0) return 0; /* This netns is being destroyed, and conntrack has nat null binding. * Remove it from bysource hash, as the table will be freed soon. * * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ return 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) { struct nf_nat_proto_clean clean = { .l3proto = l3proto, .l4proto = l4proto, }; struct net *net; rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } static void nf_nat_l3proto_clean(u8 l3proto) { struct nf_nat_proto_clean clean = { .l3proto = l3proto, }; struct net *net; rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } /* Protocol registration. */ int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) { const struct nf_nat_l4proto **l4protos; unsigned int i; int ret = 0; mutex_lock(&nf_nat_proto_mutex); if (nf_nat_l4protos[l3proto] == NULL) { l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), GFP_KERNEL); if (l4protos == NULL) { ret = -ENOMEM; goto out; } for (i = 0; i < IPPROTO_MAX; i++) RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. */ smp_wmb(); nf_nat_l4protos[l3proto] = l4protos; } if (rcu_dereference_protected( nf_nat_l4protos[l3proto][l4proto->l4proto], lockdep_is_held(&nf_nat_proto_mutex) ) != &nf_nat_l4proto_unknown) { ret = -EBUSY; goto out; } RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); out: mutex_unlock(&nf_nat_proto_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); /* No one stores the protocol anywhere; simply delete it. */ void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) { mutex_lock(&nf_nat_proto_mutex); RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], &nf_nat_l4proto_unknown); mutex_unlock(&nf_nat_proto_mutex); synchronize_rcu(); nf_nat_l4proto_clean(l3proto, l4proto->l4proto); }
int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum) { struct { struct icmphdr icmp; struct iphdr ip; } *inside; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); unsigned int hdrlen = ip_hdrlen(skb); const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; unsigned long statusbit; WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) return 0; inside = (void *)skb->data + hdrlen; if (inside->icmp.type == ICMP_REDIRECT) { if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) return 0; if (ct->status & IPS_NAT_MASK) return 0; } if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply direction */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; if (!(ct->status & statusbit)) return 1; l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), l4proto, &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt may reallocate */ inside = (void *)skb->data + hdrlen; inside->icmp.checksum = 0; inside->icmp.checksum = csum_fold(skb_checksum(skb, hdrlen, skb->len - hdrlen, 0)); } /* Change outer to look like the reply to an incoming packet */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) return 0; return 1; }
int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen) { struct { struct icmp6hdr icmp6; struct ipv6hdr ip6; } *inside; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); const struct nf_nat_l4proto *l4proto; struct nf_conntrack_tuple target; unsigned long statusbit; NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) return 0; inside = (void *)skb->data + hdrlen; if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) return 0; if (ct->status & IPS_NAT_MASK) return 0; } if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply direction */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; if (!(ct->status & statusbit)) return 1; l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), l4proto, &ct->tuplehash[!dir].tuple, !manip)) return 0; if (skb->ip_summed != CHECKSUM_PARTIAL) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); inside = (void *)skb->data + hdrlen; inside->icmp6.icmp6_cksum = 0; inside->icmp6.icmp6_cksum = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len - hdrlen, IPPROTO_ICMPV6, csum_partial(&inside->icmp6, skb->len - hdrlen, 0)); } nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) return 0; return 1; }