unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; /* nat helper or nfctnetlink also setup binding */ nat = nf_ct_nat_ext_add(ct); if (nat == NULL) return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally * this is what is in the conntrack, except for prior * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ nf_ct_invert_tuplepr(&curr_tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ nf_ct_invert_tuplepr(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; if (nfct_help(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { int err; err = rhashtable_insert_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); if (err) return NF_DROP; } /* It's done. */ if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; return NF_ACCEPT; }
unsigned int nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { struct nf_conntrack_tuple curr_tuple, new_tuple; /* Can't setup nat info for confirmed ct. */ if (nf_ct_is_confirmed(ct)) return NF_ACCEPT; NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally * this is what is in the conntrack, except for prior * manipulations (future optimization: if num_manips == 0, * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ nf_ct_invert_tuplepr(&curr_tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype); if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) { struct nf_conntrack_tuple reply; /* Alter conntrack table so will recognize replies. */ nf_ct_invert_tuplepr(&reply, &new_tuple); nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; if (nfct_help(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { struct nf_nat_conn_key key = { .net = nf_ct_net(ct), .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, .zone = nf_ct_zone(ct), }; int err; err = rhltable_insert_key(&nf_nat_bysource_table, &key, &ct->nat_bysource, nf_nat_bysource_params); if (err) return NF_DROP; } /* It's done. */ if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; return NF_ACCEPT; } EXPORT_SYMBOL(nf_nat_setup_info); static unsigned int __nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip) { /* Force range to this IP; let proto decide mapping for * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). * Use reply in case it's already been mangled (eg local packet). */ union nf_inet_addr ip = (manip == NF_NAT_MANIP_SRC ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3); struct nf_nat_range range = { .flags = NF_NAT_RANGE_MAP_IPS, .min_addr = ip, .max_addr = ip, }; return nf_nat_setup_info(ct, &range, manip); } unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum)); } EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding); /* Do packet manipulations according to nf_nat_setup_info. */ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb) { const struct nf_nat_l3proto *l3proto; const struct nf_nat_l4proto *l4proto; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; /* Invert if this is reply dir. */ if (dir == IP_CT_DIR_REPLY) statusbit ^= IPS_NAT_MASK; /* Non-atomic: these bits don't change. */ if (ct->status & statusbit) { struct nf_conntrack_tuple target; /* We are aiming to look like inverse of other direction. */ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); l3proto = __nf_nat_l3proto_find(target.src.l3num); l4proto = __nf_nat_l4proto_find(target.src.l3num, target.dst.protonum); if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype)) return NF_DROP; } return NF_ACCEPT; } EXPORT_SYMBOL_GPL(nf_nat_packet); struct nf_nat_proto_clean { u8 l3proto; u8 l4proto; }; /* kill conntracks with affected NAT section */ static int nf_nat_proto_remove(struct nf_conn *i, void *data) { const struct nf_nat_proto_clean *clean = data; if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) || (clean->l4proto && nf_ct_protonum(i) != clean->l4proto)) return 0; return i->status & IPS_NAT_MASK ? 1 : 0; } static int nf_nat_proto_clean(struct nf_conn *ct, void *data) { if (nf_nat_proto_remove(ct, data)) return 1; if ((ct->status & IPS_SRC_NAT_DONE) == 0) return 0; /* This netns is being destroyed, and conntrack has nat null binding. * Remove it from bysource hash, as the table will be freed soon. * * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ return 0; } static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto) { struct nf_nat_proto_clean clean = { .l3proto = l3proto, .l4proto = l4proto, }; struct net *net; rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } static void nf_nat_l3proto_clean(u8 l3proto) { struct nf_nat_proto_clean clean = { .l3proto = l3proto, }; struct net *net; rtnl_lock(); for_each_net(net) nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0); rtnl_unlock(); } /* Protocol registration. */ int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto) { const struct nf_nat_l4proto **l4protos; unsigned int i; int ret = 0; mutex_lock(&nf_nat_proto_mutex); if (nf_nat_l4protos[l3proto] == NULL) { l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *), GFP_KERNEL); if (l4protos == NULL) { ret = -ENOMEM; goto out; } for (i = 0; i < IPPROTO_MAX; i++) RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown); /* Before making proto_array visible to lockless readers, * we must make sure its content is committed to memory. */ smp_wmb(); nf_nat_l4protos[l3proto] = l4protos; } if (rcu_dereference_protected( nf_nat_l4protos[l3proto][l4proto->l4proto], lockdep_is_held(&nf_nat_proto_mutex) ) != &nf_nat_l4proto_unknown) { ret = -EBUSY; goto out; } RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto); out: mutex_unlock(&nf_nat_proto_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_nat_l4proto_register); /* No one stores the protocol anywhere; simply delete it. */ void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto) { mutex_lock(&nf_nat_proto_mutex); RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], &nf_nat_l4proto_unknown); mutex_unlock(&nf_nat_proto_mutex); synchronize_rcu(); nf_nat_l4proto_clean(l3proto, l4proto->l4proto); }