/* No one is using the conntrack by the time this is called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
	if (ct->status & IPS_SRC_NAT_DONE)
		rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
				nf_nat_bysource_params);
}
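/*
 * For illustration only: a minimal, hedged sketch of the pattern above,
 * using hypothetical names (demo_conn, demo_table, demo_conn_params are
 * not part of nf_nat). The object embeds a struct rhlist_head, a status
 * bit records whether it was ever hashed, and the destructor removes it
 * from the table only if that bit is set, mirroring the
 * IPS_SRC_NAT_DONE check in nf_nat_cleanup_conntrack().
 */
#include <linux/kernel.h>
#include <linux/rhashtable.h>

#define DEMO_IN_TABLE	(1UL << 0)

struct demo_conn {
	unsigned long status;
	u32 src_key;
	struct rhlist_head bysource;	/* linkage into demo_table */
};

/* Simplified params: a plain u32 key stored inline in the object.
 * nf_nat instead hashes a synthetic key via obj_hashfn/obj_cmpfn.
 */
static const struct rhashtable_params demo_conn_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct demo_conn, src_key),
	.head_offset	= offsetof(struct demo_conn, bysource),
};

static struct rhltable demo_table;	/* rhltable_init() assumed at module init */

static int demo_conn_setup(struct demo_conn *conn)
{
	int err;

	err = rhltable_insert(&demo_table, &conn->bysource, demo_conn_params);
	if (err)
		return err;

	conn->status |= DEMO_IN_TABLE;
	return 0;
}

/* Only unhash objects that were actually hashed. */
static void demo_conn_destroy(struct demo_conn *conn)
{
	if (conn->status & DEMO_IN_TABLE)
		rhltable_remove(&demo_table, &conn->bysource,
				demo_conn_params);
}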
static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array,
				  unsigned int entries)
{
	struct test_obj *obj;
	int err;
	unsigned int i, insert_retries = 0;
	s64 start, end;

	/*
	 * Insertion Test:
	 * Insert entries into the table, all with even-numbered keys.
	 */
	pr_info("  Adding %d keys\n", entries);
	start = ktime_get_ns();
	for (i = 0; i < entries; i++) {
		struct test_obj *obj = &array[i];

		obj->value.id = i * 2;
		err = insert_retry(ht, obj, test_rht_params);
		if (err > 0)
			insert_retries += err;
		else if (err)
			return err;
	}

	if (insert_retries)
		pr_info("  %u insertions retried due to memory pressure\n",
			insert_retries);

	test_bucket_stats(ht, entries);
	rcu_read_lock();
	test_rht_lookup(ht, array, entries);
	rcu_read_unlock();

	test_bucket_stats(ht, entries);

	pr_info("  Deleting %d keys\n", entries);
	for (i = 0; i < entries; i++) {
		struct test_obj_val key = {
			.id = i * 2,
		};

		if (array[i].value.id != TEST_INSERT_FAIL) {
			obj = rhashtable_lookup_fast(ht, &key, test_rht_params);
			BUG_ON(!obj);

			rhashtable_remove_fast(ht, &obj->node, test_rht_params);
		}

		cond_resched();
	}

	end = ktime_get_ns();
	pr_info("  Duration of test: %lld ns\n", end - start);

	return end - start;
}

static struct rhashtable ht;
static struct rhltable rhlt;

static int __init test_rhltable(unsigned int entries)
{
	struct test_obj_rhl *rhl_test_objects;
	unsigned long *obj_in_table;
	unsigned int i, j, k;
	int ret, err;

	if (entries == 0)
		entries = 1;

	rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries);
	if (!rhl_test_objects)
		return -ENOMEM;

	ret = -ENOMEM;
	obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long));
	if (!obj_in_table)
		goto out_free;

	/* nulls_base not supported in rhlist interface */
	test_rht_params.nulls_base = 0;
	err = rhltable_init(&rhlt, &test_rht_params);
	if (WARN_ON(err))
		goto out_free;

	k = prandom_u32();
	ret = 0;
	for (i = 0; i < entries; i++) {
		rhl_test_objects[i].value.id = k;
		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (WARN(err, "error %d on element %d\n", err, i))
			break;
		if (err == 0)
			set_bit(i, obj_in_table);
	}

	if (err)
		ret = err;

	pr_info("test %d add/delete pairs into rhlist\n", entries);
	for (i = 0; i < entries; i++) {
		struct rhlist_head *h, *pos;
		struct test_obj_rhl *obj;
		struct test_obj_val key = {
			.id = k,
		};
		bool found;

		rcu_read_lock();
		h = rhltable_lookup(&rhlt, &key, test_rht_params);
		if (WARN(!h, "key not found during iteration %d of %d", i, entries)) {
			rcu_read_unlock();
			break;
		}

		if (i) {
			j = i - 1;
			rhl_for_each_entry_rcu(obj, pos, h, list_node) {
				if (WARN(pos == &rhl_test_objects[j].list_node,
					 "old element found, should be gone"))
					break;
			}
		}

		cond_resched_rcu();

		found = false;

		rhl_for_each_entry_rcu(obj, pos, h, list_node) {
			if (pos == &rhl_test_objects[i].list_node) {
				found = true;
				break;
			}
		}

		rcu_read_unlock();

		if (WARN(!found, "element %d not found", i))
			break;

		err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		WARN(err, "rhltable_remove: err %d for iteration %d\n", err, i);
		if (err == 0)
			clear_bit(i, obj_in_table);
	}

	if (ret == 0 && err)
		ret = err;

	for (i = 0; i < entries; i++) {
		WARN(test_bit(i, obj_in_table), "elem %d allegedly still present", i);

		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (WARN(err, "error %d on element %d\n", err, i))
			break;
		if (err == 0)
			set_bit(i, obj_in_table);
	}

	pr_info("test %d random rhlist add/delete operations\n", entries);
	for (j = 0; j < entries; j++) {
		u32 i = prandom_u32_max(entries);
		u32 prand = prandom_u32();

		cond_resched();

		if (prand == 0)
			prand = prandom_u32();

		if (prand & 1) {
			prand >>= 1;
			continue;
		}

		err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (test_bit(i, obj_in_table)) {
			clear_bit(i, obj_in_table);
			if (WARN(err, "cannot remove element at slot %d", i))
				continue;
		} else {
			if (WARN(err != -ENOENT, "removed non-existent element %d, error %d not %d",
				 i, err, -ENOENT))
				continue;
		}

		if (prand & 1) {
			prand >>= 1;
			continue;
		}

		err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
				      test_rht_params);
		if (err == 0) {
			if (WARN(test_and_set_bit(i, obj_in_table),
				 "succeeded to insert same object %d", i))
				continue;
		} else {
			if (WARN(!test_bit(i, obj_in_table),
				 "failed to insert object %d", i))
				continue;
		}

		if (prand & 1) {
			prand >>= 1;
			continue;
		}
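/*
 * What test_rhltable() above exercises, reduced to a minimal, hedged
 * sketch with hypothetical names (demo_dup, demo_dup_params, and
 * demo_dup_test are not part of the test module): unlike a plain
 * rhashtable, an rhltable chains objects that share a key, so inserting
 * the same key twice succeeds and a lookup returns a list to walk under
 * RCU protection.
 */
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/rhashtable.h>

struct demo_dup {
	u32 key;
	struct rhlist_head list_node;
};

static const struct rhashtable_params demo_dup_params = {
	.key_len	= sizeof(u32),
	.key_offset	= offsetof(struct demo_dup, key),
	.head_offset	= offsetof(struct demo_dup, list_node),
};

static int __init demo_dup_test(void)
{
	struct demo_dup a = { .key = 1 }, b = { .key = 1 };
	struct rhlist_head *list, *pos;
	struct demo_dup *obj;
	struct rhltable hlt;
	u32 key = 1;
	int err;

	err = rhltable_init(&hlt, &demo_dup_params);
	if (err)
		return err;

	/* Duplicate keys are allowed; both inserts should succeed. */
	err = rhltable_insert(&hlt, &a.list_node, demo_dup_params);
	if (!err)
		err = rhltable_insert(&hlt, &b.list_node, demo_dup_params);

	/* Lookup yields the whole chain of objects sharing the key. */
	rcu_read_lock();
	list = rhltable_lookup(&hlt, &key, demo_dup_params);
	rhl_for_each_entry_rcu(obj, pos, list, list_node)
		pr_info("found object with key %u\n", obj->key);
	rcu_read_unlock();

	rhltable_remove(&hlt, &a.list_node, demo_dup_params);
	rhltable_remove(&hlt, &b.list_node, demo_dup_params);
	rhltable_destroy(&hlt);
	return err;
}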
unsigned int
nf_nat_setup_info(struct nf_conn *ct,
		  const struct nf_nat_range *range,
		  enum nf_nat_manip_type maniptype)
{
	struct nf_conntrack_tuple curr_tuple, new_tuple;

	/* Can't set up NAT info for a confirmed ct. */
	if (nf_ct_is_confirmed(ct))
		return NF_ACCEPT;

	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
		     maniptype == NF_NAT_MANIP_DST);
	BUG_ON(nf_nat_initialized(ct, maniptype));

	/* What we've got will look like inverse of reply. Normally
	 * this is what is in the conntrack, except for prior
	 * manipulations (future optimization: if num_manips == 0,
	 * orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
	 */
	nf_ct_invert_tuplepr(&curr_tuple,
			     &ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);

	if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
		struct nf_conntrack_tuple reply;

		/* Alter conntrack table so it will recognize replies. */
		nf_ct_invert_tuplepr(&reply, &new_tuple);
		nf_conntrack_alter_reply(ct, &reply);

		/* Non-atomic: we own this at the moment. */
		if (maniptype == NF_NAT_MANIP_SRC)
			ct->status |= IPS_SRC_NAT;
		else
			ct->status |= IPS_DST_NAT;

		if (nfct_help(ct))
			if (!nfct_seqadj_ext_add(ct))
				return NF_DROP;
	}

	if (maniptype == NF_NAT_MANIP_SRC) {
		struct nf_nat_conn_key key = {
			.net = nf_ct_net(ct),
			.tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
			.zone = nf_ct_zone(ct),
		};
		int err;

		err = rhltable_insert_key(&nf_nat_bysource_table,
					  &key,
					  &ct->nat_bysource,
					  nf_nat_bysource_params);
		if (err)
			return NF_DROP;
	}

	/* It's done. */
	if (maniptype == NF_NAT_MANIP_DST)
		ct->status |= IPS_DST_NAT_DONE;
	else
		ct->status |= IPS_SRC_NAT_DONE;

	return NF_ACCEPT;
}
EXPORT_SYMBOL(nf_nat_setup_info);

static unsigned int
__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
	/* Force range to this IP; let proto decide mapping for
	 * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
	 * Use reply in case it's already been mangled (e.g. local packet).
	 */
	union nf_inet_addr ip =
		(manip == NF_NAT_MANIP_SRC ?
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
		ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
	struct nf_nat_range range = {
		.flags		= NF_NAT_RANGE_MAP_IPS,
		.min_addr	= ip,
		.max_addr	= ip,
	};
	return nf_nat_setup_info(ct, &range, manip);
}

unsigned int
nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
{
	return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);

/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
			   enum ip_conntrack_info ctinfo,
			   unsigned int hooknum,
			   struct sk_buff *skb)
{
	const struct nf_nat_l3proto *l3proto;
	const struct nf_nat_l4proto *l4proto;
	enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
	unsigned long statusbit;
	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);

	if (mtype == NF_NAT_MANIP_SRC)
		statusbit = IPS_SRC_NAT;
	else
		statusbit = IPS_DST_NAT;

	/* Invert if this is reply dir. */
	if (dir == IP_CT_DIR_REPLY)
		statusbit ^= IPS_NAT_MASK;

	/* Non-atomic: these bits don't change. */
	if (ct->status & statusbit) {
		struct nf_conntrack_tuple target;

		/* We are aiming to look like inverse of other direction. */
		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

		l3proto = __nf_nat_l3proto_find(target.src.l3num);
		l4proto = __nf_nat_l4proto_find(target.src.l3num,
						target.dst.protonum);
		if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
			return NF_DROP;
	}
	return NF_ACCEPT;
}
EXPORT_SYMBOL_GPL(nf_nat_packet);

struct nf_nat_proto_clean {
	u8	l3proto;
	u8	l4proto;
};

/* kill conntracks with affected NAT section */
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
	const struct nf_nat_proto_clean *clean = data;

	if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
	    (clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
		return 0;

	return i->status & IPS_NAT_MASK ? 1 : 0;
}

static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
	if (nf_nat_proto_remove(ct, data))
		return 1;

	if ((ct->status & IPS_SRC_NAT_DONE) == 0)
		return 0;

	/* This netns is being destroyed, and conntrack has nat null binding.
	 * Remove it from the bysource hash, as the table will be freed soon.
	 *
	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
	 * will delete the entry from the already-freed table.
	 */
	clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
	rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
			nf_nat_bysource_params);

	/* don't delete conntrack.  Although that would make things a lot
	 * simpler, we'd end up flushing all conntracks on nat rmmod.
	 */
	return 0;
}

static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
		.l4proto = l4proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

static void nf_nat_l3proto_clean(u8 l3proto)
{
	struct nf_nat_proto_clean clean = {
		.l3proto = l3proto,
	};
	struct net *net;

	rtnl_lock();
	for_each_net(net)
		nf_ct_iterate_cleanup(net, nf_nat_proto_remove, &clean, 0, 0);
	rtnl_unlock();
}

/* Protocol registration. */
int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	const struct nf_nat_l4proto **l4protos;
	unsigned int i;
	int ret = 0;

	mutex_lock(&nf_nat_proto_mutex);
	if (nf_nat_l4protos[l3proto] == NULL) {
		l4protos = kmalloc(IPPROTO_MAX * sizeof(struct nf_nat_l4proto *),
				   GFP_KERNEL);
		if (l4protos == NULL) {
			ret = -ENOMEM;
			goto out;
		}

		for (i = 0; i < IPPROTO_MAX; i++)
			RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);

		/* Before making proto_array visible to lockless readers,
		 * we must make sure its content is committed to memory.
		 */
		smp_wmb();

		nf_nat_l4protos[l3proto] = l4protos;
	}

	if (rcu_dereference_protected(
			nf_nat_l4protos[l3proto][l4proto->l4proto],
			lockdep_is_held(&nf_nat_proto_mutex)
			) != &nf_nat_l4proto_unknown) {
		ret = -EBUSY;
		goto out;
	}
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
 out:
	mutex_unlock(&nf_nat_proto_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);

/* No one stores the protocol anywhere; simply delete it. */
void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
{
	mutex_lock(&nf_nat_proto_mutex);
	RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
			 &nf_nat_l4proto_unknown);
	mutex_unlock(&nf_nat_proto_mutex);
	synchronize_rcu();

	nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
}
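/*
 * The register/unregister pair above follows the classic RCU publish
 * pattern. A minimal, hedged sketch with hypothetical names (demo_ops,
 * demo_slot, and friends are illustrative, not part of nf_nat): writers
 * serialize on a mutex, publication uses rcu_assign_pointer(), and
 * unregistration parks the slot back on a default implementation before
 * waiting out a grace period, so no reader can still be using the old
 * ops when the caller proceeds to tear them down.
 */
#include <linux/mutex.h>
#include <linux/rcupdate.h>

struct demo_ops {
	void (*handler)(void);
};

static const struct demo_ops demo_default_ops;	/* "unknown" placeholder */
static const struct demo_ops __rcu *demo_slot =
	RCU_INITIALIZER(&demo_default_ops);
static DEFINE_MUTEX(demo_mutex);

static int demo_ops_register(const struct demo_ops *ops)
{
	int ret = 0;

	mutex_lock(&demo_mutex);
	/* Only the writer-side lock holder may dereference like this. */
	if (rcu_dereference_protected(demo_slot,
				      lockdep_is_held(&demo_mutex)) !=
	    &demo_default_ops) {
		ret = -EBUSY;
		goto out;
	}
	/* Pairs with rcu_dereference() in readers; orders ops contents. */
	rcu_assign_pointer(demo_slot, ops);
out:
	mutex_unlock(&demo_mutex);
	return ret;
}

static void demo_ops_unregister(void)
{
	mutex_lock(&demo_mutex);
	RCU_INIT_POINTER(demo_slot, &demo_default_ops);
	mutex_unlock(&demo_mutex);
	synchronize_rcu();	/* readers of the old ops are now done */
}

static void demo_ops_call(void)
{
	const struct demo_ops *ops;

	rcu_read_lock();
	ops = rcu_dereference(demo_slot);
	if (ops->handler)
		ops->handler();
	rcu_read_unlock();
}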